1#!/usr/local/bin/python3.8
2
3"""
4asciidoc - converts an AsciiDoc text file to HTML or DocBook
5
6Copyright (C) 2002-2013 Stuart Rackham.
7Copyright (C) 2013-2020 AsciiDoc Contributors.
8
9Free use of this software is granted under the terms of the GNU General
10Public License version 2 (GPLv2).
11"""
12
13import ast
14import copy
15import csv
16from functools import lru_cache
17import getopt
18import io
19import locale
20import math
21import os
22import re
23import shutil
24import subprocess
25import sys
26import tempfile
27import time
28import traceback
29import unicodedata
30import zipfile
31
32from ast import literal_eval
33from collections import OrderedDict
34
# Used by asciidocapi.py #
VERSION = '9.1.1'           # See CHANGELOG file for version history.

MIN_PYTHON_VERSION = (3, 5)  # Require this version of Python or better.

# ---------------------------------------------------------------------------
# Program constants.
# ---------------------------------------------------------------------------
# NOTE(review): presumably the backend and document type used when none is
# specified by the user -- confirm against the option handling code.
DEFAULT_BACKEND = 'html'
DEFAULT_DOCTYPE = 'article'
# Allowed substitution options for List, Paragraph and DelimitedBlock
# definition subs entry.
SUBS_OPTIONS = ('specialcharacters', 'quotes', 'specialwords',
                'replacements', 'attributes', 'macros', 'callouts', 'normal', 'verbatim',
                'none', 'replacements2', 'replacements3')
# Default value for unspecified subs and presubs configuration file entries.
SUBS_NORMAL = ('specialcharacters', 'quotes', 'attributes',
               'specialwords', 'replacements', 'macros', 'replacements2')
# NOTE(review): presumably the substitutions selected by the 'verbatim'
# option listed in SUBS_OPTIONS -- confirm where it is expanded.
SUBS_VERBATIM = ('specialcharacters', 'callouts')

NAME_RE = r'[^\W\d][-\w]*'  # Valid section or attribute name.
OR, AND = ',', '+'              # Attribute list separators.
# Carriage return + line feed.
# NOTE(review): presumably the default output line terminator -- confirm.
DEFAULT_NEWLINE = '\r\n'
58
59# ---------------------------------------------------------------------------
60# Utility functions and classes.
61# ---------------------------------------------------------------------------
62
63
class EAsciiDoc(Exception):
    """Exception raised to report AsciiDoc processing errors."""
66
67
class AttrDict(dict):
    """
    Dictionary whose items can also be read, written and deleted as
    attributes, i.e. obj.foo is interchangeable with obj['foo'].
    Reading an attribute that has no matching item yields None.
    """
    def __getattr__(self, name):
        # Only invoked when regular attribute lookup has already failed.
        try:
            value = self[name]
        except KeyError:
            value = None
        return value

    def __setattr__(self, name, value):
        # Attribute assignment always creates/overwrites a dictionary item.
        self[name] = value

    def __delattr__(self, name):
        # Report a missing item using the attribute protocol's exception.
        try:
            del self[name]
        except KeyError as missing:
            raise AttributeError(missing)

    def __repr__(self):
        return '<AttrDict ' + super().__repr__() + '>'

    def __getstate__(self):
        # The state is a plain dict copy of the items.
        return dict(self.items())

    def __setstate__(self, value):
        # Restore the items one by one from the saved mapping.
        for name, item in value.items():
            self[name] = item
98
99
class InsensitiveDict(dict):
    """
    Like a dictionary except key access is case insensitive.
    Keys are stored in lower case.

    Lookup, assignment, deletion, membership tests, get(), pop(), update()
    and setdefault() all lower-case the key before touching the underlying
    dictionary. Items passed to the constructor are lower-cased as well.
    """
    def __init__(self, *args, **kwargs):
        # Route the initial items through __setitem__ so that their keys are
        # stored in lower case like every other key.
        dict.__init__(self)
        for k, v in dict(*args, **kwargs).items():
            self[k] = v

    def __getitem__(self, key):
        return dict.__getitem__(self, key.lower())

    def __setitem__(self, key, value):
        dict.__setitem__(self, key.lower(), value)

    def __delitem__(self, key):
        dict.__delitem__(self, key.lower())

    def __contains__(self, key):
        # Keys without a lower() method cannot be stored through
        # __setitem__, so report them as absent instead of raising.
        return isinstance(key, str) and dict.__contains__(self, key.lower())

    def has_key(self, key):
        """Return True if 'key' (compared case insensitively) is present."""
        return key.lower() in self

    def get(self, key, default=None):
        return dict.get(self, key.lower(), default)

    def pop(self, key, *default):
        """Remove 'key' (case insensitive) and return its value.
        The optional 'default' is returned if the key is missing, otherwise
        KeyError is raised (same contract as dict.pop)."""
        return dict.pop(self, key.lower(), *default)

    def update(self, dict):
        """Copy all items of the mapping 'dict', lower-casing their keys."""
        for k, v in list(dict.items()):
            self[k] = v

    def setdefault(self, key, default=None):
        return dict.setdefault(self, key.lower(), default)
123
124
class Trace(object):
    """
    Diagnostic output generator driven by the document 'trace' attribute.
    A single global instance of this class, named trace, is used.
    """
    SUBS_NAMES = ('specialcharacters', 'quotes', 'specialwords',
                  'replacements', 'attributes', 'macros', 'callouts',
                  'replacements2', 'replacements3')

    def __init__(self):
        self.name_re = ''        # Regexp pattern to match trace names.
        self.linenos = True
        self.offset = 0

    def __call__(self, name, before, after=None):
        """
        Emit a trace message when tracing is enabled and the trace 'name'
        matches the document 'trace' attribute (interpreted as a regexp).
        'before' is the source text prior to substitution and 'after' is the
        text following substitution; nothing is printed when they are equal.
        """
        pattern = document.attributes.get('trace')
        if pattern == 'subs':    # Alias for all the inline substitutions.
            pattern = '|'.join(self.SUBS_NAMES)
        self.name_re = pattern
        if pattern is None:
            # Tracing is switched off.
            return
        msg = message.format(name, 'TRACE: ', self.linenos, offset=self.offset)
        if before == after or not re.match(pattern, name):
            return
        if is_array(before):
            before = '\n'.join(before)
        if after is None:
            msg += '\n%s\n' % before
        else:
            if is_array(after):
                after = '\n'.join(after)
            msg += '\n<<<\n%s\n>>>\n%s\n' % (before, after)
        message.stderr(msg)
163
164
class Message:
    """
    Collects and prints program messages (verbose, warning, error...).
    """
    PROG = os.path.basename(os.path.splitext(__file__)[0])

    def __init__(self):
        # Set to True or False to globally override line numbers method
        # argument. Has no effect when set to None.
        self.linenos = None
        self.messages = []      # Every message passed on by stderr().
        self.prev_msg = ''      # Most recent message, used to drop repeats.

    @staticmethod
    def stdout(msg):
        print(msg)

    def stderr(self, msg=''):
        """Record 'msg' and, when run as a script, write it to stderr.
        A message identical to the previous one is suppressed."""
        if msg != self.prev_msg:
            self.messages.append(msg)
            if __name__ == '__main__':
                sys.stderr.write('%s: %s%s' % (self.PROG, msg, os.linesep))
            self.prev_msg = msg

    def verbose(self, msg, linenos=True):
        """Emit 'msg' only when config.verbose is set."""
        if not config.verbose:
            return
        self.stderr(self.format(msg, linenos=linenos))

    def warning(self, msg, linenos=True, offset=0):
        """Emit a warning and flag the document as having warnings."""
        text = self.format(msg, 'WARNING: ', linenos, offset=offset)
        document.has_warnings = True
        self.stderr(text)

    def deprecated(self, msg, linenos=True):
        """Emit a deprecation notice."""
        self.stderr(self.format(msg, 'DEPRECATED: ', linenos))

    def format(self, msg, prefix='', linenos=True, cursor=None, offset=0):
        """Return 'msg' preceded by 'prefix' and, if line numbers are
        enabled and the reader has a cursor, by the file name and line."""
        show_position = (
            self.linenos is not False
            and (linenos or self.linenos)
            and reader.cursor
        )
        if show_position:
            where = reader.cursor if cursor is None else cursor
            prefix += '%s: line %d: ' % (os.path.basename(where[0]), where[1]+offset)
        return prefix + msg

    def error(self, msg, cursor=None, halt=False):
        """
        Report a fatal error.
        With halt=True an EAsciiDoc exception is raised.
        With halt=False the application keeps going, in the hope of
        reporting all fatal errors and finishing with a non-zero exit code.
        """
        if halt:
            raise EAsciiDoc(self.format(msg, linenos=False, cursor=cursor))
        self.stderr(self.format(msg, 'ERROR: ', cursor=cursor))
        document.has_errors = True

    def unsafe(self, msg):
        """Report an error about an unsafe operation."""
        self.error('unsafe: ' + msg)
228
229
def userdir():
    """
    Return the user's home directory, or None when '~' cannot be expanded.
    """
    home = os.path.expanduser('~')
    return None if home == '~' else home
238
239
def localapp():
    """
    Return True when an 'asciidoc.conf' file exists in APP_DIR, i.e. the
    configuration sits next to the executable rather than system wide.
    """
    conf_path = os.path.join(APP_DIR, 'asciidoc.conf')
    return os.path.isfile(conf_path)
246
247
def file_in(fname, directory):
    """Return True if file fname resides inside directory.

    'fname' must be an existing file and 'directory' an existing directory;
    an empty 'directory' (not to be confused with None) means the current
    directory. Both paths are resolved with os.path.realpath() before they
    are compared, so symbolic links cannot be used to fake containment.
    """
    assert os.path.isfile(fname)
    # Empty directory (not to be confused with None) is the current directory.
    if directory == '':
        directory = os.getcwd()
    else:
        assert os.path.isdir(directory)
    directory = os.path.realpath(directory)
    fname = os.path.realpath(fname)
    # Compare whole path components: a character-wise common prefix would
    # wrongly treat '/a/bar' as the parent of '/a/barbaz/file'.
    try:
        return os.path.commonpath((directory, fname)) == directory
    except ValueError:
        # Raised e.g. for paths on different drives: cannot be inside.
        return False
259
260
def safe():
    """Return the 'safe' attribute of the global document object."""
    return document.safe
263
264
def is_safe_file(fname, directory=None):
    """
    Return True if 'fname' may be accessed.

    When safe mode is off every file is acceptable. In safe mode the file
    must reside in 'directory' (which defaults to the source file directory),
    in APP_DIR or in CONF_DIR. An empty 'directory' means the current
    directory. When the source is '<stdin>' and no directory is given the
    file is acceptable only when safe mode is off.
    """
    if directory is None:
        if document.infile == '<stdin>':
            return not safe()
        directory = os.path.dirname(document.infile)
    elif directory == '':
        directory = '.'
    if not safe():
        return True
    # Safe mode: check the candidate directories one at a time, in order.
    if file_in(fname, directory):
        return True
    if file_in(fname, APP_DIR):
        return True
    return file_in(fname, CONF_DIR)
280
281
def safe_filename(fname, parentdir):
    """
    Resolve include file 'fname' against the parent file directory
    'parentdir' and return the resulting file name.
    If the file is not safe an 'unsafe' error is reported and None is
    returned.
    """
    if os.path.isabs(fname):
        path = fname
    else:
        # Include files are relative to the parent document directory.
        path = os.path.normpath(os.path.join(parentdir, fname))
    if is_safe_file(path, parentdir):
        return path
    message.unsafe('include file: %s' % path)
    return None
295
296
def assign(dst, src):
    """Copy every instance attribute of the 'src' object onto 'dst'."""
    # Iterate over a snapshot so the source attributes cannot change under us.
    for attr_name, attr_value in tuple(src.__dict__.items()):
        setattr(dst, attr_name, attr_value)
301
302
def strip_quotes(s):
    """Return s without surrounding white space and, when the remaining
    text is at least three characters long and enclosed in double-quotes,
    without those quote characters."""
    text = s.strip()
    is_quoted = len(text) >= 3 and text.startswith('"') and text.endswith('"')
    return text[1:-1] if is_quoted else text
310
311
def is_re(s):
    """Return True if s is a valid regular expression else return False.

    Any ordinary error raised while compiling 's' (e.g. re.error for a
    malformed pattern, TypeError for a non-string) means 'not valid'.
    """
    try:
        re.compile(s)
    except Exception:
        # Deliberately not a bare 'except:' so that KeyboardInterrupt and
        # SystemExit still propagate to the caller.
        return False
    return True
320
321
def re_join(relist):
    """Combine the regular expressions re1,re2,... in 'relist' into the
    single alternation (re1)|(re2)|...  Return None for an empty list."""
    if len(relist) < 1:
        return None
    # Named groups are turned into plain groups to avoid ambiguity.
    anonymous = [re.sub(r'\?P<\S+?>', '', pattern) for pattern in relist]
    return '(' + ')|('.join(anonymous) + ')'
334
335
def lstrip_list(s):
    """
    Return 's' without its leading empty items.
    An empty list is returned when every item is empty.
    """
    for index, item in enumerate(s):
        if item:
            return s[index:]
    return []
346
347
def rstrip_list(s):
    """
    Return 's' without its trailing empty items.
    An empty list is returned when every item is empty.
    """
    end = len(s)
    # Walk backwards past the empty tail.
    while end > 0 and not s[end - 1]:
        end -= 1
    if end == 0:
        return []
    return s[:end]
358
359
def strip_list(s):
    """
    Return 's' with the empty items at both its start and its end removed.
    """
    return rstrip_list(lstrip_list(s))
367
368
def is_array(obj):
    """
    Return True if 'obj' is a list or a tuple (or an instance of a subclass).
    """
    return isinstance(obj, (list, tuple))
374
375
def dovetail(lines1, lines2):
    """
    Return a new list made of the strings in 'lines1' followed by those in
    'lines2' (both lists or tuples), where the last non-blank item of
    'lines1' and the first non-blank item of 'lines2' are joined into a
    single string. Blank items at either end of both inputs are dropped.
    """
    assert is_array(lines1)
    assert is_array(lines2)
    head = strip_list(lines1)
    tail = strip_list(lines2)
    if head and tail:
        # Splice the boundary items together.
        return [*head[:-1], head[-1] + tail[0], *tail[1:]]
    # Nothing to splice when either side is empty.
    return list(head) + list(tail)
392
393
def dovetail_tags(stag, content, etag):
    """Merge the start tag with the first content line and the last
    content line with the end tag. This ensures verbatim elements don't
    include extraneous opening and closing line breaks."""
    opened = dovetail(stag, content)
    return dovetail(opened, etag)
399
400
def py2round(n, d=0):
    """Round 'n' to 'd' decimal places the way Python 2's round() did.

    Python 3 rounds exact halves towards the even neighbour, e.g.
    round(42.5) == 42, whereas Python 2 rounded them away from zero
    (round(42.5) == 43.0). This function restores the Python 2 behavior
    and always returns a float.
    """
    scale = 10 ** d
    # Round the magnitude and restore the sign afterwards. Flooring the
    # signed value (n * scale - 0.5 for negative n) would push every negative
    # non-integer away from zero, e.g. -2.4 would become -3.0.
    magnitude = math.floor(abs(n) * scale + 0.5)
    return math.copysign(magnitude, n) / scale
408
409
def get_args(val):
    """Parse 'val' as the argument list of a function call and return its
    positional arguments as a dictionary keyed '1', '2', ...
    Each argument is evaluated with literal_eval(); anything that is not a
    Python literal makes literal_eval() raise."""
    call_node = ast.parse("d(" + val + ")", mode='eval').body
    positional = {}
    for position, node in enumerate(call_node.args, start=1):
        # A bare name is handed over as its identifier string.
        source = node.id if isinstance(node, ast.Name) else node
        positional[str(position)] = literal_eval(source)
    return positional
421
422
def get_kwargs(val):
    """Parse 'val' as the argument list of a function call and return its
    keyword arguments as a dictionary. Values are evaluated with
    literal_eval()."""
    call_node = ast.parse("d(" + val + ")", mode='eval').body
    return {kw.arg: literal_eval(kw.value) for kw in call_node.keywords}
429
430
def parse_to_list(val):
    """Parse 'val', a comma separated string of Python literals, and return
    the evaluated values as a list."""
    list_node = ast.parse("[" + val + "]", mode='eval').body
    return list(map(literal_eval, list_node.elts))
434
435
def parse_attributes(attrs, dict):
    """Update a dictionary with name/value attributes from the attrs string.
    The attrs string is a comma separated list of values and keyword name=value
    pairs. Values must precede keywords and are named '1','2'... The entire
    attributes list is named '0'. If keywords are specified string values must
    be quoted. Examples:

    attrs: ''
    dict: {}

    attrs: 'hello,world'
    dict: {'2': 'world', '0': 'hello,world', '1': 'hello'}

    attrs: '"hello", planet="earth"'
    dict: {'planet': 'earth', '0': '"hello", planet="earth"', '1': 'hello'}

    If the list cannot be parsed at all, or contains no values (e.g. it is
    blank or just commas), only the '0' entry is set.
    """
    if not attrs:
        return
    dict['0'] = attrs
    # Replace line separators with spaces so line spanning works.
    s = re.sub(r'\s', ' ', attrs)
    try:
        # First attempt: the list is a sequence of Python literals.
        d = get_args(s)
        d.update(get_kwargs(s))
        for v in d.values():
            if not (v is None or isinstance(v, (str, int, float))):
                raise ValueError('unsupported attribute value: %r' % (v,))
    except Exception:
        # Second attempt: treat every comma separated item as a string by
        # quoting it (escaping any embedded double-quotes first).
        items = s.replace('"', '\\"').split(',')
        s = ','.join('"' + x.strip() + '"' for x in items)
        try:
            d = get_args(s)
            d.update(get_kwargs(s))
        except Exception:
            return  # If there's a syntax error leave with {0}=attrs.
        # Drop any empty positional arguments.
        d = {k: v for k, v in d.items() if v != ''}
    # 'd' may legitimately be empty here (blank or comma-only list), in
    # which case only the '0' entry set above is kept.
    dict.update(d)
487
488
def parse_named_attributes(s, attrs):
    """Update the 'attrs' dictionary with the name="value" attributes found
    in the string 's'. Return True on success, False if 's' has invalid
    syntax.
    Example:
    s: 'star="sun",planet="earth"'
    attrs: {'planet':'earth', 'star':'sun'}
    """
    try:
        attrs.update(get_kwargs(s))
    except Exception:
        return False
    return True
506
507
def parse_list(s):
    """Parse a comma separated string of Python literals and return the
    parsed values as a tuple. Raise EAsciiDoc if 's' is malformed."""
    try:
        return tuple(parse_to_list(s))
    except Exception:
        raise EAsciiDoc('malformed list: ' + s)
516
517
def parse_options(options, allowed, errmsg):
    """Split the comma separated string of unquoted option names 'options'
    and return the names as a tuple. 'allowed' lists the permitted option
    values; with allowed=() every legitimate name is accepted.
    EAsciiDoc, with a message prefixed by 'errmsg', is raised on the first
    illegal option."""
    if not options:
        return ()
    names = re.split(r'\s*,\s*', options)
    for option in names:
        if (allowed and option not in allowed) or not is_name(option):
            raise EAsciiDoc('%s: %s' % (errmsg, option))
    return tuple(names)
530
531
def symbolize(s):
    """Return 's' in lowercase with every character that is not a word
    character, dash or underscore removed."""
    kept = re.sub(r'[^\w\-_]', '', s)
    return kept.lower()
535
536
def is_name(s):
    """Return True if 's' is a valid attribute, macro or tag name, i.e. it
    matches NAME_RE (a leading non-digit word character followed by word
    characters and dashes only)."""
    name_pattern = r'^' + NAME_RE + r'$'
    return bool(re.match(name_pattern, s))
541
542
def subs_quotes(text):
    """Quoted text is marked up and the resulting text is
    returned.

    Each key of config.quotes is a quote string, or a 'left|right' pair of
    quote strings, and its value is the name of the tag to apply. A tag name
    prefixed with '#' denotes an unconstrained quote. Entries with an empty
    tag are skipped. When the first character of a match is a backslash the
    match is not marked up and the backslash is deleted.
    """
    keys = list(config.quotes.keys())
    for q in keys:
        # Split asymmetric 'left|right' quotes; '|' and '||' are themselves
        # symmetric quotes and are not split.
        i = q.find('|')
        if i != -1 and q != '|' and q != '||':
            lq = q[:i]      # Left quote.
            rq = q[i + 1:]    # Right quote.
        else:
            lq = rq = q
        tag = config.quotes[q]
        if not tag:
            continue
        # Unconstrained quotes prefix the tag name with a hash.
        if tag[0] == '#':
            tag = tag[1:]
            # Unconstrained quotes can appear anywhere.
            reo = re.compile(r'(?ms)(^|.)(\[(?P<attrlist>[^[\]]+?)\])?' +
                             r'(?:' + re.escape(lq) + r')' +
                             r'(?P<content>.+?)(?:' + re.escape(rq) + r')')
        else:
            # The text within constrained quotes must be bounded by white space.
            # Non-word (\W) characters are allowed at boundaries to accommodate
            # enveloping quotes and punctuation e.g. a='x', ('x'), 'x', ['x'].
            reo = re.compile(r'(?ms)(^|[^\w;:}])(\[(?P<attrlist>[^[\]]+?)\])?' +
                             r'(?:' + re.escape(lq) + r')' +
                             r'(?P<content>\S|\S.*?\S)(?:' + re.escape(rq) + r')(?=\W|$)')
        # Scan the text repeatedly; 'pos' is where the next search starts.
        pos = 0
        while True:
            mo = reo.search(text, pos)
            if not mo:
                break
            # Group 1 captures the character in front of the quote (if any),
            # so the match starts on that character.
            if text[mo.start()] == '\\':
                # Delete leading backslash.
                text = text[:mo.start()] + text[mo.start() + 1:]
                # Skip past start of match.
                pos = mo.start() + 1
            else:
                # Optional '[...]' attribute list in front of the quote.
                attrlist = {}
                parse_attributes(mo.group('attrlist'), attrlist)
                stag, etag = config.tag(tag, attrlist)
                # Keep the leading character, drop the attribute list and
                # the quotes, and wrap the content in the start/end tags.
                s = mo.group(1) + stag + mo.group('content') + etag
                text = text[:mo.start()] + s + text[mo.end():]
                # Resume searching after the substituted text.
                pos = mo.start() + len(s)
    return text
589
590
def subs_tag(tag, dict={}):
    """Substitute attributes in the 'tag' string and split it at '|' into a
    [start tag, end tag] list (c.f. Config.tag()).
    [None, None] is returned for an empty tag, or (with a warning) when the
    substitution yields nothing. The end tag is None when 'tag' contains no
    '|'. EAsciiDoc is raised if there is more than one '|'."""
    if not tag:
        return [None, None]
    substituted = subs_attrs(tag, dict)
    if not substituted:
        message.warning('tag \'%s\' dropped: contains undefined attribute' % tag)
        return [None, None]
    parts = substituted.split('|')
    if len(parts) > 2:
        raise EAsciiDoc('malformed tag: %s' % tag)
    if len(parts) == 1:
        # No end tag specified.
        parts = parts + [None]
    return parts
607
608
def parse_entry(entry, dict=None, unquote=False, unique_values=False,
                allow_name_only=False, escape_delimiter=True):
    """Parse a name=value 'entry' and store it in dictionary 'dict' (if one
    is given). Return the tuple (name, value), or None for an illegal entry.

    'name=' yields the value ''.
    'name' yields the value '' when allow_name_only is True.
    'name!' yields the value None when allow_name_only is True.
    Leading and trailing white space is stripped from 'name' and 'value'.
    'name' can contain any printable characters; to include the '='
    delimiter in the name it must be escaped with a backslash and
    escape_delimiter must be True.
    With 'unquote' True, enclosing double-quotes are also stripped from
    'name' and 'value'.
    With 'unique_values' True, existing dictionary entries having the same
    value are removed before the parsed entry is added."""
    # The first '=' not preceded by a backslash (or simply the first '='
    # when escaping is disabled) separates the name from the value.
    delimiter_re = r'(?:[^\\](=))' if escape_delimiter else r'(=)'
    found = re.search(delimiter_re, entry)
    if found:  # name=value entry.
        name = entry[:found.start(1)]
        value = entry[found.end(1):]
        if escape_delimiter:
            name = name.replace(r'\=', '=')  # Un-escape \= in name.
    elif allow_name_only and entry:  # name or name! entry.
        if entry[-1] == '!':
            name, value = entry[:-1], None
        else:
            name, value = entry, ''
    else:
        return None
    # Tidy up both parts using the requested cleaning function.
    clean = strip_quotes if unquote else str.strip
    name = clean(name)
    if value is not None:
        value = clean(value)
    if not name:
        return None
    if dict is not None:
        if unique_values:
            duplicates = [k for k, v in dict.items() if v == value]
            for k in duplicates:
                del dict[k]
        dict[name] = value
    return name, value
660
661
def parse_entries(entries, dict, unquote=False, unique_values=False,
                  allow_name_only=False, escape_delimiter=True):
    """Parse the name=value entries in the lines of text 'entries' (after
    template expansion by config.expand_templates()) into dictionary 'dict'.
    Blank lines are skipped. EAsciiDoc is raised for a malformed entry."""
    for line in config.expand_templates(entries):
        if not line:
            continue
        parsed = parse_entry(line, dict, unquote, unique_values,
                             allow_name_only, escape_delimiter)
        if not parsed:
            raise EAsciiDoc('malformed section entry: %s' % line)
671
672
def dump_section(name, dict, f=sys.stdout):
    """Write the parameters in 'dict' to file 'f' in configuration file
    section format, under the section header 'name'. Lines are terminated
    with writer.newline and the section is followed by an empty line."""
    f.write('[%s]%s' % (name, writer.newline))
    for key, value in list(dict.items()):
        key = str(key).replace('=', r'\=')    # Escape = in name.
        # Quote if necessary (i.e. there is surrounding white space).
        if len(key) != len(key.strip()):
            key = '"' + key + '"'
        if value is None:
            # Don't dump undefined attributes.
            continue
        if value and len(value) != len(value.strip()):
            value = '"' + value + '"'
        line = key + '=' + value
        if line[0] == '#':
            line = '\\' + line    # Escape so not treated as comment lines.
        f.write('%s%s' % (line, writer.newline))
    f.write(writer.newline)
694
695
def update_attrs(attrs, dict):
    """Copy the parsed attributes in dictionary 'dict' into the 'attrs'
    dictionary. EAsciiDoc is raised at the first illegal attribute name."""
    for name, value in list(dict.items()):
        if is_name(name):
            attrs[name] = value
        else:
            raise EAsciiDoc('illegal attribute name: %s' % name)
702
703
def is_attr_defined(attrs, dic):
    """
    Tell whether the attributes named in 'attrs' are defined (i.e. have a
    value other than None) in dictionary 'dic'.
    Valid 'attrs' syntax:
    <attr> True if the single attribute is defined.
    <attr1>,<attr2>,... True if one or more of the attributes are defined.
    <attr1>+<attr2>+... True if all of the attributes are defined.
    The OR separator takes precedence when both separators are present.
    """
    def defined(attr_name):
        return dic.get(attr_name.strip()) is not None

    if OR in attrs:
        return any(defined(a) for a in attrs.split(OR))
    if AND in attrs:
        return all(defined(a) for a in attrs.split(AND))
    return defined(attrs)
726
727
def filter_lines(filter_cmd, lines, attrs={}):
    """
    Run 'lines' through the 'filter_cmd' shell command and return the result.
    The 'attrs' dictionary contains additional filter attributes.

    The input lines are joined with os.linesep and piped, UTF-8 encoded, to
    the command's standard input; its standard output is decoded as UTF-8
    and returned as a list of right-stripped lines.
    'lines' is returned unchanged when 'filter_cmd' is empty or blank, and an
    empty list is returned (after reporting an error) when attribute
    substitution on the command fails.
    EAsciiDoc is raised if the command cannot be run or its input/output
    cannot be encoded/decoded.
    """
    def findfilter(name, dir, filter):
        """Find filter file 'fname' with style name 'name' in directory
        'dir'. Return found file path or None if not found."""
        # Look in <dir>/filters/<name>/ first, then in <dir>/filters/.
        if name:
            result = os.path.join(dir, 'filters', name, filter)
            if os.path.isfile(result):
                return result
        result = os.path.join(dir, 'filters', filter)
        if os.path.isfile(result):
            return result
        return None

    # Return input lines if there's not filter.
    if not filter_cmd or not filter_cmd.strip():
        return lines
    # Perform attributes substitution on the filter command.
    s = subs_attrs(filter_cmd, attrs)
    if not s:
        message.error('undefined filter attribute in command: %s' % filter_cmd)
        return []
    filter_cmd = s.strip()
    # Parse for quoted and unquoted command and command tail.
    # Double quoted.
    mo = re.match(r'^"(?P<cmd>[^"]+)"(?P<tail>.*)$', filter_cmd)
    if not mo:
        # Single quoted.
        mo = re.match(r"^'(?P<cmd>[^']+)'(?P<tail>.*)$", filter_cmd)
        if not mo:
            # Unquoted catch all.
            mo = re.match(r'^(?P<cmd>\S+)(?P<tail>.*)$', filter_cmd)
    cmd = mo.group('cmd').strip()
    found = None
    if not os.path.dirname(cmd):
        # Filter command has no directory path so search filter directories.
        # Search order: document directory, USER_DIR, then APP_DIR (local
        # installation) or CONF_DIR.
        filtername = attrs.get('style')
        d = document.attributes.get('docdir')
        if d:
            found = findfilter(filtername, d, cmd)
        if not found:
            if USER_DIR:
                found = findfilter(filtername, USER_DIR, cmd)
            if not found:
                if localapp():
                    found = findfilter(filtername, APP_DIR, cmd)
                else:
                    found = findfilter(filtername, CONF_DIR, cmd)
    else:
        if os.path.isfile(cmd):
            found = cmd
        else:
            message.warning('filter not found: %s' % cmd)
    # If the filter file was located, rebuild the command around its quoted
    # path; otherwise the command is left as is for the shell to resolve.
    if found:
        filter_cmd = '"' + found + '"' + mo.group('tail')
    # Script filters are run through their interpreter.
    if found:
        if cmd.endswith('.py'):
            filter_cmd = '"%s" %s' % (document.attributes['python'],
                                      filter_cmd)
        elif cmd.endswith('.rb'):
            filter_cmd = 'ruby ' + filter_cmd

    message.verbose('filtering: ' + filter_cmd)
    if os.name == 'nt':
        # Remove redundant quoting -- this is not just
        # cosmetic, unnecessary quoting appears to cause
        # command line truncation.
        filter_cmd = re.sub(r'"([^ ]+?)"', r'\1', filter_cmd)
    try:
        # NOTE(review): the command string is executed by the shell
        # (shell=True); it comes from the filter configuration after
        # attribute substitution.
        p = subprocess.Popen(filter_cmd, shell=True,
                             stdin=subprocess.PIPE, stdout=subprocess.PIPE)
        output = p.communicate(os.linesep.join(lines).encode("utf-8"))[0].decode('utf-8')
    except Exception:
        raise EAsciiDoc('filter error: %s: %s' % (filter_cmd, sys.exc_info()[1]))
    if output:
        result = [s.rstrip() for s in output.split(os.linesep)]
    else:
        result = []
    # communicate() has already waited for the process; wait() just fetches
    # the exit status.
    filter_status = p.wait()
    if filter_status:
        message.warning('filter non-zero exit code: %s: returned %d' % (filter_cmd, filter_status))
    if lines and not result:
        message.warning('no output from filter: %s' % filter_cmd)
    return result
815
816
817def system(name, args, is_macro=False, attrs=None):
818    """
819    Evaluate a system attribute ({name:args}) or system block macro
820    (name::[args]).
821    If is_macro is True then we are processing a system block macro otherwise
822    it's a system attribute.
823    The attrs dictionary is updated by the counter and set system attributes.
824    NOTE: The include1 attribute is used internally by the include1::[] macro
825    and is not for public use.
826    """
827    if is_macro:
828        syntax = '%s::[%s]' % (name, args)
829        separator = '\n'
830    else:
831        syntax = '{%s:%s}' % (name, args)
832        separator = writer.newline
833    if name not in ('eval', 'eval3', 'sys', 'sys2', 'sys3', 'include',
834                    'include1', 'counter', 'counter2', 'set', 'set2', 'template'):
835        if is_macro:
836            msg = 'illegal system macro name: %s' % name
837        else:
838            msg = 'illegal system attribute name: %s' % name
839        message.warning(msg)
840        return None
841    if is_macro:
842        s = subs_attrs(args)
843        if s is None:
844            message.warning('skipped %s: undefined attribute in: %s' % (name, args))
845            return None
846        args = s
847    if name != 'include1':
848        message.verbose('evaluating: %s' % syntax)
849    if safe() and name not in ('include', 'include1'):
850        message.unsafe(syntax)
851        return None
852    result = None
853    if name in ('eval', 'eval3'):
854        try:
855            result = eval(args)
856            if result is True:
857                result = ''
858            elif result is False:
859                result = None
860            elif result is not None:
861                result = str(result)
862        except Exception:
863            message.warning('%s: evaluation error' % syntax)
864    elif name in ('sys', 'sys2', 'sys3'):
865        result = ''
866        fd, tmp = tempfile.mkstemp()
867        os.close(fd)
868        try:
869            cmd = args
870            cmd = cmd + (' > "%s"' % tmp)
871            if name == 'sys2':
872                cmd = cmd + ' 2>&1'
873            if os.name == 'nt':
874                # Remove redundant quoting -- this is not just
875                # cosmetic, unnecessary quoting appears to cause
876                # command line truncation.
877                cmd = re.sub(r'"([^ ]+?)"', r'\1', cmd)
878            message.verbose('shelling: %s' % cmd)
879            if os.system(cmd):
880                message.warning('%s: non-zero exit status' % syntax)
881            try:
882                if os.path.isfile(tmp):
883                    with open(tmp, encoding='utf-8') as f:
884                        lines = [s.rstrip() for s in f]
885                else:
886                    lines = []
887            except Exception:
888                raise EAsciiDoc('%s: temp file read error' % syntax)
889            result = separator.join(lines)
890        finally:
891            if os.path.isfile(tmp):
892                os.remove(tmp)
893    elif name in ('counter', 'counter2'):
894        mo = re.match(r'^(?P<attr>[^:]*?)(:(?P<seed>.*))?$', args)
895        attr = mo.group('attr')
896        seed = mo.group('seed')
897        if seed and (not re.match(r'^\d+$', seed) and len(seed) > 1):
898            message.warning('%s: illegal counter seed: %s' % (syntax, seed))
899            return None
900        if not is_name(attr):
901            message.warning('%s: illegal attribute name' % syntax)
902            return None
903        value = document.attributes.get(attr)
904        if value:
905            if not re.match(r'^\d+$', value) and len(value) > 1:
906                message.warning(
907                    '%s: illegal counter value: %s' % (syntax, value)
908                )
909                return None
910            if re.match(r'^\d+$', value):
911                expr = value + '+1'
912            else:
913                expr = 'chr(ord("%s")+1)' % value
914            try:
915                result = str(eval(expr))
916            except Exception:
917                message.warning('%s: evaluation error: %s' % (syntax, expr))
918        else:
919            if seed:
920                result = seed
921            else:
922                result = '1'
923        document.attributes[attr] = result
924        if attrs is not None:
925            attrs[attr] = result
926        if name == 'counter2':
927            result = ''
928    elif name in ('set', 'set2'):
929        mo = re.match(r'^(?P<attr>[^:]*?)(:(?P<value>.*))?$', args)
930        attr = mo.group('attr')
931        value = mo.group('value')
932        if value is None:
933            value = ''
934        if attr.endswith('!'):
935            attr = attr[:-1]
936            value = None
937        if not is_name(attr):
938            message.warning('%s: illegal attribute name' % syntax)
939        else:
940            if attrs is not None:
941                attrs[attr] = value
942            if name != 'set2':  # set2 only updates local attributes.
943                document.attributes[attr] = value
944        if value is None:
945            result = None
946        else:
947            result = ''
948    elif name == 'include':
949        if not os.path.exists(args):
950            message.warning('%s: file does not exist' % syntax)
951        elif not is_safe_file(args):
952            message.unsafe(syntax)
953        else:
954            with open(args, encoding='utf-8') as f:
955                result = [s.rstrip() for s in f]
956            if result:
957                result = subs_attrs(result)
958                result = separator.join(result)
959                result = result.expandtabs(reader.tabsize)
960            else:
961                result = ''
962    elif name == 'include1':
963        result = separator.join(config.include1[args])
964    elif name == 'template':
965        if args not in config.sections:
966            message.warning('%s: template does not exist' % syntax)
967        else:
968            result = []
969            for line in config.sections[args]:
970                line = subs_attrs(line)
971                if line is not None:
972                    result.append(line)
973            result = config.newline.join(result)
974    else:
975        assert False
976    if result and name in ('eval3', 'sys3'):
977        macros.passthroughs.append(result)
978        result = '\x07' + str(len(macros.passthroughs) - 1) + '\x07'
979    return result
980
981
def subs_attrs(lines, dictionary=None):
    """Substitute attribute references in 'lines'.

    Attribute values come from the global document.attributes dictionary
    and from 'dictionary' ('dictionary' entries take precedence).

    Arguments:
    - 'lines' is either a single string or a sequence of strings.
    - 'dictionary' is an optional dict of additional attributes. NOTE: it is
      modified in place: entries whose value is None (or whose substituted
      value is None) are deleted and the remaining values are replaced by
      their attribute-substituted string form. It is also handed to
      system() as the 'attrs' argument.

    Return value:
    - If 'lines' is a string: the substituted string, or None if the line
      was dropped.
    - Otherwise: a tuple of the substituted lines. Lines containing
      undefined attributes are deleted.

    Notes:
    - Attribute references are substituted in the following order: simple,
      conditional, system.
    - Attribute references inside 'dictionary' entry values are substituted.
    """

    def end_brace(text, start):
        """Return index following end brace that matches brace at start in
        text."""
        assert text[start] == '{'
        n = 0   # Current brace nesting depth.
        result = start
        for c in text[start:]:
            # Skip braces that are followed by a backslash.
            # (Escaped braces have been rewritten to '{\' and '}\' by the
            # caller before this function is used.)
            if result == len(text) - 1 or text[result + 1] != '\\':
                if c == '{':
                    n = n + 1
                elif c == '}':
                    n = n - 1
            result = result + 1
            if n == 0:
                break
        return result

    # Normalize the input to a list of lines; remember what to return.
    if type(lines) == str:
        string_result = True
        lines = [lines]
    else:
        string_result = False
    if dictionary is None:
        attrs = document.attributes
    else:
        # Remove numbered document attributes so they don't clash with
        # attribute list positional attributes.
        attrs = {}
        for k, v in list(document.attributes.items()):
            if not re.match(r'^\d+$', k):
                attrs[k] = v
        # Substitute attribute references inside dictionary values.
        # Iterate over a copy because entries are deleted while looping.
        for k, v in list(dictionary.items()):
            if v is None:
                del dictionary[k]
            else:
                v = subs_attrs(str(v))
                if v is None:
                    # The value referenced an undefined attribute.
                    del dictionary[k]
                else:
                    dictionary[k] = v
        attrs.update(dictionary)
    # Substitute all attributes in all lines.
    result = []
    for line in lines:
        # Make it easier for regular expressions.
        # Escaped braces '\{' and '\}' are transposed to '{\' and '}\' so
        # the patterns below can reject them with '[^\\\W]' after the open
        # brace and '(?!\\)' after the close brace.
        line = line.replace('\\{', '{\\')
        line = line.replace('\\}', '}\\')
        # Expand simple attributes ({name}).
        # Nested attributes not allowed.
        reo = re.compile(r'(?s)\{(?P<name>[^\\\W][-\w]*?)\}(?!\\)')
        pos = 0
        while True:
            mo = reo.search(line, pos)
            if not mo:
                break
            s = attrs.get(mo.group('name'))
            if s is None:
                # Undefined: leave the reference in place and move on.
                pos = mo.end()
            else:
                s = str(s)
                line = line[:mo.start()] + s + line[mo.end():]
                # Resume after the inserted text (it is not rescanned).
                pos = mo.start() + len(s)
        # Expand conditional attributes.
        # Single name -- higher precedence.
        reo1 = re.compile(
            r'(?s)\{(?P<name>[^\\\W][-\w]*?)'
            r'(?P<op>\=|\?|!|#|%|@|\$)'
            r'(?P<value>.*?)\}(?!\\)'
        )
        # Multiple names (n1,n2,... or n1+n2+...) -- lower precedence.
        reo2 = re.compile(
            r'(?s)\{(?P<name>[^\\\W][-\w' + OR + AND + r']*?)'
            r'(?P<op>\=|\?|!|#|%|@|\$)'
            r'(?P<value>.*?)\}(?!\\)'
        )
        for reo in [reo1, reo2]:
            pos = 0
            while True:
                mo = reo.search(line, pos)
                if not mo:
                    break
                attr = mo.group()
                name = mo.group('name')
                # 'lval' becomes None when the name expression is undefined,
                # otherwise the attribute value ('' for multi-name forms).
                if reo == reo2:
                    if OR in name:
                        sep = OR
                    else:
                        sep = AND
                    names = [s.strip() for s in name.split(sep) if s.strip()]
                    for n in names:
                        if not re.match(r'^[^\\\W][-\w]*$', n):
                            message.error('illegal attribute syntax: %s' % attr)
                    if sep == OR:
                        # Process OR name expression: n1,n2,...
                        # Defined if any of the names is defined.
                        for n in names:
                            if attrs.get(n) is not None:
                                lval = ''
                                break
                        else:
                            lval = None
                    else:
                        # Process AND name expression: n1+n2+...
                        # Defined only if all of the names are defined.
                        for n in names:
                            if attrs.get(n) is None:
                                lval = None
                                break
                        else:
                            lval = ''
                else:
                    lval = attrs.get(name)
                op = mo.group('op')
                # mo.end() not good enough because '{x={y}}' matches '{x={y}'.
                end = end_brace(line, mo.start())
                rval = line[mo.start('value'):end - 1]
                # Placeholder reference inserted to force the line to be
                # dropped by the unsubstituted {name} check further down.
                UNDEFINED = '{zzzzz}'
                if lval is None:
                    # The name expression is undefined.
                    if op == '=':
                        s = rval
                    elif op == '?':
                        s = ''
                    elif op == '!':
                        s = rval
                    elif op == '#':
                        s = UNDEFINED  # So the line is dropped.
                    elif op == '%':
                        s = rval
                    elif op in ('@', '$'):
                        s = UNDEFINED  # So the line is dropped.
                    else:
                        assert False, 'illegal attribute: %s' % attr
                else:
                    # The name expression is defined.
                    if op == '=':
                        s = lval
                    elif op == '?':
                        s = rval
                    elif op == '!':
                        s = ''
                    elif op == '#':
                        s = rval
                    elif op == '%':
                        s = UNDEFINED  # So the line is dropped.
                    elif op in ('@', '$'):
                        # rval has the form <re>:<v1>[:<v2>]; split on
                        # colons that are not backslash escaped.
                        v = re.split(r'(?<!\\):', rval)
                        if len(v) not in (2, 3):
                            message.error('illegal attribute syntax: %s' % attr)
                            s = ''
                        elif not is_re('^' + v[0] + '$'):
                            message.error('illegal attribute regexp: %s' % attr)
                            s = ''
                        else:
                            # Unescape colons, then match the whole value.
                            v = [s.replace('\\:', ':') for s in v]
                            re_mo = re.match('^' + v[0] + '$', lval)
                            if op == '@':
                                if re_mo:
                                    s = v[1]         # {<name>@<re>:<v1>[:<v2>]}
                                else:
                                    if len(v) == 3:   # {<name>@<re>:<v1>:<v2>}
                                        s = v[2]
                                    else:             # {<name>@<re>:<v1>}
                                        s = ''
                            else:
                                if re_mo:
                                    if len(v) == 2:   # {<name>$<re>:<v1>}
                                        s = v[1]
                                    elif v[1] == '':  # {<name>$<re>::<v2>}
                                        s = UNDEFINED  # So the line is dropped.
                                    else:             # {<name>$<re>:<v1>:<v2>}
                                        s = v[1]
                                else:
                                    if len(v) == 2:   # {<name>$<re>:<v1>}
                                        s = UNDEFINED  # So the line is dropped.
                                    else:             # {<name>$<re>:<v1>:<v2>}
                                        s = v[2]
                    else:
                        assert False, 'illegal attribute: %s' % attr
                s = str(s)
                # Replace up to the matching end brace (not mo.end()).
                line = line[:mo.start()] + s + line[end:]
                pos = mo.start() + len(s)
        # Drop line if it contains  unsubstituted {name} references.
        skipped = re.search(r'(?s)\{[^\\\W][-\w]*?\}(?!\\)', line)
        if skipped:
            trace('dropped line', line)
            continue
        # Expand system attributes (eval has precedence).
        reos = [
            re.compile(r'(?s)\{(?P<action>eval):(?P<expr>.*?)\}(?!\\)'),
            re.compile(r'(?s)\{(?P<action>[^\\\W][-\w]*?):(?P<expr>.*?)\}(?!\\)'),
        ]
        skipped = False
        for reo in reos:
            pos = 0
            while True:
                mo = reo.search(line, pos)
                if not mo:
                    break
                expr = mo.group('expr')
                action = mo.group('action')
                # Restore escaped braces before handing the expression on.
                expr = expr.replace('{\\', '{')
                expr = expr.replace('}\\', '}')
                s = system(action, expr, attrs=dictionary)
                if dictionary is not None and action in ('counter', 'counter2', 'set', 'set2'):
                    # These actions create and update attributes.
                    attrs.update(dictionary)
                if s is None:
                    # Drop line if the action returns None.
                    skipped = True
                    break
                line = line[:mo.start()] + s + line[mo.end():]
                pos = mo.start() + len(s)
            if skipped:
                break
        if not skipped:
            # Remove backslash from escaped entries.
            line = line.replace('{\\', '{')
            line = line.replace('}\\', '}')
            result.append(line)
    if string_result:
        # String input: a dropped line is reported as None.
        if result:
            return '\n'.join(result)
        else:
            return None
    else:
        return tuple(result)
1219
1220
east_asian_widths = {
    # Two-column categories.
    'W': 2,   # Wide
    'F': 2,   # Full-width (wide)
    # Single-column categories.
    'Na': 1,  # Narrow
    'H': 1,   # Half-width (narrow)
    'N': 1,   # Neutral (not East Asian, treated as narrow)
    # Ambiguous: should be wide in an East Asian context and narrow
    # otherwise, but that doesn't work, so it is always counted as narrow.
    'A': 1,
}
"""Mapping of result codes from `unicodedata.east_asian_width()` to character
column widths."""


def column_width(s):
    """Return the number of display columns occupied by the string 's'.

    Each character contributes the width that `east_asian_widths` assigns to
    its `unicodedata.east_asian_width()` category.
    """
    return sum(east_asian_widths[unicodedata.east_asian_width(ch)] for ch in s)
1238
1239
def date_time_str(t):
    """Convert seconds since the Epoch to formatted date and time strings.

    't' is a number of seconds since the Epoch.

    Returns a '(date_str, time_str)' tuple where 'date_str' is formatted as
    'YYYY-MM-DD' and 'time_str' as 'HH:MM:SS <zone>'.

    If the SOURCE_DATE_EPOCH environment variable is set, 't' is clamped to
    that value (it must be an integer string, otherwise int() raises
    ValueError) and the result is expressed in UTC. Otherwise local time is
    used and the zone name is taken from time.tzname.
    """
    source_date_epoch = os.environ.get('SOURCE_DATE_EPOCH')
    if source_date_epoch is not None:
        # Never report a time later than SOURCE_DATE_EPOCH.
        moment = time.gmtime(min(t, int(source_date_epoch)))
        zone = 'UTC'
    else:
        moment = time.localtime(t)
        # tzname[1] is the DST zone name, tzname[0] the non-DST one.
        if time.daylight and moment.tm_isdst == 1:
            zone = time.tzname[1]
        else:
            zone = time.tzname[0]
    date_str = time.strftime('%Y-%m-%d', moment)
    time_str = time.strftime('%H:%M:%S', moment) + ' ' + zone
    # NOTE: the former 'time_str.decode(...)' step was a Python 2 leftover.
    # Python 3 strings have no decode() method, so it always raised
    # AttributeError, which was silently swallowed; it has been removed.
    return date_str, time_str
1261
1262
class Lex:
    """Lexical analysis routines. Static methods and attributes only.

    The class is used purely as a namespace: instantiating it raises
    AssertionError. 'prev_element' and 'prev_cursor' cache the most recent
    answer of next_element() together with the reader cursor it was
    computed at.
    """
    prev_element = None
    prev_cursor = None

    def __init__(self):
        raise AssertionError('no class instances allowed')

    def __iter__(self):
        return self

    @staticmethod
    def next_element():
        """Returns class of next element on the input (None if EOF).  The
        reader is assumed to be at the first line following a previous element,
        end of file or line one.  Exits with the reader pointing to the first
        line of the next element or EOF (leading blank lines are skipped).

        Raises EAsciiDoc('paragraph expected') if no element type matches.
        """
        reader.skip_blank_lines()
        if reader.eof():
            return None
        # Optimization: If we've already checked for an element at this
        # position return the element.
        if Lex.prev_element and Lex.prev_cursor == reader.cursor:
            return Lex.prev_element
        # The order of the tests below determines element precedence.
        if AttributeEntry.isnext():
            result = AttributeEntry
        elif AttributeList.isnext():
            result = AttributeList
        elif BlockTitle.isnext() and not tables_OLD.isnext():
            result = BlockTitle
        elif Title.isnext():
            # A title carrying the 'float' style is a floating title.
            if AttributeList.style() == 'float':
                result = FloatingTitle
            else:
                result = Title
        elif macros.isnext():
            result = macros.current
        elif lists.isnext():
            result = lists.current
        elif blocks.isnext():
            result = blocks.current
        elif tables_OLD.isnext():
            result = tables_OLD.current
        elif tables.isnext():
            result = tables.current
        else:
            # Anything else has to be a paragraph.
            if not paragraphs.isnext():
                raise EAsciiDoc('paragraph expected')
            result = paragraphs.current
        # Optimization: Cache answer.
        Lex.prev_cursor = reader.cursor
        Lex.prev_element = result
        return result

    @staticmethod
    def canonical_subs(options):
        """Translate composite subs values.

        A single-entry 'options' sequence of 'none', 'normal' or 'verbatim'
        is replaced by (), config.subsnormal or config.subsverbatim
        respectively. Any other 'options' value is returned unchanged.
        """
        if len(options) == 1:
            if options[0] == 'none':
                options = ()
            elif options[0] == 'normal':
                options = config.subsnormal
            elif options[0] == 'verbatim':
                options = config.subsverbatim
        return options

    @staticmethod
    def subs_1(s, options):
        """Perform substitution specified in 'options' (in 'options' order).

        Returns the substituted text. An empty or None 's' is returned as is.
        If the 'plaintext' document attribute is set only the
        'specialcharacters' substitution is performed. Processing stops as
        soon as a substitution yields an empty or None result.

        Raises EAsciiDoc on an unrecognized substitution option.
        """
        if not s:
            return s
        if document.attributes.get('plaintext') is not None:
            options = ('specialcharacters',)
        result = s
        options = Lex.canonical_subs(options)
        for o in options:
            if o == 'specialcharacters':
                result = config.subs_specialchars(result)
            elif o == 'attributes':
                result = subs_attrs(result)
            elif o == 'quotes':
                result = subs_quotes(result)
            elif o == 'specialwords':
                result = config.subs_specialwords(result)
            elif o in ('replacements', 'replacements2', 'replacements3'):
                result = config.subs_replacements(result, o)
            elif o == 'macros':
                result = macros.subs(result)
            elif o == 'callouts':
                result = macros.subs(result, callouts=True)
            else:
                raise EAsciiDoc('illegal substitution option: %s' % o)
            trace(o, s, result)
            if not result:
                # Nothing left to substitute.
                break
        return result

    @staticmethod
    def subs(lines, options):
        """Perform inline processing specified by 'options' (in 'options'
        order) on sequence of 'lines'.

        Returns 'lines' unchanged if 'lines' or 'options' is empty, otherwise
        a list of the substituted lines.
        """
        if not lines or not options:
            return lines
        options = Lex.canonical_subs(options)
        # Join lines so quoting can span multiple lines.
        para = '\n'.join(lines)
        if 'macros' in options:
            para = macros.extract_passthroughs(para)
        for o in options:
            if o == 'attributes':
                # If we don't substitute attributes line-by-line then a single
                # undefined attribute will drop the entire paragraph.
                lines = subs_attrs(para.split('\n'))
                para = '\n'.join(lines)
            else:
                para = Lex.subs_1(para, (o,))
        if 'macros' in options:
            para = macros.restore_passthroughs(para)
        return para.splitlines()

    @staticmethod
    def set_margin(lines, margin=0):
        """Utility routine that sets the left margin to 'margin' space in a
        block of non-blank lines.

        The common leading whitespace width of 'lines' is removed and
        'margin' spaces are prepended to every line. Returns a new list;
        an empty 'lines' sequence yields an empty list.

        Every line must contain at least one non-whitespace character
        (a blank line raises AttributeError).
        """
        lines = list(lines)
        if not lines:
            # Nothing to measure or indent.
            return lines
        # Calculate width of block margin: the smallest offset of the first
        # non-whitespace character over all lines.
        width = min(re.search(r'\S', s).start() for s in lines)
        # Strip margin width from all lines.
        indent = ' ' * margin
        return [indent + s[width:] for s in lines]
1398
1399
1400# ---------------------------------------------------------------------------
1401# Document element classes parse AsciiDoc reader input and write DocBook writer
1402# output.
1403# ---------------------------------------------------------------------------
1404class Document(object):
1405    # doctype property.
1406    def getdoctype(self):
1407        return self.attributes.get('doctype')
1408
1409    def setdoctype(self, doctype):
1410        self.attributes['doctype'] = doctype
1411    doctype = property(getdoctype, setdoctype)
1412
1413    # backend property.
1414    def getbackend(self):
1415        return self.attributes.get('backend')
1416
1417    def setbackend(self, backend):
1418        if backend:
1419            backend = self.attributes.get('backend-alias-' + backend, backend)
1420        self.attributes['backend'] = backend
1421    backend = property(getbackend, setbackend)
1422
1423    def __init__(self):
1424        self.infile = None      # Source file name.
1425        self.outfile = None     # Output file name.
1426        self.attributes = InsensitiveDict()
1427        self.level = 0          # 0 => front matter. 1,2,3 => sect1,2,3.
1428        self.has_errors = False  # Set true if processing errors were flagged.
1429        self.has_warnings = False  # Set true if warnings were flagged.
1430        self.safe = False       # Default safe mode.
1431
1432    def update_attributes(self, attrs=None):
1433        """
1434        Set implicit attributes and attributes in 'attrs'.
1435        """
1436        t = time.time()
1437        self.attributes['localdate'], self.attributes['localtime'] = date_time_str(t)
1438        self.attributes['asciidoc-version'] = VERSION
1439        self.attributes['asciidoc-file'] = APP_FILE
1440        self.attributes['asciidoc-dir'] = APP_DIR
1441        if localapp():
1442            self.attributes['asciidoc-confdir'] = APP_DIR
1443        else:
1444            self.attributes['asciidoc-confdir'] = CONF_DIR
1445        self.attributes['user-dir'] = USER_DIR
1446        if config.verbose:
1447            self.attributes['verbose'] = ''
1448        # Update with configuration file attributes.
1449        if attrs:
1450            self.attributes.update(attrs)
1451        # Update with command-line attributes.
1452        self.attributes.update(config.cmd_attrs)
1453        # Extract miscellaneous configuration section entries from attributes.
1454        if attrs:
1455            config.load_miscellaneous(attrs)
1456        config.load_miscellaneous(config.cmd_attrs)
1457        self.attributes['newline'] = config.newline
1458        # File name related attributes can't be overridden.
1459        if self.infile is not None:
1460            if self.infile and os.path.exists(self.infile):
1461                t = os.path.getmtime(self.infile)
1462            elif self.infile == '<stdin>':
1463                t = time.time()
1464            else:
1465                t = None
1466            if t:
1467                self.attributes['docdate'], self.attributes['doctime'] = date_time_str(t)
1468            if self.infile != '<stdin>':
1469                self.attributes['infile'] = self.infile
1470                self.attributes['indir'] = os.path.dirname(self.infile)
1471                self.attributes['docfile'] = self.infile
1472                self.attributes['docdir'] = os.path.dirname(self.infile)
1473                self.attributes['docname'] = os.path.splitext(
1474                    os.path.basename(self.infile))[0]
1475        if self.outfile:
1476            if self.outfile != '<stdout>':
1477                self.attributes['outfile'] = self.outfile
1478                self.attributes['outdir'] = os.path.dirname(self.outfile)
1479                if self.infile == '<stdin>':
1480                    self.attributes['docname'] = os.path.splitext(
1481                        os.path.basename(self.outfile))[0]
1482                ext = os.path.splitext(self.outfile)[1][1:]
1483            elif config.outfilesuffix:
1484                ext = config.outfilesuffix[1:]
1485            else:
1486                ext = ''
1487            if ext:
1488                self.attributes['filetype'] = ext
1489                self.attributes['filetype-' + ext] = ''
1490
1491    def load_lang(self):
1492        """
1493        Load language configuration file.
1494        """
1495        lang = self.attributes.get('lang')
1496        if lang is None:
1497            filename = 'lang-en.conf'   # Default language file.
1498        else:
1499            filename = 'lang-' + lang + '.conf'
1500        if config.load_from_dirs(filename):
1501            self.attributes['lang'] = lang  # Reinstate new lang attribute.
1502        else:
1503            if lang is None:
1504                # The default language file must exist.
1505                message.error('missing conf file: %s' % filename, halt=True)
1506            else:
1507                message.warning('missing language conf file: %s' % filename)
1508
1509    def set_deprecated_attribute(self, old, new):
1510        """
1511        Ensures the 'old' name of an attribute that was renamed to 'new' is
1512        still honored.
1513        """
1514        if self.attributes.get(new) is None:
1515            if self.attributes.get(old) is not None:
1516                self.attributes[new] = self.attributes[old]
1517        else:
1518            self.attributes[old] = self.attributes[new]
1519
1520    @staticmethod
1521    def consume_attributes_and_comments(comments_only=False, noblanks=False):
1522        """
1523        Returns True if one or more attributes or comments were consumed.
1524        If 'noblanks' is True then consummation halts if a blank line is
1525        encountered.
1526        """
1527        result = False
1528        finished = False
1529        while not finished:
1530            finished = True
1531            if noblanks and not reader.read_next():
1532                return result
1533            if blocks.isnext() and 'skip' in blocks.current.options:
1534                result = True
1535                finished = False
1536                blocks.current.translate()
1537            if noblanks and not reader.read_next():
1538                return result
1539            if macros.isnext() and macros.current.name == 'comment':
1540                result = True
1541                finished = False
1542                macros.current.translate()
1543            if not comments_only:
1544                if AttributeEntry.isnext():
1545                    result = True
1546                    finished = False
1547                    AttributeEntry.translate()
1548                if AttributeList.isnext():
1549                    result = True
1550                    finished = False
1551                    AttributeList.translate()
1552        return result
1553
    def parse_header(self, doctype, backend):
        """
        Parses header, sets corresponding document attributes and finalizes
        document doctype and backend properties.
        Returns False if the document does not have a header.
        'doctype' and 'backend' are the doctype and backend option values
        passed on the command-line, None if the corresponding command-line
        option was not specified.

        A 'manpage' document without a header is a halting error.
        """
        assert self.level == 0
        # Skip comments and attribute entries that precede the header.
        self.consume_attributes_and_comments()
        if doctype is not None:
            # Command-line overrides header.
            self.doctype = doctype
        elif self.doctype is None:
            # Was not set on command-line or in document header.
            self.doctype = DEFAULT_DOCTYPE
        # Process document header.
        # A header is a level 0 title that is not a floating title.
        has_header = (Title.isnext() and Title.level == 0 and AttributeList.style() != 'float')
        if self.doctype == 'manpage' and not has_header:
            message.error('manpage document title is mandatory', halt=True)
        if has_header:
            Header.parse()
        # Command-line entries override header derived entries.
        self.attributes.update(config.cmd_attrs)
        # DEPRECATED: revision renamed to revnumber.
        self.set_deprecated_attribute('revision', 'revnumber')
        # DEPRECATED: date renamed to revdate.
        self.set_deprecated_attribute('date', 'revdate')
        # The doctype is assigned again here, after the header has been
        # parsed and the command-line attributes re-applied.
        if doctype is not None:
            # Command-line overrides header.
            self.doctype = doctype
        if backend is not None:
            # Command-line overrides header.
            self.backend = backend
        elif self.backend is None:
            # Was not set on command-line or in document header.
            self.backend = DEFAULT_BACKEND
        else:
            # Has been set in document header.
            # The self-assignment is deliberate: it runs the value through
            # the backend property setter.
            self.backend = self.backend  # Translate alias in header.
        assert self.doctype in ('article', 'manpage', 'book'), 'illegal document type'
        return has_header
1598
1599    def translate(self, has_header):
1600        if self.doctype == 'manpage':
1601            # Translate mandatory NAME section.
1602            if Lex.next_element() is not Title:
1603                message.error('name section expected')
1604            else:
1605                Title.translate()
1606                if Title.level != 1:
1607                    message.error('name section title must be at level 1')
1608                if not isinstance(Lex.next_element(), Paragraph):
1609                    message.error('malformed name section body')
1610                lines = reader.read_until(r'^$')
1611                s = ' '.join(lines)
1612                mo = re.match(r'^(?P<manname>.*?)\s+-\s+(?P<manpurpose>.*)$', s)
1613                if not mo:
1614                    message.error('malformed name section body')
1615                self.attributes['manname'] = mo.group('manname').strip()
1616                self.attributes['manpurpose'] = mo.group('manpurpose').strip()
1617                names = [s.strip() for s in self.attributes['manname'].split(',')]
1618                if len(names) > 9:
1619                    message.warning('too many manpage names')
1620                for i, name in enumerate(names):
1621                    self.attributes['manname%d' % (i + 1)] = name
1622        if has_header:
1623            # Do postponed substitutions (backend confs have been loaded).
1624            self.attributes['doctitle'] = Title.dosubs(self.attributes['doctitle'])
1625            if config.header_footer:
1626                hdr = config.subs_section('header', {})
1627                writer.write(hdr, trace='header')
1628            if 'title' in self.attributes:
1629                del self.attributes['title']
1630            self.consume_attributes_and_comments()
1631            if self.doctype in ('article', 'book'):
1632                # Translate 'preamble' (untitled elements between header
1633                # and first section title).
1634                if Lex.next_element() is not Title:
1635                    stag, etag = config.section2tags('preamble')
1636                    writer.write(stag, trace='preamble open')
1637                    Section.translate_body()
1638                    writer.write(etag, trace='preamble close')
1639            elif self.doctype == 'manpage' and 'name' in config.sections:
1640                writer.write(config.subs_section('name', {}), trace='name')
1641        else:
1642            self.process_author_names()
1643            if config.header_footer:
1644                hdr = config.subs_section('header', {})
1645                writer.write(hdr, trace='header')
1646            if Lex.next_element() is not Title:
1647                Section.translate_body()
1648        # Process remaining sections.
1649        while not reader.eof():
1650            if Lex.next_element() is not Title:
1651                raise EAsciiDoc('section title expected')
1652            Section.translate()
1653        Section.setlevel(0)  # Write remaining unwritten section close tags.
1654        # Substitute document parameters and write document footer.
1655        if config.header_footer:
1656            ftr = config.subs_section('footer', {})
1657            writer.write(ftr, trace='footer')
1658
1659    def parse_author(self, s):
1660        """ Return False if the author is malformed."""
1661        attrs = self.attributes  # Alias for readability.
1662        s = s.strip()
1663        mo = re.match(r'^(?P<name1>[^<>\s]+)'
1664                      '(\s+(?P<name2>[^<>\s]+))?'
1665                      '(\s+(?P<name3>[^<>\s]+))?'
1666                      '(\s+<(?P<email>\S+)>)?$', s)
1667        if not mo:
1668            # Names that don't match the formal specification.
1669            if s:
1670                attrs['firstname'] = s
1671            return
1672        firstname = mo.group('name1')
1673        if mo.group('name3'):
1674            middlename = mo.group('name2')
1675            lastname = mo.group('name3')
1676        else:
1677            middlename = None
1678            lastname = mo.group('name2')
1679        firstname = firstname.replace('_', ' ')
1680        if middlename:
1681            middlename = middlename.replace('_', ' ')
1682        if lastname:
1683            lastname = lastname.replace('_', ' ')
1684        email = mo.group('email')
1685        if firstname:
1686            attrs['firstname'] = firstname
1687        if middlename:
1688            attrs['middlename'] = middlename
1689        if lastname:
1690            attrs['lastname'] = lastname
1691        if email:
1692            attrs['email'] = email
1693        return
1694
1695    def process_author_names(self):
1696        """ Calculate any missing author related attributes."""
1697        attrs = self.attributes  # Alias for readability.
1698        firstname = attrs.get('firstname', '')
1699        middlename = attrs.get('middlename', '')
1700        lastname = attrs.get('lastname', '')
1701        author = attrs.get('author')
1702        initials = attrs.get('authorinitials')
1703        if author and not (firstname or middlename or lastname):
1704            self.parse_author(author)
1705            attrs['author'] = author.replace('_', ' ')
1706            self.process_author_names()
1707            return
1708        if not author:
1709            author = '%s %s %s' % (firstname, middlename, lastname)
1710            author = author.strip()
1711            author = re.sub(r'\s+', ' ', author)
1712        if not initials:
1713            initials = (firstname[:1] + middlename[:1] + lastname[:1])
1714            initials = initials.upper()
1715        names = [firstname, middlename, lastname, author, initials]
1716        for i, v in enumerate(names):
1717            v = config.subs_specialchars(v)
1718            v = subs_attrs(v)
1719            names[i] = v
1720        firstname, middlename, lastname, author, initials = names
1721        if firstname:
1722            attrs['firstname'] = firstname
1723        if middlename:
1724            attrs['middlename'] = middlename
1725        if lastname:
1726            attrs['lastname'] = lastname
1727        if author:
1728            attrs['author'] = author
1729        if initials:
1730            attrs['authorinitials'] = initials
1731        if author:
1732            attrs['authored'] = ''
1733
1734
class Header:
    """Static methods and attributes only."""
    # Revision line formatted like: [<revnumber>,] <revdate>[: <revremark>]
    REV_LINE_RE = r'^(\D*(?P<revnumber>.*?),)?(?P<revdate>.*?)(:\s*(?P<revremark>.*))?$'
    # RCS/CVS/SVN $Id$ marker line.
    RCS_ID_RE = r'^\$Id: \S+ (?P<revnumber>\S+) (?P<revdate>\S+) \S+ (?P<author>\S+) (\S+ )?\$$'

    def __init__(self):
        raise AssertionError('no class instances allowed')

    @staticmethod
    def parse():
        """
        Parse the document header and set the related document attributes.

        The level 0 title becomes 'doctitle' (its substitutions are
        postponed). The optional author and revision lines set the author
        attributes and 'revnumber', 'revdate' and 'revremark'. For the
        'manpage' doctype the title is split into 'mantitle' and 'manvolnum'.
        """
        assert Lex.next_element() is Title and Title.level == 0
        attrs = document.attributes  # Alias for readability.
        # Postpone title subs until backend conf files have been loaded.
        Title.translate(skipsubs=True)
        attrs['doctitle'] = Title.attributes['title']
        document.consume_attributes_and_comments(noblanks=True)
        s = reader.read_next()
        mo = None
        if s:
            # Process first header line after the title that is not a comment
            # or an attribute entry.
            s = reader.read()
            mo = re.match(Header.RCS_ID_RE, s)
            if not mo:
                document.parse_author(s)
                document.consume_attributes_and_comments(noblanks=True)
                if reader.read_next():
                    # Process second header line after the title that is not a
                    # comment or an attribute entry.
                    s = reader.read()
                    s = subs_attrs(s)
                    if s:
                        mo = re.match(Header.RCS_ID_RE, s)
                        if not mo:
                            mo = re.match(Header.REV_LINE_RE, s)
            document.consume_attributes_and_comments(noblanks=True)
        # A 'revnumber' attribute takes over from the header lines; it is
        # only used here if it holds an RCS $Id$ marker.
        s = attrs.get('revnumber')
        if s:
            mo = re.match(Header.RCS_ID_RE, s)
        if mo:
            groups = mo.groupdict()
            revnumber = groups['revnumber']
            if revnumber:
                attrs['revnumber'] = revnumber.strip()
            # Only RCS_ID_RE has an 'author' group.
            author = groups.get('author')
            if author and 'firstname' not in attrs:
                document.parse_author(author)
            # Only REV_LINE_RE has a 'revremark' group.
            revremark = groups.get('revremark')
            if revremark is not None:
                revremark = [revremark]
                # Revision remarks can continue on following lines.
                while reader.read_next():
                    if document.consume_attributes_and_comments(noblanks=True):
                        break
                    revremark.append(reader.read())
                revremark = Lex.subs(revremark, ['normal'])
                revremark = '\n'.join(revremark).strip()
                attrs['revremark'] = revremark
            revdate = groups['revdate']
            if revdate:
                attrs['revdate'] = revdate.strip()
            elif revnumber or revremark:
                # Set revision date to ensure valid DocBook revision.
                attrs['revdate'] = attrs['docdate']
        document.process_author_names()
        if document.doctype == 'manpage':
            # manpage title formatted like mantitle(manvolnum).
            mo = re.match(r'^(?P<mantitle>.*)\((?P<manvolnum>.*)\)$',
                          attrs['doctitle'])
            if not mo:
                message.error('malformed manpage title')
            else:
                mantitle = subs_attrs(mo.group('mantitle').strip())
                if mantitle is None:
                    # Nothing usable to store; do not call string methods on
                    # the missing value.
                    message.error('undefined attribute in manpage title')
                else:
                    # mantitle is lowered only if in ALL CAPS
                    if mantitle == mantitle.upper():
                        mantitle = mantitle.lower()
                    attrs['mantitle'] = mantitle
                attrs['manvolnum'] = mo.group('manvolnum').strip()
1815
1816
class AttributeEntry:
    """Static methods and attributes only."""
    pattern = None      # Attribute entry regular expression (loaded lazily).
    subs = None         # Substitutions applied to the current entry value.
    name = None
    name2 = None
    value = None
    attributes = {}     # Accumulates all the parsed attribute entries.

    def __init__(self):
        raise AssertionError('no class instances allowed')

    @staticmethod
    def isnext():
        """
        Return True if the next reader line is an attribute entry.

        On a match the entry name, name2 and stripped value are stored in the
        class attributes for use by translate(). If the
        'attributeentry-pattern' attribute is missing an error is reported
        and False is returned.
        """
        if not AttributeEntry.pattern:
            pat = document.attributes.get('attributeentry-pattern')
            if not pat:
                message.error("[attributes] missing 'attributeentry-pattern' entry")
                # Without a pattern nothing can match (and re.match() would
                # fail on the missing pattern).
                return False
            AttributeEntry.pattern = pat
        line = reader.read_next()
        if not line:
            return False
        # Attribute entry formatted like :<name>[.<name2>]:[ <value>]
        mo = re.match(AttributeEntry.pattern, line)
        if not mo:
            return False
        AttributeEntry.name = mo.group('attrname')
        AttributeEntry.name2 = mo.group('attrname2')
        AttributeEntry.value = (mo.group('attrvalue') or '').strip()
        return True

    @staticmethod
    def translate():
        """
        Consume the attribute entry found by isnext() and apply it.

        Entries with a second name are configuration file entries and are
        loaded into config. All other entries set the document attribute or,
        for names ending with '!', delete it. Most attributes set on the
        command-line are not overridden.
        """
        assert Lex.next_element() is AttributeEntry
        attr = AttributeEntry    # Alias for brevity.
        reader.read()            # Discard attribute entry from reader.
        # A trailing ' +' continues the value on the next line; only the
        # '+' is dropped so the joined parts stay separated by a space.
        while attr.value.endswith(' +'):
            if not reader.read_next():
                break
            attr.value = attr.value[:-1] + reader.read().strip()
        if attr.name2 is not None:
            # Configuration file attribute.
            if attr.name2 != '':
                # Section entry attribute.
                section = {}
                # Some sections can have name! syntax.
                if attr.name in ('attributes', 'miscellaneous') and attr.name2.endswith('!'):
                    section[attr.name] = [attr.name2]
                else:
                    section[attr.name] = ['%s=%s' % (attr.name2, attr.value)]
                config.load_sections(section)
                config.load_miscellaneous(config.conf_attrs)
            else:
                # Markup template section attribute.
                config.sections[attr.name] = [attr.value]
            return
        # Normal attribute.
        if attr.name.endswith('!'):
            # Names like name! un-define the attribute.
            attr.name = attr.name[:-1]
            attr.value = None
        # Strip white space and illegal name chars.
        attr.name = re.sub(r'[^\w\-_]', '', attr.name).lower()
        # Don't override most command-line attributes.
        if attr.name in config.cmd_attrs \
                and attr.name not in ('trace', 'numbered'):
            return
        # Update document attributes with attribute value.
        if attr.value is not None:
            mo = re.match(r'^pass:(?P<attrs>.*)\[(?P<value>.*)\]$', attr.value)
            if mo:
                # Inline pass-through syntax.
                attr.subs = mo.group('attrs')
                attr.value = mo.group('value')  # Pass-through.
            else:
                # Default substitution.
                # DEPRECATED: attributeentry-subs
                attr.subs = document.attributes.get('attributeentry-subs',
                                                    'specialcharacters,attributes')
            attr.subs = parse_options(attr.subs, SUBS_OPTIONS,
                                      'illegal substitution option')
            attr.value = Lex.subs((attr.value,), attr.subs)
            attr.value = writer.newline.join(attr.value)
            document.attributes[attr.name] = attr.value
        elif attr.name in document.attributes:
            del document.attributes[attr.name]
        attr.attributes[attr.name] = attr.value
1905
1906
class AttributeList:
    """Static methods and attributes only."""
    pattern = None  # Attribute list regular expression (see initialize()).
    match = None    # Match object of the most recently matched list.
    attrs = {}      # Attributes accumulated since the last consume().

    def __init__(self):
        raise AssertionError('no class instances allowed')

    @staticmethod
    def initialize():
        """Load the attribute list pattern from the document attributes.

        A missing 'attributelist-pattern' attribute is reported as an error
        and leaves the pattern unset."""
        if 'attributelist-pattern' not in document.attributes:
            message.error("[attributes] missing 'attributelist-pattern' entry")
        AttributeList.pattern = document.attributes.get('attributelist-pattern')

    @staticmethod
    def isnext():
        """Return True (and remember the match) if the next reader line is an
        attribute list."""
        line = reader.read_next()
        # An unset pattern can never match.
        if not line or not AttributeList.pattern:
            return False
        mo = re.match(AttributeList.pattern, line)
        if not mo:
            return False
        AttributeList.match = mo
        return True

    @staticmethod
    def translate():
        """Consume the attribute list line and merge its attributes into
        AttributeList.attrs."""
        assert Lex.next_element() is AttributeList
        reader.read()   # Discard attribute list from reader.
        attrs = {}
        for k, v in AttributeList.match.groupdict().items():
            if v is None:
                continue
            if k == 'attrlist':
                v = subs_attrs(v)
                if v:
                    parse_attributes(v, attrs)
            else:
                AttributeList.attrs[k] = v
        AttributeList.subs(attrs)
        AttributeList.attrs.update(attrs)

    @staticmethod
    def subs(attrs):
        """Substitute single quoted attribute values normally."""
        reo = re.compile(r"^'.*'$")
        for k, v in list(attrs.items()):
            if reo.match(str(v)):
                attrs[k] = Lex.subs_1(v[1:-1], config.subsnormal)

    @staticmethod
    def style():
        """Return the 'style' attribute or, failing that, the first
        positional attribute."""
        return AttributeList.attrs.get('style') or AttributeList.attrs.get('1')

    @staticmethod
    def consume(d=None):
        """Add attribute list to the dictionary 'd' and reset the list.

        If 'd' is omitted the attributes are simply discarded."""
        if d is None:
            # A fresh dictionary per call: a shared default would carry
            # attributes over from one call to the next.
            d = {}
        if AttributeList.attrs:
            d.update(AttributeList.attrs)
            AttributeList.attrs = {}
            # Generate option attributes.
            if 'options' in d:
                options = parse_options(d['options'], (), 'illegal option name')
                for option in options:
                    d[option + '-option'] = ''
1973
1974
class BlockTitle:
    """Static methods and attributes only."""
    title = None    # Pending block title (cleared by consume()).
    pattern = None  # Block title regular expression (set by Title.load()).

    def __init__(self):
        raise AssertionError('no class instances allowed')

    @staticmethod
    def isnext():
        """Return True (and remember the title) if the next reader line is a
        block title."""
        line = reader.read_next()
        # An unset pattern can never match.
        if not line or not BlockTitle.pattern:
            return False
        mo = re.match(BlockTitle.pattern, line)
        if not mo:
            return False
        BlockTitle.title = mo.group('title')
        return True

    @staticmethod
    def translate():
        """Consume the block title line and perform the title
        substitutions."""
        assert Lex.next_element() is BlockTitle
        reader.read()   # Discard title from reader.
        # Perform title substitutions.
        if not Title.subs:
            Title.subs = config.subsnormal
        s = Lex.subs((BlockTitle.title,), Title.subs)
        s = writer.newline.join(s)
        if not s:
            message.warning('blank block title')
        BlockTitle.title = s

    @staticmethod
    def consume(d=None):
        """If there is a title add it to dictionary 'd' then reset title.

        If 'd' is omitted the title is simply discarded."""
        if d is None:
            # A fresh dictionary per call rather than a shared default.
            d = {}
        if BlockTitle.title:
            d['title'] = BlockTitle.title
            BlockTitle.title = None
2013
2014
class Title:
    """Processes Header and Section titles. Static methods and attributes
    only."""
    # Class variables
    underlines = ('==', '--', '~~', '^^', '++')  # Levels 0,1,2,3,4.
    subs = ()
    pattern = None
    level = 0
    attributes = {}
    sectname = None
    section_numbers = [0] * len(underlines)
    dump_dict = {}
    linecount = None    # Number of lines in title (1 or 2).

    def __init__(self):
        raise AssertionError('no class instances allowed')

    @staticmethod
    def translate(skipsubs=False):
        """Parse the Title.attributes and Title.level from the reader. The
        real work has already been done by parse().

        If 'skipsubs' is true the title substitutions are not performed."""
        assert Lex.next_element() in (Title, FloatingTitle)
        # Discard title from reader.
        for _ in range(Title.linecount):
            reader.read()
        Title.setsectname()
        if not skipsubs:
            Title.attributes['title'] = Title.dosubs(Title.attributes['title'])

    @staticmethod
    def dosubs(title):
        """
        Perform title substitutions.
        """
        if not Title.subs:
            Title.subs = config.subsnormal
        title = Lex.subs((title,), Title.subs)
        title = writer.newline.join(title)
        if not title:
            message.warning('blank section title')
        return title

    @staticmethod
    def isnext():
        """Return True if the next one or two reader lines are a title."""
        lines = reader.read_ahead(2)
        return Title.parse(lines)

    @staticmethod
    def parse(lines):
        """Parse title at start of lines tuple.

        Return True if 'lines' starts with a single-line or a two-line
        (underlined) title, in which case Title.attributes, Title.level and
        Title.linecount are set; otherwise return False and leave the class
        state untouched."""
        if len(lines) == 0:
            return False
        if len(lines[0]) == 0:
            return False  # Title can't be blank.
        # Check for single-line titles.
        result = False
        for level in range(len(Title.underlines)):
            k = 'sect%s' % level
            if k in Title.dump_dict:
                mo = re.match(Title.dump_dict[k], lines[0])
                if mo:
                    Title.attributes = mo.groupdict()
                    Title.level = level
                    Title.linecount = 1
                    result = True
                    break
        if not result:
            # Check for double-line titles.
            if not Title.pattern:
                return False  # Single-line titles only.
            if len(lines) < 2:
                return False
            title, ul = lines[:2]
            title_len = column_width(title)
            ul_len = len(ul)
            if ul_len < 2:
                return False
            # Fast elimination check.
            if ul[:2] not in Title.underlines:
                return False
            # Length of underline must be within +/- 3 of title. Next, test for backward compatibility.
            if not ((ul_len-3 < title_len < ul_len+3) or (ul_len-3 < len(title) < ul_len+3)):
                return False
            # Check for valid repetition of underline character pairs.
            s = ul[:2] * ((ul_len + 1) // 2)
            if ul != s[:ul_len]:
                return False
            # Don't be fooled by back-to-back delimited blocks, require at
            # least one alphanumeric character in title.
            if not re.search(r'\w', title):
                return False
            mo = re.match(Title.pattern, title)
            if mo:
                Title.attributes = mo.groupdict()
                Title.level = list(Title.underlines).index(ul[:2])
                Title.linecount = 2
                result = True
        if result:
            # Check for expected pattern match groups.
            if 'title' not in Title.attributes:
                message.warning('[titles] entry has no <title> group')
                Title.attributes['title'] = lines[0]
            for k, v in list(Title.attributes.items()):
                if v is None:
                    del Title.attributes[k]
            # Apply the level offset to freshly parsed titles only: a failed
            # parse must not keep shifting the level of the previous title.
            try:
                offset = int(document.attributes.get('leveloffset', '0'))
            except (TypeError, ValueError):
                offset = 0  # Ignore a non-numeric leveloffset.
            Title.level += offset
            Title.attributes['level'] = str(Title.level)
        return result

    @staticmethod
    def load(entries):
        """Load and validate [titles] section entries dictionary.

        Raises EAsciiDoc if an entry is malformed."""
        if 'underlines' in entries:
            errmsg = 'malformed [titles] underlines entry'
            try:
                underlines = parse_list(entries['underlines'])
            except Exception:
                raise EAsciiDoc(errmsg)
            if len(underlines) != len(Title.underlines):
                raise EAsciiDoc(errmsg)
            if any(len(s) != 2 for s in underlines):
                raise EAsciiDoc(errmsg)
            Title.underlines = tuple(underlines)
            Title.dump_dict['underlines'] = entries['underlines']
        if 'subs' in entries:
            Title.subs = parse_options(entries['subs'], SUBS_OPTIONS,
                                       'illegal [titles] subs entry')
            Title.dump_dict['subs'] = entries['subs']
        if 'sectiontitle' in entries:
            pat = entries['sectiontitle']
            if not pat or not is_re(pat):
                raise EAsciiDoc('malformed [titles] sectiontitle entry')
            Title.pattern = pat
            Title.dump_dict['sectiontitle'] = pat
        if 'blocktitle' in entries:
            pat = entries['blocktitle']
            if not pat or not is_re(pat):
                raise EAsciiDoc('malformed [titles] blocktitle entry')
            BlockTitle.pattern = pat
            Title.dump_dict['blocktitle'] = pat
        # Load single-line title patterns.
        for k in ('sect0', 'sect1', 'sect2', 'sect3', 'sect4'):
            if k in entries:
                pat = entries[k]
                if not pat or not is_re(pat):
                    raise EAsciiDoc('malformed [titles] %s entry' % k)
                Title.dump_dict[k] = pat
        # TODO: Check we have either a Title.pattern or at least one
        # single-line title pattern -- can this be done here or do we need
        # check routine like the other block checkers?

    @staticmethod
    def dump():
        """Dump the loaded [titles] section entries."""
        dump_section('titles', Title.dump_dict)

    @staticmethod
    def setsectname():
        """
        Set Title section name:
        If the first positional or 'template' attribute is set use it,
        next search for section title in [specialsections],
        if not found use default 'sect<level>' name.
        """
        sectname = AttributeList.attrs.get('1')
        if sectname and sectname != 'float':
            Title.sectname = sectname
        elif 'template' in AttributeList.attrs:
            Title.sectname = AttributeList.attrs['template']
        else:
            for pat, sect in list(config.specialsections.items()):
                mo = re.match(pat, Title.attributes['title'])
                if mo:
                    # Prefer the pattern's 'title' group (if any) over the
                    # whole match for the displayed title.
                    title = mo.groupdict().get('title')
                    if title is not None:
                        Title.attributes['title'] = title.strip()
                    else:
                        Title.attributes['title'] = mo.group().strip()
                    Title.sectname = sect
                    break
            else:
                Title.sectname = 'sect%d' % Title.level

    @staticmethod
    def getnumber(level):
        """Return next section number at section 'level' formatted like
        1.2.3.4.

        The counter at 'level' is incremented and the counters of all deeper
        levels are reset. Level 0 is never numbered."""
        number = ''
        for lvl, n in enumerate(Title.section_numbers):
            if lvl == 0:
                continue
            if lvl < level:
                number = '%s%d.' % (number, n)
            elif lvl == level:
                number = '%s%d.' % (number, n + 1)
                Title.section_numbers[lvl] = n + 1
            else:
                # Reset unprocessed section levels.
                Title.section_numbers[lvl] = 0
        return number
2221
2222
class FloatingTitle(Title):
    """Floated titles are translated differently."""

    @staticmethod
    def isnext():
        """Return True if a title comes next and the pending attribute list
        style is 'float'."""
        if not Title.isnext():
            return False
        return AttributeList.style() == 'float'

    @staticmethod
    def translate():
        """Translate the title and write the opening tag of the
        [floatingtitle] template; warn if that template is missing."""
        assert Lex.next_element() is FloatingTitle
        Title.translate()
        Section.set_id()
        AttributeList.consume(Title.attributes)
        section_name = 'floatingtitle'
        if section_name not in config.sections:
            message.warning('missing template section: [%s]' % section_name)
            return
        # Only the start tag is written.
        open_tag, _close_tag = config.section2tags(section_name, Title.attributes)
        writer.write(open_tag, trace='floating title')
2242
2243
class Section:
    """Static methods and attributes only."""
    endtags = []  # Stack of currently open section (level,endtag) tuples.
    ids = []      # List of already used ids.

    def __init__(self):
        raise AssertionError('no class instances allowed')

    @staticmethod
    def savetag(level, etag):
        """Save section end."""
        Section.endtags.append((level, etag))

    @staticmethod
    def setlevel(level):
        """Set document level and write open section close tags up to level."""
        while Section.endtags and Section.endtags[-1][0] >= level:
            writer.write(Section.endtags.pop()[1], trace='section close')
        document.level = level

    @staticmethod
    def gen_id(title):
        """
        Return a new unique section id generated from 'title' and record it
        in Section.ids.

        The normalized value of the id attribute is an NCName according to
        the 'Namespaces in XML' Recommendation:
        NCName          ::=     NCNameStartChar NCNameChar*
        NCNameChar      ::=     NameChar - ':'
        NCNameStartChar ::=     Letter | '_'
        NameChar        ::=     Letter | Digit | '.' | '-' | '_' | ':'
        """
        # Replace non-alpha numeric characters in title with underscores and
        # convert to lower case.
        base_id = re.sub(r'\W+', '_', title).strip('_').lower()
        if 'ascii-ids' in document.attributes:
            # Replace non-ASCII characters with ASCII equivalents.
            try:
                from trans import trans
                base_id = trans(base_id)
            except ImportError:
                base_id = unicodedata.normalize('NFKD', base_id).encode('ascii', 'ignore').decode('ascii')
        # Prefix the ID name with idprefix attribute or underscore if not
        # defined. Prefix ensures the ID does not clash with existing IDs.
        base_id = document.attributes.get('idprefix', '_') + base_id
        # The first use of an id is unadorned; later ones get _2, _3, ...
        candidate = base_id
        suffix = 1
        while candidate in Section.ids:
            suffix += 1
            candidate = '%s_%d' % (base_id, suffix)
        Section.ids.append(candidate)
        return candidate

    @staticmethod
    def set_id():
        """Generate the section 'id' attribute if the 'sectids' attribute is
        defined and no id was set explicitly."""
        if document.attributes.get('sectids') is not None \
                and 'id' not in AttributeList.attrs:
            # Generate ids for sections.
            AttributeList.attrs['id'] = Section.gen_id(Title.attributes['title'])

    @staticmethod
    def translate():
        """Translate the next section: validate its level, write the section
        open tag, save the close tag and translate the section body."""
        assert Lex.next_element() is Title
        prev_sectname = Title.sectname
        Title.translate()
        if Title.level == 0 and document.doctype != 'book':
            message.error('only book doctypes can contain level 0 sections')
        if Title.level > document.level \
                and 'basebackend-docbook' in document.attributes \
                and prev_sectname in ('colophon', 'abstract',
                                      'dedication', 'glossary', 'bibliography'):
            message.error('%s section cannot contain sub-sections' % prev_sectname)
        if Title.level > document.level + 1:
            # Sub-sections of multi-part book level zero Preface and Appendices
            # are meant to be out of sequence.
            expected_gap = (document.doctype == 'book'
                            and document.level == 0
                            and Title.level == 2
                            and prev_sectname in ('preface', 'appendix'))
            if not expected_gap:
                message.warning('section title out of sequence: '
                                'expected level %d, got level %d'
                                % (document.level + 1, Title.level))
        Section.set_id()
        Section.setlevel(Title.level)
        if 'numbered' in document.attributes:
            Title.attributes['sectnum'] = Title.getnumber(document.level)
        else:
            Title.attributes['sectnum'] = ''
        AttributeList.consume(Title.attributes)
        stag, etag = config.section2tags(Title.sectname, Title.attributes)
        Section.savetag(Title.level, etag)
        writer.write(stag, trace='section open: level %d: %s' %
                                 (Title.level, Title.attributes['title']))
        Section.translate_body()

    @staticmethod
    def translate_body(terminator=Title):
        """Translate elements until 'terminator' (or the end of input) is
        reached, reporting empty sections that would give invalid markup."""
        isempty = True
        element = Lex.next_element()
        while element and element is not terminator:
            if isinstance(terminator, DelimitedBlock) and element is Title:
                message.error('section title not permitted in delimited block')
            element.translate()
            element = Lex.next_element()
            isempty = False
        # The section is not empty if contains a subsection.
        if element and isempty and Title.level > document.level:
            isempty = False
        # Report empty sections if invalid markup will result.
        if isempty:
            if document.backend == 'docbook' and Title.sectname != 'index':
                message.error('empty section is not valid')
2364
2365
2366class AbstractBlock:
2367
2368    blocknames = []  # Global stack of names for push_blockname() and pop_blockname().
2369
2370    def __init__(self):
2371        # Configuration parameter names common to all blocks.
2372        self.CONF_ENTRIES = ('delimiter', 'options', 'subs', 'presubs', 'postsubs',
2373                             'posattrs', 'style', '.*-style', 'template', 'filter')
2374        self.start = None    # File reader cursor at start delimiter.
2375        self.defname = None  # Configuration file block definition section name.
2376        # Configuration parameters.
2377        self.delimiter = None  # Regular expression matching block delimiter.
2378        self.delimiter_reo = None  # Compiled delimiter.
2379        self.template = None  # template section entry.
2380        self.options = ()     # options entry list.
2381        self.presubs = None   # presubs/subs entry list.
2382        self.postsubs = ()    # postsubs entry list.
2383        self.filter = None    # filter entry.
2384        self.posattrs = ()    # posattrs entry list.
2385        self.style = None     # Default style.
2386        self.styles = OrderedDict()  # Each entry is a styles dictionary.
2387        # Before a block is processed it's attributes (from it's
2388        # attributes list) are merged with the block configuration parameters
2389        # (by self.merge_attributes()) resulting in the template substitution
2390        # dictionary (self.attributes) and the block's processing parameters
2391        # (self.parameters).
2392        self.attributes = {}
2393        # The names of block parameters.
2394        self.PARAM_NAMES = ('template', 'options', 'presubs', 'postsubs', 'filter')
2395        self.parameters = None
2396        # Leading delimiter match object.
2397        self.mo = None
2398
2399    def short_name(self):
2400        """ Return the text following the first dash in the section name."""
2401        i = self.defname.find('-')
2402        if i == -1:
2403            return self.defname
2404        else:
2405            return self.defname[i + 1:]
2406
2407    def error(self, msg, cursor=None, halt=False):
2408        message.error('[%s] %s' % (self.defname, msg), cursor, halt)
2409
2410    def is_conf_entry(self, param):
2411        """Return True if param matches an allowed configuration file entry
2412        name."""
2413        for s in self.CONF_ENTRIES:
2414            if re.match('^' + s + '$', param):
2415                return True
2416        return False
2417
    def load(self, defname, entries):
        """Update block definition from section 'entries' dictionary.

        defname is recorded as the block's configuration section name and
        the entries are then parsed onto this object itself, including
        entries that are not processing parameters (all=True).
        """
        self.defname = defname
        self.update_parameters(entries, self, all=True)
2422
2423    def update_parameters(self, src, dst=None, all=False):
2424        """
2425        Parse processing parameters from src dictionary to dst object.
2426        dst defaults to self.parameters.
2427        If all is True then copy src entries that aren't parameter names.
2428        """
2429        dst = dst or self.parameters
2430        msg = '[%s] malformed entry %%s: %%s' % self.defname
2431
2432        def copy(obj, k, v):
2433            if isinstance(obj, dict):
2434                obj[k] = v
2435            else:
2436                setattr(obj, k, v)
2437        for k, v in list(src.items()):
2438            if not re.match(r'\d+', k) and not is_name(k):
2439                raise EAsciiDoc(msg % (k, v))
2440            if k == 'template':
2441                if not is_name(v):
2442                    raise EAsciiDoc(msg % (k, v))
2443                copy(dst, k, v)
2444            elif k == 'filter':
2445                copy(dst, k, v)
2446            elif k == 'options':
2447                if isinstance(v, str):
2448                    v = parse_options(v, (), msg % (k, v))
2449                    # Merge with existing options.
2450                    v = tuple(set(dst.options).union(set(v)))
2451                copy(dst, k, v)
2452            elif k in ('subs', 'presubs', 'postsubs'):
2453                # Subs is an alias for presubs.
2454                if k == 'subs':
2455                    k = 'presubs'
2456                if isinstance(v, str):
2457                    v = parse_options(v, SUBS_OPTIONS, msg % (k, v))
2458                copy(dst, k, v)
2459            elif k == 'delimiter':
2460                if v and is_re(v):
2461                    copy(dst, k, v)
2462                else:
2463                    raise EAsciiDoc(msg % (k, v))
2464            elif k == 'style':
2465                if is_name(v):
2466                    copy(dst, k, v)
2467                else:
2468                    raise EAsciiDoc(msg % (k, v))
2469            elif k == 'posattrs':
2470                v = parse_options(v, (), msg % (k, v))
2471                copy(dst, k, v)
2472            else:
2473                mo = re.match(r'^(?P<style>.*)-style$', k)
2474                if mo:
2475                    if not v:
2476                        raise EAsciiDoc(msg % (k, v))
2477                    style = mo.group('style')
2478                    if not is_name(style):
2479                        raise EAsciiDoc(msg % (k, v))
2480                    d = {}
2481                    if not parse_named_attributes(v, d):
2482                        raise EAsciiDoc(msg % (k, v))
2483                    if 'subs' in d:
2484                        # Subs is an alias for presubs.
2485                        d['presubs'] = d['subs']
2486                        del d['subs']
2487                    self.styles[style] = d
2488                elif all or k in self.PARAM_NAMES:
2489                    copy(dst, k, v)  # Derived class specific entries.
2490
2491    def get_param(self, name, params=None):
2492        """
2493        Return named processing parameter from params dictionary.
2494        If the parameter is not in params look in self.parameters.
2495        """
2496        if params and name in params:
2497            return params[name]
2498        elif name in self.parameters:
2499            return self.parameters[name]
2500        else:
2501            return None
2502
2503    def get_subs(self, params=None):
2504        """Return (presubs, postsubs) tuple."""
2505        presubs = self.get_param('presubs', params)
2506        postsubs = self.get_param('postsubs', params)
2507        return (presubs, postsubs)
2508
2509    def dump(self):
2510        """Write block definition to stdout."""
2511        write = lambda s: sys.stdout.write('%s%s' % (s, writer.newline))
2512        write('[' + self.defname + ']')
2513        if self.is_conf_entry('delimiter'):
2514            write('delimiter=' + self.delimiter)
2515        if self.template:
2516            write('template=' + self.template)
2517        if self.options:
2518            write('options=' + ','.join(self.options))
2519        if self.presubs:
2520            if self.postsubs:
2521                write('presubs=' + ','.join(self.presubs))
2522            else:
2523                write('subs=' + ','.join(self.presubs))
2524        if self.postsubs:
2525            write('postsubs=' + ','.join(self.postsubs))
2526        if self.filter:
2527            write('filter=' + self.filter)
2528        if self.posattrs:
2529            write('posattrs=' + ','.join(self.posattrs))
2530        if self.style:
2531            write('style=' + self.style)
2532        if self.styles:
2533            for style, d in list(self.styles.items()):
2534                s = ''
2535                for k, v in list(d.items()):
2536                    s += '%s=%r,' % (k, v)
2537                write('%s-style=%s' % (style, s[:-1]))
2538
2539    def validate(self):
2540        """Validate block after the complete configuration has been loaded."""
2541        if self.is_conf_entry('delimiter') and not self.delimiter:
2542            raise EAsciiDoc('[%s] missing delimiter' % self.defname)
2543        if self.style:
2544            if not is_name(self.style):
2545                raise EAsciiDoc('illegal style name: %s' % self.style)
2546            if self.style not in self.styles:
2547                if not isinstance(self, List):   # Lists don't have templates.
2548                    message.warning('[%s] \'%s\' style not in %s' % (
2549                        self.defname, self.style, list(self.styles.keys())))
2550        # Check all styles for missing templates.
2551        all_styles_have_template = True
2552        for k, v in list(self.styles.items()):
2553            t = v.get('template')
2554            if t and t not in config.sections:
2555                # Defer check if template name contains attributes.
2556                if not re.search(r'{.+}', t):
2557                    message.warning('missing template section: [%s]' % t)
2558            if not t:
2559                all_styles_have_template = False
2560        # Check we have a valid template entry or alternatively that all the
2561        # styles have templates.
2562        if self.is_conf_entry('template') and 'skip' not in self.options:
2563            if self.template:
2564                if self.template not in config.sections:
2565                    # Defer check if template name contains attributes.
2566                    if not re.search(r'{.+}', self.template):
2567                        message.warning('missing template section: [%s]'
2568                                        % self.template)
2569            elif not all_styles_have_template:
2570                if not isinstance(self, List):  # Lists don't have templates.
2571                    message.warning('missing styles templates: [%s]' % self.defname)
2572
2573    def isnext(self):
2574        """Check if this block is next in document reader."""
2575        result = False
2576        reader.skip_blank_lines()
2577        if reader.read_next():
2578            if not self.delimiter_reo:
2579                # Cache compiled delimiter optimization.
2580                self.delimiter_reo = re.compile(self.delimiter)
2581            mo = self.delimiter_reo.match(reader.read_next())
2582            if mo:
2583                self.mo = mo
2584                result = True
2585        return result
2586
2587    def translate(self):
2588        """Translate block from document reader."""
2589        if not self.presubs:
2590            self.presubs = config.subsnormal
2591        if reader.cursor:
2592            self.start = reader.cursor[:]
2593
2594    def push_blockname(self, blockname=None):
2595        """
2596        On block entry set the 'blockname' attribute.
2597        Only applies to delimited blocks, lists and tables.
2598        """
2599        if blockname is None:
2600            blockname = self.attributes.get('style', self.short_name()).lower()
2601        trace('push blockname', blockname)
2602        self.blocknames.append(blockname)
2603        document.attributes['blockname'] = blockname
2604
2605    def pop_blockname(self):
2606        """
2607        On block exits restore previous (parent) 'blockname' attribute or
2608        un-define it if we're no longer inside a block.
2609        """
2610        assert len(self.blocknames) > 0
2611        blockname = self.blocknames.pop()
2612        trace('pop blockname', blockname)
2613        if len(self.blocknames) == 0:
2614            document.attributes['blockname'] = None
2615        else:
2616            document.attributes['blockname'] = self.blocknames[-1]
2617
2618    def merge_attributes(self, attrs, params=[]):
2619        """
2620        Use the current block's attribute list (attrs dictionary) to build a
2621        dictionary of block processing parameters (self.parameters) and tag
2622        substitution attributes (self.attributes).
2623
2624        1. Copy the default parameters (self.*) to self.parameters.
2625        self.parameters are used internally to render the current block.
2626        Optional params array of additional parameters.
2627
2628        2. Copy attrs to self.attributes. self.attributes are used for template
2629        and tag substitution in the current block.
2630
2631        3. If a style attribute was specified update self.parameters with the
2632        corresponding style parameters; if there are any style parameters
2633        remaining add them to self.attributes (existing attribute list entries
2634        take precedence).
2635
2636        4. Set named positional attributes in self.attributes if self.posattrs
2637        was specified.
2638
2639        5. Finally self.parameters is updated with any corresponding parameters
2640        specified in attrs.
2641
2642        """
2643
2644        def check_array_parameter(param):
2645            # Check the parameter is a sequence type.
2646            if not is_array(self.parameters[param]):
2647                message.error('malformed %s parameter: %s' % (param, self.parameters[param]))
2648                # Revert to default value.
2649                self.parameters[param] = getattr(self, param)
2650
2651        params = list(self.PARAM_NAMES) + params
2652        self.attributes = {}
2653        if self.style:
2654            # If a default style is defined make it available in the template.
2655            self.attributes['style'] = self.style
2656        self.attributes.update(attrs)
2657        # Calculate dynamic block parameters.
2658        # Start with configuration file defaults.
2659        self.parameters = AttrDict()
2660        for name in params:
2661            self.parameters[name] = getattr(self, name)
2662        # Load the selected style attributes.
2663        posattrs = self.posattrs
2664        if posattrs and posattrs[0] == 'style':
2665            # Positional attribute style has highest precedence.
2666            style = self.attributes.get('1')
2667        else:
2668            style = None
2669        if not style:
2670            # Use explicit style attribute, fall back to default style.
2671            style = self.attributes.get('style', self.style)
2672        if style:
2673            if not is_name(style):
2674                message.error('illegal style name: %s' % style)
2675                style = self.style
2676            # Lists have implicit styles and do their own style checks.
2677            elif style not in self.styles and not isinstance(self, List):
2678                message.warning('missing style: [%s]: %s' % (self.defname, style))
2679                style = self.style
2680            if style in self.styles:
2681                self.attributes['style'] = style
2682                for k, v in list(self.styles[style].items()):
2683                    if k == 'posattrs':
2684                        posattrs = v
2685                    elif k in params:
2686                        self.parameters[k] = v
2687                    elif k not in self.attributes:
2688                        # Style attributes don't take precedence over explicit.
2689                        self.attributes[k] = v
2690        # Set named positional attributes.
2691        for i, v in enumerate(posattrs):
2692            if str(i + 1) in self.attributes:
2693                self.attributes[v] = self.attributes[str(i + 1)]
2694        # Override config and style attributes with attribute list attributes.
2695        self.update_parameters(attrs)
2696        check_array_parameter('options')
2697        check_array_parameter('presubs')
2698        check_array_parameter('postsubs')
2699
2700
class AbstractBlocks:
    """List of block definitions."""
    PREFIX = ''         # Conf file section name prefix set in derived classes.
    BLOCK_TYPE = None   # Block type set in derived classes.

    def __init__(self):
        self.current = None     # Block that matched in the last isnext().
        self.blocks = []        # List of Block objects.
        self.default = None     # Default Block.
        self.delimiters = None  # Combined delimiters regular expression.

    def load(self, sections):
        """
        Load block definitions from the 'sections' dictionary.
        Every section whose name starts with PREFIX updates the block of
        that name, which is created first if it does not exist yet.
        """
        pattern = r'^' + self.PREFIX + r'.+$'
        for name in list(sections.keys()):
            if not re.match(pattern, name):
                continue
            entries = {}
            parse_entries(sections.get(name, ()), entries)
            block = next((b for b in self.blocks if b.defname == name), None)
            if block is None:
                block = self.BLOCK_TYPE()
                self.blocks.append(block)
            try:
                block.load(name, entries)
            except EAsciiDoc as e:
                raise EAsciiDoc('[%s] %s' % (name, str(e)))

    def dump(self):
        """Dump every block definition."""
        for block in self.blocks:
            block.dump()

    def isnext(self):
        """Return True, remembering the block in self.current, if one of
        the block definitions is next; blocks are tried in list order."""
        for candidate in self.blocks:
            if not candidate.isnext():
                continue
            self.current = candidate
            return True
        return False

    def validate(self):
        """Validate the block definitions."""
        # Validate every block, then build the combined delimiter pattern.
        for block in self.blocks:
            assert block.__class__ is self.BLOCK_TYPE
            block.validate()
        delimiters = [b.delimiter for b in self.blocks if b.delimiter]
        self.delimiters = re_join(delimiters)
2750
2751
class Paragraph(AbstractBlock):
    """Paragraph block definition."""

    def __init__(self):
        AbstractBlock.__init__(self)
        self.text = None          # Text in first line of paragraph.

    def load(self, name, entries):
        AbstractBlock.load(self, name, entries)

    def dump(self):
        """Dump the definition followed by an empty line."""
        AbstractBlock.dump(self)
        sys.stdout.write('%s%s' % ('', writer.newline))

    def isnext(self):
        """As AbstractBlock.isnext(), also saving the matched 'text' group."""
        matched = AbstractBlock.isnext(self)
        if matched:
            self.text = self.mo.groupdict().get('text')
        return matched

    def translate(self):
        """Read the paragraph from the reader and write its markup."""
        AbstractBlock.translate(self)
        # Every named delimiter group except 'text' is a block attribute.
        attrs = self.mo.groupdict().copy()
        attrs.pop('text', None)
        BlockTitle.consume(attrs)
        AttributeList.consume(attrs)
        self.merge_attributes(attrs)
        reader.read()   # Discard (already parsed item first line).
        remaining = reader.read_until(paragraphs.terminators)
        if 'skip' in self.parameters.options:
            return
        lines = [self.text] + list(remaining)
        presubs = self.parameters.presubs
        postsubs = self.parameters.postsubs
        if document.attributes.get('plaintext') is None:
            lines = Lex.set_margin(lines)  # Move body to left margin.

        lines = Lex.subs(lines, presubs)
        template = subs_attrs(self.parameters.template, attrs)
        stag = config.section2tags(template, self.attributes, skipend=True)[0]
        if self.parameters.filter:
            lines = filter_lines(self.parameters.filter, lines, self.attributes)
        lines = Lex.subs(lines, postsubs)
        etag = config.section2tags(template, self.attributes, skipstart=True)[1]
        # Write start tag, content, end tag.
        writer.write(dovetail_tags(stag, lines, etag), trace='paragraph')
2799
2800
class Paragraphs(AbstractBlocks):
    """List of paragraph definitions."""
    BLOCK_TYPE = Paragraph
    PREFIX = 'paradef-'

    def __init__(self):
        AbstractBlocks.__init__(self)
        self.terminators = None    # List of compiled re's.

    def initialize(self):
        """Compile the patterns passed to reader.read_until() when a
        paragraph body is read."""
        patterns = (
            r'^\+$|^$',
            AttributeList.pattern,
            blocks.delimiters,
            tables.delimiters,
            tables_OLD.delimiters,
        )
        self.terminators = [re.compile(pattern) for pattern in patterns]

    def load(self, sections):
        AbstractBlocks.load(self, sections)

    def validate(self):
        """Validate the definitions and move [paradef-default] to the end of
        the list. Raises EAsciiDoc if that section is missing."""
        AbstractBlocks.validate(self)
        default = next(
            (b for b in self.blocks if b.defname == 'paradef-default'), None)
        if default is None:
            raise EAsciiDoc('missing section: [paradef-default]')
        # Check we have a default paragraph definition, put it last in list.
        self.blocks.remove(default)
        self.blocks.append(default)
        self.default = default
2833
2834
2835class List(AbstractBlock):
2836    NUMBER_STYLES = ('arabic', 'loweralpha', 'upperalpha', 'lowerroman', 'upperroman')
2837
2838    def __init__(self):
2839        AbstractBlock.__init__(self)
2840        self.CONF_ENTRIES += ('type', 'tags')
2841        self.PARAM_NAMES += ('tags',)
2842        # listdef conf file parameters.
2843        self.type = None
2844        self.tags = None      # Name of listtags-<tags> conf section.
2845        # Calculated parameters.
2846        self.tag = None       # Current tags AttrDict.
2847        self.label = None     # List item label (labeled lists).
2848        self.text = None      # Text in first line of list item.
2849        self.index = None     # Matched delimiter 'index' group (numbered lists).
2850        self.type = None      # List type ('numbered','bulleted','labeled').
2851        self.ordinal = None   # Current list item ordinal number (1..)
2852        self.number_style = None  # Current numbered list style ('arabic'..)
2853
    def load(self, name, entries):
        """Load the list definition; delegates to AbstractBlock.load()."""
        AbstractBlock.load(self, name, entries)
2856
2857    def dump(self):
2858        AbstractBlock.dump(self)
2859        write = lambda s: sys.stdout.write('%s%s' % (s, writer.newline))
2860        write('type=' + self.type)
2861        write('tags=' + self.tags)
2862        write('')
2863
2864    def validate(self):
2865        AbstractBlock.validate(self)
2866        tags = [self.tags]
2867        tags += [s['tags'] for s in list(self.styles.values()) if 'tags' in s]
2868        for t in tags:
2869            if t not in lists.tags:
2870                self.error('missing section: [listtags-%s]' % t, halt=True)
2871
2872    def isnext(self):
2873        result = AbstractBlock.isnext(self)
2874        if result:
2875            self.label = self.mo.groupdict().get('label')
2876            self.text = self.mo.groupdict().get('text')
2877            self.index = self.mo.groupdict().get('index')
2878        return result
2879
2880    def translate_entry(self):
2881        assert self.type == 'labeled'
2882        entrytag = subs_tag(self.tag.entry, self.attributes)
2883        labeltag = subs_tag(self.tag.label, self.attributes)
2884        writer.write(entrytag[0], trace='list entry open')
2885        writer.write(labeltag[0], trace='list label open')
2886        # Write labels.
2887        while Lex.next_element() is self:
2888            reader.read()   # Discard (already parsed item first line).
2889            writer.write_tag(self.tag.term, [self.label],
2890                             self.presubs, self.attributes, trace='list term')
2891            if self.text:
2892                break
2893        writer.write(labeltag[1], trace='list label close')
2894        # Write item text.
2895        self.translate_item()
2896        writer.write(entrytag[1], trace='list entry close')
2897
    def translate_item(self):
        """
        Write a single list item: the item open tag, the item text, any
        continued or nested elements and finally the item close tag.
        """
        if self.type == 'callout':
            self.attributes['coids'] = calloutmap.calloutids(self.ordinal)
        itemtag = subs_tag(self.tag.item, self.attributes)
        writer.write(itemtag[0], trace='list item open')
        # Write ItemText.
        text = reader.read_until(lists.terminators)
        if self.text:
            # Text on the item's first line precedes the lines just read.
            text = [self.text] + list(text)
        if text:
            writer.write_tag(self.tag.text, text, self.presubs, self.attributes, trace='list text')
        # Process explicit and implicit list item continuations.
        while True:
            # A line consisting of a single '+' is an explicit continuation.
            continuation = reader.read_next() == '+'
            if continuation:
                reader.read()  # Discard continuation line.
            while Lex.next_element() in (BlockTitle, AttributeList):
                # Consume continued element title and attributes.
                Lex.next_element().translate()
            if not continuation and BlockTitle.title:
                # Titled elements terminate the list.
                break
            next = Lex.next_element()
            if next in lists.open:
                # The element belongs to this list or to an enclosing one.
                break
            elif isinstance(next, List):
                # Any other list is translated as part of this item.
                next.translate()
            elif isinstance(next, Paragraph) and 'listelement' in next.options:
                next.translate()
            elif continuation:
                # This is where continued elements are processed.
                if next is Title:
                    message.error('section title not allowed in list item', halt=True)
                next.translate()
            else:
                break
        writer.write(itemtag[1], trace='list item close')
2935
2936    @staticmethod
2937    def calc_style(index):
2938        """Return the numbered list style ('arabic'...) of the list item index.
2939        Return None if unrecognized style."""
2940        if re.match(r'^\d+[\.>]$', index):
2941            style = 'arabic'
2942        elif re.match(r'^[ivx]+\)$', index):
2943            style = 'lowerroman'
2944        elif re.match(r'^[IVX]+\)$', index):
2945            style = 'upperroman'
2946        elif re.match(r'^[a-z]\.$', index):
2947            style = 'loweralpha'
2948        elif re.match(r'^[A-Z]\.$', index):
2949            style = 'upperalpha'
2950        else:
2951            assert False
2952        return style
2953
2954    @staticmethod
2955    def calc_index(index, style):
2956        """Return the ordinal number of (1...) of the list item index
2957        for the given list style."""
2958        def roman_to_int(roman):
2959            roman = roman.lower()
2960            digits = {'i': 1, 'v': 5, 'x': 10}
2961            result = 0
2962            for i in range(len(roman)):
2963                digit = digits[roman[i]]
2964                # If next digit is larger this digit is negative.
2965                if i + 1 < len(roman) and digits[roman[i + 1]] > digit:
2966                    result -= digit
2967                else:
2968                    result += digit
2969            return result
2970        index = index[:-1]
2971        if style == 'arabic':
2972            ordinal = int(index)
2973        elif style == 'lowerroman':
2974            ordinal = roman_to_int(index)
2975        elif style == 'upperroman':
2976            ordinal = roman_to_int(index)
2977        elif style == 'loweralpha':
2978            ordinal = ord(index) - ord('a') + 1
2979        elif style == 'upperalpha':
2980            ordinal = ord(index) - ord('A') + 1
2981        else:
2982            assert False
2983        return ordinal
2984
2985    def check_index(self):
2986        """Check calculated self.ordinal (1,2,...) against the item number
2987        in the document (self.index) and check the number style is the same as
2988        the first item (self.number_style)."""
2989        assert self.type in ('numbered', 'callout')
2990        if self.index:
2991            style = self.calc_style(self.index)
2992            if style != self.number_style:
2993                message.warning('list item style: expected %s got %s' % (self.number_style, style), offset=1)
2994            ordinal = self.calc_index(self.index, style)
2995            if ordinal != self.ordinal:
2996                message.warning('list item index: expected %s got %s' % (self.ordinal, ordinal), offset=1)
2997
2998    def check_tags(self):
2999        """ Check that all necessary tags are present. """
3000        tags = set(Lists.TAGS)
3001        if self.type != 'labeled':
3002            tags = tags.difference(['entry', 'label', 'term'])
3003        missing = tags.difference(list(self.tag.keys()))
3004        if missing:
3005            self.error('missing tag(s): %s' % ','.join(missing), halt=True)
3006
    def translate(self):
        """
        Translate a complete list: write the list open tag, translate every
        item (or labeled entry) and write the list close tag.
        The list is kept on the lists.open stack while it is translated.
        """
        AbstractBlock.translate(self)
        if self.short_name() in ('bibliography', 'glossary', 'qanda'):
            message.deprecated('old %s list syntax' % self.short_name())
        lists.open.append(self)
        # Named delimiter groups other than 'label', 'text' and 'index' are
        # treated as list attributes.
        attrs = self.mo.groupdict().copy()
        for k in ('label', 'text', 'index'):
            if k in attrs:
                del attrs[k]
        if self.index:
            # Set the numbering style from first list item.
            attrs['style'] = self.calc_style(self.index)
        BlockTitle.consume(attrs)
        AttributeList.consume(attrs)
        self.merge_attributes(attrs, ['tags'])
        self.push_blockname()
        if self.type in ('numbered', 'callout'):
            self.number_style = self.attributes.get('style')
            if self.number_style not in self.NUMBER_STYLES:
                message.error('illegal numbered list style: %s' % self.number_style)
                # Fall back to default style.
                self.attributes['style'] = self.number_style = self.style
        self.tag = lists.tags[self.parameters.tags]
        self.check_tags()
        if 'width' in self.attributes:
            # Set horizontal list 'labelwidth' and 'itemwidth' attributes.
            v = str(self.attributes['width'])
            # One or two digits with an optional trailing percent sign.
            mo = re.match(r'^(\d{1,2})%?$', v)
            if mo:
                labelwidth = int(mo.group(1))
                self.attributes['labelwidth'] = str(labelwidth)
                self.attributes['itemwidth'] = str(100 - labelwidth)
            else:
                self.error('illegal attribute value: width="%s"' % v)
        stag, etag = subs_tag(self.tag.list, self.attributes)
        if stag:
            writer.write(stag, trace='list open')
        self.ordinal = 0
        # Process list till list syntax changes or there is a new title.
        while Lex.next_element() is self and not BlockTitle.title:
            self.ordinal += 1
            document.attributes['listindex'] = str(self.ordinal)
            if self.type in ('numbered', 'callout'):
                self.check_index()
            if self.type in ('bulleted', 'numbered', 'callout'):
                reader.read()   # Discard (already parsed item first line).
                self.translate_item()
            elif self.type == 'labeled':
                self.translate_entry()
            else:
                raise AssertionError('illegal [%s] list type' % self.defname)
        if etag:
            writer.write(etag, trace='list close')
        if self.type == 'callout':
            calloutmap.validate(self.ordinal)
            calloutmap.listclose()
        lists.open.pop()
        if len(lists.open):
            # Back inside the parent list: restore its item number.
            document.attributes['listindex'] = str(lists.open[-1].ordinal)
        self.pop_blockname()
3067
3068
class Lists(AbstractBlocks):
    """List of List objects."""
    BLOCK_TYPE = List
    PREFIX = 'listdef-'
    TYPES = ('bulleted', 'numbered', 'labeled', 'callout')
    TAGS = ('list', 'entry', 'item', 'text', 'label', 'term')

    def __init__(self):
        AbstractBlocks.__init__(self)
        self.open = []  # A stack of the current and parent lists.
        self.tags = {}    # List tags dictionary. Each entry is a tags AttrDict.
        self.terminators = None    # List of compiled re's.

    def initialize(self):
        """Compile the patterns passed to reader.read_until() when list item
        text is read."""
        patterns = (
            r'^\+$|^$',
            AttributeList.pattern,
            lists.delimiters,
            blocks.delimiters,
            tables.delimiters,
            tables_OLD.delimiters,
        )
        self.terminators = [re.compile(pattern) for pattern in patterns]

    def load(self, sections):
        """Load the listdef-* definitions and the listtags-* sections."""
        AbstractBlocks.load(self, sections)
        self.load_tags(sections)

    def load_tags(self, sections):
        """
        Load listtags-* conf file sections to self.tags.
        Entries of a section loaded earlier under the same name are updated;
        tag names that are not in TAGS produce a warning.
        """
        for section in list(sections.keys()):
            mo = re.match(r'^listtags-(?P<name>\w+)$', section)
            if not mo:
                continue
            name = mo.group('name')
            d = self.tags[name] if name in self.tags else AttrDict()
            parse_entries(sections.get(section, ()), d)
            for k in list(d.keys()):
                if k not in self.TAGS:
                    message.warning('[%s] contains illegal list tag: %s' % (section, k))
            self.tags[name] = d

    def validate(self):
        """Validate the list definitions.
        Raises EAsciiDoc if a list definition has an illegal type."""
        AbstractBlocks.validate(self)
        for listdef in self.blocks:
            # Check list has valid type.
            if listdef.type in Lists.TYPES:
                listdef.validate()
            else:
                raise EAsciiDoc('[%s] illegal type' % listdef.defname)

    def dump(self):
        """Dump the list definitions followed by the listtags-* sections."""
        AbstractBlocks.dump(self)
        for name, tags in list(self.tags.items()):
            dump_section('listtags-' + name, tags)
3126
3127
class DelimitedBlock(AbstractBlock):
    """Block whose body lies between an opening and a closing delimiter line."""

    def __init__(self):
        AbstractBlock.__init__(self)

    def load(self, name, entries):
        AbstractBlock.load(self, name, entries)

    def dump(self):
        """Dump the inherited definition followed by one empty line."""
        AbstractBlock.dump(self)
        sys.stdout.write('%s%s' % ('', writer.newline))

    def isnext(self):
        return AbstractBlock.isnext(self)

    def translate(self):
        """
        Translate the delimited block at the current reader position.

        The body is discarded for 'skip' blocks and for backend blocks in
        safe mode, translated as a section body for 'sectionbody' blocks,
        and otherwise substituted, optionally filtered and written between
        the template's start and end tags.
        """
        AbstractBlock.translate(self)
        reader.read()   # Consume the opening delimiter line.
        self.merge_attributes(AttributeList.attrs)
        if 'skip' not in self.parameters.options:
            BlockTitle.consume(self.attributes)
            AttributeList.consume()
        if 'options' in self.attributes:
            # Publish each listed option as an empty '<option>-option' attribute.
            for opt in parse_options(self.attributes['options'], (), 'illegal option name'):
                self.attributes[opt + '-option'] = ''
        self.push_blockname()
        block_options = self.parameters.options
        discard = 'skip' in block_options
        if not discard and safe() and self.defname == 'blockdef-backend':
            message.unsafe('Backend Block')
            discard = True
        if discard:
            reader.read_until(self.delimiter, same_file=True)
        else:
            template = subs_attrs(self.parameters.template, self.attributes)
            label = self.short_name() + ' block'
            if 'sectionbody' in block_options:
                # The body is translated exactly like a section body.
                stag, etag = config.section2tags(template, self.attributes)
                writer.write(stag, trace=label + ' open')
                Section.translate_body(self)
                writer.write(etag, trace=label + ' close')
            else:
                stag = config.section2tags(template, self.attributes, skipend=True)[0]
                lines = reader.read_until(self.delimiter, same_file=True)
                presubs = self.parameters.presubs
                postsubs = self.parameters.postsubs
                lines = Lex.subs(lines, presubs)
                block_filter = self.parameters.filter
                if block_filter:
                    lines = filter_lines(block_filter, lines, self.attributes)
                lines = Lex.subs(lines, postsubs)
                # The end tag is generated only after the body is processed.
                etag = config.section2tags(template, self.attributes, skipstart=True)[1]
                writer.write(dovetail_tags(stag, lines, etag), trace=label)
            trace(self.short_name() + ' block close', etag)
        if not reader.eof():
            closing = reader.read()   # Consume the closing delimiter line.
            assert re.match(self.delimiter, closing)
        else:
            self.error('missing closing delimiter', self.start)
        self.pop_blockname()
3190
3191
class DelimitedBlocks(AbstractBlocks):
    """
    List of delimited blocks.

    BLOCK_TYPE and PREFIX are read by AbstractBlocks, which is not shown
    here; presumably one DelimitedBlock is created per 'blockdef-*' conf
    file section -- confirm against AbstractBlocks.load().
    """
    BLOCK_TYPE = DelimitedBlock
    PREFIX = 'blockdef-'

    def __init__(self):
        """Initialise via AbstractBlocks; no state is added here."""
        AbstractBlocks.__init__(self)

    def load(self, sections):
        """Update blocks defined in 'sections' dictionary."""
        AbstractBlocks.load(self, sections)

    def validate(self):
        """Delegate validation to AbstractBlocks; no extra checks are made."""
        AbstractBlocks.validate(self)
3206
3207
class Column:
    """Settings for one table column: width, alignment and style."""
    def __init__(self, width=None, align_spec=None, style=None):
        # A missing or empty width becomes '1'.
        self.width = width if width else '1'
        alignment = Table.parse_align_spec(align_spec)
        self.halign = alignment[0]
        self.valign = alignment[1]
        # Style name or None.
        self.style = style
        # The remaining values are calculated later.
        self.abswidth = None    # 1..   (page units).
        self.pcwidth = None     # 1..99 (percentage).
3217
3218
class Cell:
    """One table cell: raw data plus span, alignment and style settings."""
    def __init__(self, data, span_spec=None, align_spec=None, style=None):
        self.data = data
        spans = Table.parse_span_spec(span_spec)
        self.span = spans[0]
        self.vspan = spans[1]
        alignment = Table.parse_align_spec(align_spec)
        self.halign = alignment[0]
        self.valign = alignment[1]
        self.style = style
        # True only for placeholders created by clone_reserve().
        self.reserved = False

    def __repr__(self):
        style = self.style or ''
        fields = (self.span, self.vspan, self.halign, self.valign, style, self.data)
        return '<Cell: %d.%d %s.%s %s "%s">' % fields

    def clone_reserve(self):
        """
        Return a shallow copy of this cell, marked as reserved and with a
        vertical span of 1, to stand in for a vertically spanned cell.
        """
        placeholder = copy.copy(self)
        placeholder.reserved = True
        placeholder.vspan = 1
        return placeholder
3240
3241
3242class Table(AbstractBlock):
3243    ALIGN = {'<': 'left', '>': 'right', '^': 'center'}
3244    VALIGN = {'<': 'top', '>': 'bottom', '^': 'middle'}
3245    FORMATS = ('psv', 'csv', 'dsv')
3246    SEPARATORS = dict(
3247        csv=',',
3248        dsv=r':|\n',
3249        # The count and align group matches are not exact.
3250        psv=r'((?<!\S)((?P<span>[\d.]+)(?P<op>[*+]))?(?P<align>[<\^>.]{,3})?(?P<style>[a-z])?)?\|'
3251    )
3252
    def __init__(self):
        """Initialise the table definition with its default parameters."""
        AbstractBlock.__init__(self)
        # Extend the inherited CONF_ENTRIES with the table specific names
        # (presumably the conf file entry names accepted for this block --
        # confirm against AbstractBlock).
        self.CONF_ENTRIES += ('format', 'tags', 'separator')
        # tabledef conf file parameters.
        self.format = 'psv'
        self.separator = None
        self.tags = None          # Name of tabletags-<tags> conf section.
        # Calculated parameters (set by validate_attributes()).
        self.abswidth = None      # Table width in page units.
        self.pcwidth = None       # Table width as a percentage.
        self.rows = []            # Parsed rows, each row is a list of Cells.
        self.columns = []         # List of Columns.
3265
3266    @staticmethod
3267    def parse_align_spec(align_spec):
3268        """
3269        Parse AsciiDoc cell alignment specifier and return 2-tuple with
3270        horizontal and vertical alignment names. Unspecified alignments
3271        set to None.
3272        """
3273        result = (None, None)
3274        if align_spec:
3275            mo = re.match(r'^([<\^>])?(\.([<\^>]))?$', align_spec)
3276            if mo:
3277                result = (Table.ALIGN.get(mo.group(1)),
3278                          Table.VALIGN.get(mo.group(3)))
3279        return result
3280
3281    @staticmethod
3282    def parse_span_spec(span_spec):
3283        """
3284        Parse AsciiDoc cell span specifier and return 2-tuple with horizontal
3285        and vertical span counts. Set default values (1,1) if not
3286        specified.
3287        """
3288        result = (None, None)
3289        if span_spec:
3290            mo = re.match(r'^(\d+)?(\.(\d+))?$', span_spec)
3291            if mo:
3292                result = (mo.group(1) and int(mo.group(1)),
3293                          mo.group(3) and int(mo.group(3)))
3294        return (result[0] or 1, result[1] or 1)
3295
    def load(self, name, entries):
        """Load the definition via AbstractBlock.load(); nothing is added here."""
        AbstractBlock.load(self, name, entries)
3298
3299    def dump(self):
3300        AbstractBlock.dump(self)
3301        write = lambda s: sys.stdout.write('%s%s' % (s, writer.newline))
3302        write('format=' + self.format)
3303        write('')
3304
3305    def validate(self):
3306        AbstractBlock.validate(self)
3307        if self.format not in Table.FORMATS:
3308            self.error('illegal format=%s' % self.format, halt=True)
3309        self.tags = self.tags or 'default'
3310        tags = [self.tags]
3311        tags += [s['tags'] for s in list(self.styles.values()) if 'tags' in s]
3312        for t in tags:
3313            if t not in tables.tags:
3314                self.error('missing section: [tabletags-%s]' % t, halt=True)
3315        if self.separator:
3316            # Evaluate escape characters.
3317            self.separator = literal_eval('"' + self.separator + '"')
3318        # TODO: Move to class Tables
3319        # Check global table parameters.
3320        elif config.pagewidth is None:
3321            self.error('missing [miscellaneous] entry: pagewidth')
3322        elif config.pageunits is None:
3323            self.error('missing [miscellaneous] entry: pageunits')
3324
3325    def validate_attributes(self):
3326        """Validate and parse table attributes."""
3327        # Set defaults.
3328        format = self.format
3329        tags = self.tags
3330        separator = self.separator
3331        abswidth = float(config.pagewidth)
3332        pcwidth = 100.0
3333        for k, v in list(self.attributes.items()):
3334            if k == 'format':
3335                if v not in self.FORMATS:
3336                    self.error('illegal %s=%s' % (k, v))
3337                else:
3338                    format = v
3339            elif k == 'tags':
3340                if v not in tables.tags:
3341                    self.error('illegal %s=%s' % (k, v))
3342                else:
3343                    tags = v
3344            elif k == 'separator':
3345                separator = v
3346            elif k == 'width':
3347                if not re.match(r'^\d{1,3}%$', v) or int(v[:-1]) > 100:
3348                    self.error('illegal %s=%s' % (k, v))
3349                else:
3350                    abswidth = float(v[:-1]) / 100 * config.pagewidth
3351                    pcwidth = float(v[:-1])
3352        # Calculate separator if it has not been specified.
3353        if not separator:
3354            separator = Table.SEPARATORS[format]
3355        if format == 'csv':
3356            if len(separator) > 1:
3357                self.error('illegal csv separator=%s' % separator)
3358                separator = ','
3359        else:
3360            if not is_re(separator):
3361                self.error('illegal regular expression: separator=%s' % separator)
3362        self.parameters.format = format
3363        self.parameters.tags = tags
3364        self.parameters.separator = separator
3365        self.abswidth = abswidth
3366        self.pcwidth = pcwidth
3367
3368    def get_tags(self, params):
3369        tags = self.get_param('tags', params)
3370        assert(tags and tags in tables.tags)
3371        return tables.tags[tags]
3372
3373    def get_style(self, prefix):
3374        """
3375        Return the style dictionary whose name starts with 'prefix'.
3376        """
3377        if prefix is None:
3378            return None
3379        names = list(self.styles.keys())
3380        names.sort()
3381        for name in names:
3382            if name.startswith(prefix):
3383                return self.styles[name]
3384        else:
3385            self.error('missing style: %s*' % prefix)
3386            return None
3387
    def parse_cols(self, cols, halign, valign):
        """
        Build list of column objects from table 'cols', 'halign' and 'valign'
        attributes.

        'cols' is either a column count or a comma separated list of column
        specifiers. Columns are appended to self.columns and each one has
        its halign, valign, width, abswidth and pcwidth set. Illegal
        specifiers and inconsistent widths are reported with self.error().
        """
        # [<multiplier>*][<align>][<width>][<style>]
        COLS_RE1 = r'^((?P<count>\d+)\*)?(?P<align>[<\^>.]{,3})?(?P<width>\d+%?)?(?P<style>[a-z]\w*)?$'
        # [<multiplier>*][<width>][<align>][<style>]
        COLS_RE2 = r'^((?P<count>\d+)\*)?(?P<width>\d+%?)?(?P<align>[<\^>.]{,3})?(?P<style>[a-z]\w*)?$'
        reo1 = re.compile(COLS_RE1)
        reo2 = re.compile(COLS_RE2)
        # Normalise to a string: translate() passes an integer when the
        # column count was calculated from the first table line.
        cols = str(cols)
        if re.match(r'^\d+$', cols):
            # A bare number is a count of default columns.
            for i in range(int(cols)):
                self.columns.append(Column())
        else:
            for col in re.split(r'\s*,\s*', cols):
                # Try the align-before-width form first, then width-before-align.
                mo = reo1.match(col)
                if not mo:
                    mo = reo2.match(col)
                if mo:
                    # The optional multiplier repeats the same column spec.
                    count = int(mo.groupdict().get('count') or 1)
                    for i in range(count):
                        self.columns.append(
                            Column(mo.group('width'), mo.group('align'),
                                   self.get_style(mo.group('style')))
                        )
                else:
                    self.error('illegal column spec: %s' % col, self.start)
        # Set column (and indirectly cell) default alignments.
        # Precedence: column spec, table attribute, document attribute,
        # then the built-in 'left'/'top'.
        for col in self.columns:
            col.halign = col.halign or halign or document.attributes.get('halign') or 'left'
            col.valign = col.valign or valign or document.attributes.get('valign') or 'top'
        # Validate widths and calculate missing widths.
        n = 0           # Number of columns that have a width.
        percents = 0    # Sum of the percentage widths.
        props = 0       # Sum of the proportional widths.
        for col in self.columns:
            if col.width:
                if col.width[-1] == '%':
                    percents += int(col.width[:-1])
                else:
                    props += int(col.width)
                n += 1
        if percents > 0 and props > 0:
            self.error('mixed percent and proportional widths: %s' % cols, self.start)
        # NOTE(review): if self.error() does not halt, a mixed specification
        # continues with pcunits set and the percentage branch below calls
        # float(col.width[:-1]) on the proportional widths too -- confirm
        # this cannot raise ValueError (e.g. for a one digit width).
        pcunits = percents > 0
        # Fill in missing widths.
        # NOTE(review): Column.__init__ replaces a missing width with '1',
        # so every column built above already has a truthy width and this
        # branch looks unreachable -- confirm before relying on it.
        if n < len(self.columns) and percents < 100:
            if pcunits:
                width = float(100 - percents) / float(len(self.columns) - n)
            else:
                width = 1
            for col in self.columns:
                if not col.width:
                    if pcunits:
                        col.width = str(int(width)) + '%'
                        percents += width
                    else:
                        col.width = str(width)
                        props += width
        # Calculate column alignment and absolute and percent width values.
        percents = 0    # Re-accumulated from the unrounded column percentages.
        for col in self.columns:
            if pcunits:
                col.pcwidth = float(col.width[:-1])
            else:
                # NOTE(review): props is 0 when every proportional width is
                # '0'; this division would then raise ZeroDivisionError.
                col.pcwidth = (float(col.width) / props) * 100
            col.abswidth = self.abswidth * (col.pcwidth / 100)
            # abswidth is stored as a string: two decimals for the listed
            # page units, a whole number otherwise.
            if config.pageunits in ('cm', 'mm', 'in', 'em'):
                col.abswidth = '%.2f' % py2round(col.abswidth, 2)
            else:
                col.abswidth = '%d' % py2round(col.abswidth)
            percents += col.pcwidth
            col.pcwidth = int(col.pcwidth)
        if py2round(percents) > 100:
            self.error('total width exceeds 100%%: %s' % cols, self.start)
        elif py2round(percents) < 100:
            self.error('total width less than 100%%: %s' % cols, self.start)
3467
3468    def build_colspecs(self):
3469        """
3470        Generate column related substitution attributes.
3471        """
3472        cols = []
3473        i = 1
3474        for col in self.columns:
3475            colspec = self.get_tags(col.style).colspec
3476            if colspec:
3477                self.attributes['halign'] = col.halign
3478                self.attributes['valign'] = col.valign
3479                self.attributes['colabswidth'] = col.abswidth
3480                self.attributes['colpcwidth'] = col.pcwidth
3481                self.attributes['colnumber'] = str(i)
3482                s = subs_attrs(colspec, self.attributes)
3483                if not s:
3484                    message.warning('colspec dropped: contains undefined attribute')
3485                else:
3486                    cols.append(s)
3487            i += 1
3488        if cols:
3489            self.attributes['colspecs'] = writer.newline.join(cols)
3490
3491    def parse_rows(self, text):
3492        """
3493        Parse the table source text into self.rows (a list of rows, each row
3494        is a list of Cells.
3495        """
3496        reserved = {}  # Reserved cells generated by rowspans.
3497        if self.parameters.format in ('psv', 'dsv'):
3498            colcount = len(self.columns)
3499            parsed_cells = self.parse_psv_dsv(text)
3500            ri = 0  # Current row index 0..
3501            ci = 0  # Column counter 0..colcount
3502            row = []
3503            i = 0
3504            while True:
3505                resv = reserved.get(ri) and reserved[ri].get(ci)
3506                if resv:
3507                    # We have a cell generated by a previous row span so
3508                    # process it before continuing with the current parsed
3509                    # cell.
3510                    cell = resv
3511                else:
3512                    if i >= len(parsed_cells):
3513                        break   # No more parsed or reserved cells.
3514                    cell = parsed_cells[i]
3515                    i += 1
3516                    if cell.vspan > 1:
3517                        # Generate ensuing reserved cells spanned vertically by
3518                        # the current cell.
3519                        for j in range(1, cell.vspan):
3520                            if ri + j not in reserved:
3521                                reserved[ri + j] = {}
3522                            reserved[ri + j][ci] = cell.clone_reserve()
3523                ci += cell.span
3524                if ci <= colcount:
3525                    row.append(cell)
3526                if ci >= colcount:
3527                    self.rows.append(row)
3528                    ri += 1
3529                    row = []
3530                    ci = 0
3531        elif self.parameters.format == 'csv':
3532            self.rows = self.parse_csv(text)
3533        else:
3534            assert True, 'illegal table format'
3535        # Check for empty rows containing only reserved (spanned) cells.
3536        for ri, row in enumerate(self.rows):
3537            empty = True
3538            for cell in row:
3539                if not cell.reserved:
3540                    empty = False
3541                    break
3542            if empty:
3543                message.warning('table row %d: empty spanned row' % (ri + 1))
3544        # Check that all row spans match.
3545        for ri, row in enumerate(self.rows):
3546            row_span = 0
3547            for cell in row:
3548                row_span += cell.span
3549            if ri == 0:
3550                header_span = row_span
3551            if row_span < header_span:
3552                message.warning('table row %d: does not span all columns' % (ri + 1))
3553            if row_span > header_span:
3554                message.warning('table row %d: exceeds columns span' % (ri + 1))
3555
3556    def subs_rows(self, rows, rowtype='body'):
3557        """
3558        Return a string of output markup from a list of rows, each row
3559        is a list of raw data text.
3560        """
3561        tags = tables.tags[self.parameters.tags]
3562        if rowtype == 'header':
3563            rtag = tags.headrow
3564        elif rowtype == 'footer':
3565            rtag = tags.footrow
3566        else:
3567            rtag = tags.bodyrow
3568        result = []
3569        stag, etag = subs_tag(rtag, self.attributes)
3570        for row in rows:
3571            result.append(stag)
3572            result += self.subs_row(row, rowtype)
3573            result.append(etag)
3574        return writer.newline.join(result)
3575
    def subs_row(self, row, rowtype):
        """
        Substitute the list of Cells using the data tag.
        Returns a list of marked up table cell elements.

        'rowtype' is 'header', 'footer' or anything else for a body row; it
        selects the data tag and whether the paragraph tag is applied. The
        per-cell attributes (halign, valign, colnumber, colspan, ...) are
        written to self.attributes before the tags are substituted.
        """
        result = []
        i = 0   # Zero based index of the column the current cell starts in.
        for cell in row:
            if cell.reserved:
                # Skip vertically spanned placeholders.
                i += cell.span
                continue
            if i >= len(self.columns):
                break   # Skip cells outside the header width.
            col = self.columns[i]
            # Cell level alignment takes precedence over the column's.
            self.attributes['halign'] = cell.halign or col.halign
            self.attributes['valign'] = cell.valign or col.valign
            self.attributes['colabswidth'] = col.abswidth
            self.attributes['colpcwidth'] = col.pcwidth
            self.attributes['colnumber'] = str(i + 1)
            self.attributes['colspan'] = str(cell.span)
            self.attributes['colstart'] = self.attributes['colnumber']
            self.attributes['colend'] = str(i + cell.span)
            self.attributes['rowspan'] = str(cell.vspan)
            self.attributes['morerows'] = str(cell.vspan - 1)
            # Fill missing column data with blanks.
            # NOTE(review): the break above already guarantees
            # i < len(self.columns), so this condition looks always false.
            if i > len(self.columns) - 1:
                data = ''
            else:
                data = cell.data
            if rowtype == 'header':
                # Header cells use only the cell's own style; the column
                # style is not applied.
                colstyle = cell.style
            else:
                # If the cell style is not defined use the column style.
                colstyle = cell.style or col.style
            tags = self.get_tags(colstyle)
            presubs, postsubs = self.get_subs(colstyle)
            # Wrap the cell text in a one element list before substitution.
            data = [data]
            data = Lex.subs(data, presubs)
            data = filter_lines(self.get_param('filter', colstyle),
                                data, self.attributes)
            data = Lex.subs(data, postsubs)
            if rowtype != 'header':
                ptag = tags.paragraph
                if ptag:
                    # Wrap each run of text separated by blank lines in the
                    # paragraph tag.
                    stag, etag = subs_tag(ptag, self.attributes)
                    text = '\n'.join(data).strip()
                    data = []
                    for para in re.split(r'\n{2,}', text):
                        data += dovetail_tags([stag], para.split('\n'), [etag])
            if rowtype == 'header':
                dtag = tags.headdata
            elif rowtype == 'footer':
                dtag = tags.footdata
            else:
                dtag = tags.bodydata
            stag, etag = subs_tag(dtag, self.attributes)
            result = result + dovetail_tags([stag], data, [etag])
            i += cell.span
        return result
3637
3638    def parse_csv(self, text):
3639        """
3640        Parse the table source text and return a list of rows, each row
3641        is a list of Cells.
3642        """
3643        rows = []
3644        rdr = csv.reader(io.StringIO(DEFAULT_NEWLINE.join(text)),
3645                         delimiter=self.parameters.separator, skipinitialspace=True)
3646        try:
3647            for row in rdr:
3648                rows.append([Cell(data) for data in row])
3649        except Exception:
3650            self.error('csv parse error: %s' % row)
3651        return rows
3652
3653    def parse_psv_dsv(self, text):
3654        """
3655        Parse list of PSV or DSV table source text lines and return a list of
3656        Cells.
3657        """
3658        def append_cell(data, span_spec, op, align_spec, style):
3659            op = op or '+'
3660            if op == '*':   # Cell multiplier.
3661                span = Table.parse_span_spec(span_spec)[0]
3662                for i in range(span):
3663                    cells.append(Cell(data, '1', align_spec, style))
3664            elif op == '+':  # Column spanner.
3665                cells.append(Cell(data, span_spec, align_spec, style))
3666            else:
3667                self.error('illegal table cell operator')
3668        text = '\n'.join(text)
3669        separator = '(?ms)' + self.parameters.separator
3670        format = self.parameters.format
3671        start = 0
3672        span = None
3673        op = None
3674        align = None
3675        style = None
3676        cells = []
3677        data = ''
3678        for mo in re.finditer(separator, text):
3679            data += text[start:mo.start()]
3680            if data.endswith('\\'):
3681                data = data[:-1] + mo.group()  # Reinstate escaped separators.
3682            else:
3683                append_cell(data, span, op, align, style)
3684                span = mo.groupdict().get('span')
3685                op = mo.groupdict().get('op')
3686                align = mo.groupdict().get('align')
3687                style = mo.groupdict().get('style')
3688                if style:
3689                    style = self.get_style(style)
3690                data = ''
3691            start = mo.end()
3692        # Last cell follows final separator.
3693        data += text[start:]
3694        append_cell(data, span, op, align, style)
3695        # We expect a dummy blank item preceding the first PSV cell.
3696        if format == 'psv':
3697            if cells[0].data.strip() != '':
3698                self.error('missing leading separator: %s' % separator, self.start)
3699            else:
3700                cells.pop(0)
3701        return cells
3702
    def translate(self):
        """
        Translate the table at the current reader position and write the
        resulting markup with writer.write().

        Reads the source lines up to the closing delimiter, builds the
        columns and rows, renders header, footer and body rows and
        substitutes them into the table template. An empty table produces
        a warning and no output.
        """
        AbstractBlock.translate(self)
        reader.read()   # Discard delimiter.
        # Reset instance specific properties.
        self.columns = []
        self.rows = []
        attrs = {}
        BlockTitle.consume(attrs)
        # Mix in document attribute list.
        AttributeList.consume(attrs)
        self.merge_attributes(attrs)
        self.validate_attributes()
        # Add global and calculated configuration parameters.
        self.attributes['pagewidth'] = config.pagewidth
        self.attributes['pageunits'] = config.pageunits
        self.attributes['tableabswidth'] = int(self.abswidth)
        self.attributes['tablepcwidth'] = int(self.pcwidth)
        # Read the entire table.
        text = reader.read_until(self.delimiter)
        if reader.eof():
            self.error('missing closing delimiter', self.start)
        else:
            delimiter = reader.read()   # Discard closing delimiter.
            assert re.match(self.delimiter, delimiter)
        if len(text) == 0:
            # Return before push_blockname() so there is nothing to pop.
            message.warning('[%s] table is empty' % self.defname)
            return
        self.push_blockname('table')
        # 'cols' is taken from the table's own attribute list only.
        cols = attrs.get('cols')
        if not cols:
            # Calculate column count from number of items in first line.
            if self.parameters.format == 'csv':
                cols = text[0].count(self.parameters.separator) + 1
            else:
                cols = 0
                for cell in self.parse_psv_dsv(text[:1]):
                    cols += cell.span
        self.parse_cols(cols, attrs.get('halign'), attrs.get('valign'))
        # Set calculated attributes.
        self.attributes['colcount'] = len(self.columns)
        self.build_colspecs()
        self.parse_rows(text)
        # The 'rowcount' attribute is used by the experimental LaTeX backend.
        self.attributes['rowcount'] = str(len(self.rows))
        # Generate headrows, footrows, bodyrows.
        # Headrow, footrow and bodyrow data replaces same named attributes in
        # the table markup template. In order to ensure this data does not get
        # a second attribute substitution (which would interfere with any
        # already substituted inline passthroughs) unique placeholders are
        # used: the attribute name wrapped in '\x07' characters. They are
        # replaced by the real data after template attribute substitution.
        # NOTE(review): this comment used to justify the placeholders by the
        # tab character being expanded on input, but the code uses '\x07',
        # not a tab -- confirm '\x07' cannot occur in the template output.
        headrows = footrows = bodyrows = None
        for option in self.parameters.options:
            self.attributes[option + '-option'] = ''
        # With the 'header' option the first row becomes the header row.
        if self.rows and 'header' in self.parameters.options:
            headrows = self.subs_rows(self.rows[0:1], 'header')
            self.attributes['headrows'] = '\x07headrows\x07'
            self.rows = self.rows[1:]
        # With the 'footer' option the last remaining row becomes the footer.
        if self.rows and 'footer' in self.parameters.options:
            footrows = self.subs_rows(self.rows[-1:], 'footer')
            self.attributes['footrows'] = '\x07footrows\x07'
            self.rows = self.rows[:-1]
        if self.rows:
            bodyrows = self.subs_rows(self.rows)
            self.attributes['bodyrows'] = '\x07bodyrows\x07'
        table = subs_attrs(config.sections[self.parameters.template],
                           self.attributes)
        table = writer.newline.join(table)
        # Before we finish replace the table head, foot and body place holders
        # with the real data.
        if headrows:
            table = table.replace('\x07headrows\x07', headrows, 1)
        if footrows:
            table = table.replace('\x07footrows\x07', footrows, 1)
        if bodyrows:
            table = table.replace('\x07bodyrows\x07', bodyrows, 1)
        writer.write(table, trace='table')
        self.pop_blockname()
3781
3782
3783class Tables(AbstractBlocks):
3784    """List of tables."""
3785    BLOCK_TYPE = Table
3786    PREFIX = 'tabledef-'
3787    TAGS = ('colspec', 'headrow', 'footrow', 'bodyrow', 'headdata', 'footdata', 'bodydata', 'paragraph')
3788
    def __init__(self):
        """Initialise via AbstractBlocks and start with no table tag sets."""
        AbstractBlocks.__init__(self)
        # Table tags dictionary. Each entry is a tags dictionary, keyed by
        # the <name> part of its tabletags-<name> section (see load_tags()).
        self.tags = {}
3793
    def load(self, sections):
        """Load the table definitions, then the tabletags-* sections."""
        AbstractBlocks.load(self, sections)
        self.load_tags(sections)
3797
3798    def load_tags(self, sections):
3799        """
3800        Load tabletags-* conf file sections to self.tags.
3801        """
3802        for section in list(sections.keys()):
3803            mo = re.match(r'^tabletags-(?P<name>\w+)$', section)
3804            if mo:
3805                name = mo.group('name')
3806                if name in self.tags:
3807                    d = self.tags[name]
3808                else:
3809                    d = AttrDict()
3810                parse_entries(sections.get(section, ()), d)
3811                for k in list(d.keys()):
3812                    if k not in self.TAGS:
3813                        message.warning('[%s] contains illegal table tag: %s' % (section, k))
3814                self.tags[name] = d
3815
3816    def validate(self):
3817        AbstractBlocks.validate(self)
3818        # Check we have a default table definition,
3819        for i in range(len(self.blocks)):
3820            if self.blocks[i].defname == 'tabledef-default':
3821                default = self.blocks[i]
3822                break
3823        else:
3824            raise EAsciiDoc('missing section: [tabledef-default]')
3825        # Propagate defaults to unspecified table parameters.
3826        for b in self.blocks:
3827            if b is not default:
3828                if b.format is None:
3829                    b.format = default.format
3830                if b.template is None:
3831                    b.template = default.template
3832        # Check tags and propagate default tags.
3833        if 'default' not in self.tags:
3834            raise EAsciiDoc('missing section: [tabletags-default]')
3835        default = self.tags['default']
3836        for tag in ('bodyrow', 'bodydata', 'paragraph'):  # Mandatory default tags.
3837            if tag not in default:
3838                raise EAsciiDoc('missing [tabletags-default] entry: %s' % tag)
3839        for t in list(self.tags.values()):
3840            if t is not default:
3841                if t.colspec is None:
3842                    t.colspec = default.colspec
3843                if t.headrow is None:
3844                    t.headrow = default.headrow
3845                if t.footrow is None:
3846                    t.footrow = default.footrow
3847                if t.bodyrow is None:
3848                    t.bodyrow = default.bodyrow
3849                if t.headdata is None:
3850                    t.headdata = default.headdata
3851                if t.footdata is None:
3852                    t.footdata = default.footdata
3853                if t.bodydata is None:
3854                    t.bodydata = default.bodydata
3855                if t.paragraph is None:
3856                    t.paragraph = default.paragraph
3857        # Use body tags if header and footer tags are not specified.
3858        for t in list(self.tags.values()):
3859            if not t.headrow:
3860                t.headrow = t.bodyrow
3861            if not t.footrow:
3862                t.footrow = t.bodyrow
3863            if not t.headdata:
3864                t.headdata = t.bodydata
3865            if not t.footdata:
3866                t.footdata = t.bodydata
3867        # Check table definitions are valid.
3868        for b in self.blocks:
3869            b.validate()
3870
3871    def dump(self):
3872        AbstractBlocks.dump(self)
3873        for k, v in list(self.tags.items()):
3874            dump_section('tabletags-' + k, v)
3875
3876
class Macros:
    """Ordered list of macro definitions plus inline passthrough storage."""

    # Default system macro syntax.
    SYS_RE = (
        r'^(?P<name>[\\]?\w(\w|-)*?)::(?P<target>\S*?)'
        r'(\[(?P<attrlist>.*?)\])$'
    )

    def __init__(self):
        self.macros = []        # List of Macros.
        self.current = None     # The last matched block macro.
        self.passthroughs = []
        # The built-in system macro is always the first list entry.
        system_macro = Macro()
        system_macro.pattern = self.SYS_RE
        system_macro.prefix = '+'
        system_macro.reo = re.compile(system_macro.pattern)
        self.macros.append(system_macro)

    def load(self, entries):
        """Load [macros] section entries.

        An entry without a name deletes the macro that has the same pattern;
        an entry whose pattern is already loaded is reported and ignored.
        """
        for entry in entries:
            m = Macro()
            m.load(entry)
            # Index of the first loaded macro sharing this pattern (if any).
            found = next(
                (i for i, m2 in enumerate(self.macros) if m2.pattern == m.pattern),
                None)
            if m.name is None:
                # Delete undefined macro.
                if found is None:
                    message.warning('unable to delete missing macro: %s' % m.pattern)
                else:
                    del self.macros[found]
            elif found is None:
                self.macros.append(m)
            else:
                message.verbose('macro redefinition: %s%s' % (m.prefix, m.name))

    def dump(self):
        """Write the [macros] section to stdout in conf file format."""
        def write(s):
            sys.stdout.write('%s%s' % (s, writer.newline))

        write('[macros]')
        # The first (built-in system) macro is not dumped.
        for m in self.macros[1:]:
            # '=' separates pattern from name so it is escaped in the pattern.
            macro = '%s=%s%s' % (m.pattern.replace('=', r'\='), m.prefix, m.name)
            if m.subslist is not None:
                macro += '[' + ','.join(m.subslist) + ']'
            write(macro)
        write('')

    def validate(self):
        """When verbose, look up the section of every named non-system
        macro (section_name() warns about missing sections)."""
        if not config.verbose:
            return
        for m in self.macros:
            if m.name and m.prefix != '+':
                m.section_name()

    def subs(self, text, prefix='', callouts=False):
        """Apply all macros of type 'prefix' to 'text'.

        If callouts is True then only callout macros are processed, if False
        then all non-callout macros are processed.
        """
        result = text
        for m in self.macros:
            if m.prefix == prefix and callouts ^ (m.name != 'callout'):
                result = m.subs(result)
        return result

    def isnext(self):
        """Return matching macro if block macro is next on reader."""
        reader.skip_blank_lines()
        line = reader.read_next()
        if not line:
            return False
        for m in self.macros:
            if m.prefix == '#' and m.reo.match(line):
                self.current = m
                return m
        return False

    @lru_cache(maxsize=2048)
    def match(self, prefix, name, text):
        """Return re match object matching 'text' with macro type 'prefix',
        macro name 'name'. 'name' is either the exact macro name or a regular
        expression matched against the macro's 'name' group."""
        for m in self.macros:
            if m.prefix != prefix:
                continue
            mo = m.reo.match(text)
            if mo and (m.name == name or re.match(name, mo.group('name'))):
                return mo
        return None

    def extract_passthroughs(self, text, prefix=''):
        """ Extract the passthrough text and replace with temporary
        placeholders. Any previously extracted passthroughs are discarded."""
        self.passthroughs = []
        for m in self.macros:
            if m.has_passthrough() and m.prefix == prefix:
                text = m.subs_passthroughs(text, self.passthroughs)
        return text

    def restore_passthroughs(self, text):
        """ Replace passthrough placeholders with the original passthrough
        text."""
        for index, passtext in enumerate(self.passthroughs):
            text = text.replace('\x07' + str(index) + '\x07', passtext)
        return text
3983
3984
class Macro:
    """A single macro definition loaded from a [macros] conf file entry."""

    def __init__(self):
        self.pattern = None     # Matching regular expression.
        self.name = ''          # Conf file macro name (None if implicit).
        self.prefix = ''        # '' if inline, '+' if system, '#' if block.
        self.reo = None         # Compiled pattern re object.
        self.subslist = []      # Default subs for macros passtext group.

    def has_passthrough(self):
        """Return True if the macro pattern defines a 'passtext' group."""
        return self.pattern.find(r'(?P<passtext>') != -1

    def section_name(self, name=None):
        """Return macro markup template section name based on macro name and
        prefix.  Warn and return None if the section is not found."""
        assert self.prefix != '+'
        if not name:
            assert self.name
            name = self.name
        suffix = '-blockmacro' if self.prefix == '#' else '-inlinemacro'
        section = name + suffix
        if section not in config.sections:
            message.warning('missing macro section: [%s]' % section)
            return None
        return section

    def load(self, entry):
        """Initialise the macro from a 'pattern=[prefix]name[subslist]' conf
        file entry. An entry consisting of a pattern only marks the macro for
        deletion (name is set to None)."""
        e = parse_entry(entry)
        if e is None:
            # Only the macro pattern was specified, mark for deletion.
            self.name = None
            self.pattern = entry
            return
        if not is_re(e[0]):
            raise EAsciiDoc('illegal macro regular expression: %s' % e[0])
        pattern, name = e
        prefix = ''
        if name and name[0] in ('+', '#'):
            prefix, name = name[0], name[1:]
        # Split the name from the optional passthrough subslist.
        mo = re.match(r'^(?P<name>[^[]*)(\[(?P<subslist>.*)\])?$', name)
        name = mo.group('name')
        if name and not is_name(name):
            raise EAsciiDoc('illegal section name in macro entry: %s' % entry)
        subslist = mo.group('subslist')
        if subslist is not None:
            # Parse and validate passthrough subs.
            subslist = parse_options(subslist, SUBS_OPTIONS, 'illegal subs in macro entry: %s' % entry)
        self.pattern = pattern
        self.reo = re.compile(pattern)
        self.prefix = prefix
        self.name = name
        self.subslist = subslist or []

    def subs(self, text):
        """Return 'text' with every occurrence of this macro replaced by its
        rendered markup template."""
        def subs_func(mo):
            """Function called to perform macro substitution.
            Uses matched macro regular expression object and returns string
            containing the substituted macro body."""
            matched = mo.group()
            # An escaped macro reference is emitted minus its backslash.
            if matched[0] == '\\':
                return matched[1:]
            # Drop groups that didn't participate in match.
            d = {k: v for k, v in mo.groupdict().items() if v is not None}
            name = self.name
            if not name:
                # Implicitly named macro: the name comes from the match.
                if 'name' not in d:
                    message.warning('missing macro name group: %s' % mo.re.pattern)
                    return ''
                name = d['name']
            section_name = self.section_name(name)
            if not section_name:
                return ''
            # If we're dealing with a block macro get optional block ID and
            # block title.
            if self.prefix == '#' and self.name != 'comment':
                AttributeList.consume(d)
                BlockTitle.consume(d)
            # Parse macro attributes.
            if 'attrlist' in d:
                attrlist = d['attrlist']
                if attrlist in (None, ''):
                    del d['attrlist']
                else:
                    if self.prefix == '':
                        # Un-escape ] characters in inline macros.
                        attrlist = attrlist.replace('\\]', ']')
                        d['attrlist'] = attrlist
                    parse_attributes(attrlist, d)
                    # Generate option attributes.
                    if 'options' in d:
                        options = parse_options(d['options'], (), '%s: illegal option name' % name)
                        d.update((option + '-option', '') for option in options)
                    # Substitute single quoted attribute values in block macros.
                    if self.prefix == '#':
                        AttributeList.subs(d)
            if name == 'callout':
                listindex = int(d['index'])
                d['coid'] = calloutmap.add(listindex)
            # The alt attribute is the first image macro positional attribute.
            if name == 'image' and '1' in d:
                d['alt'] = d['1']
            # Un-escape special characters in LaTeX target file names.
            if document.backend == 'latex' and 'target' in d and d['target']:
                if '0' not in d:
                    d['0'] = d['target']
                d['target'] = config.subs_specialchars_reverse(d['target'])
            # BUG: We've already done attribute substitution on the macro which
            # means that any escaped attribute references are now unescaped and
            # will be substituted by config.subs_section() below. As a partial
            # fix have withheld {0} from substitution but this kludge doesn't
            # fix it for other attributes containing unescaped references.
            # Passthrough macros don't have this problem.
            a0 = d.get('0')
            if a0:
                d['0'] = '\x00'  # Replace temporarily with unused character.
            body = config.subs_section(section_name, d)
            if len(body) > 1:
                # Internally processed inline macros use UNIX line separator,
                # block macros use the output newline.
                separator = writer.newline if self.prefix == '#' else '\n'
                result = separator.join(body)
            elif len(body) == 1:
                result = body[0]
            else:
                result = ''
            if a0:
                result = result.replace('\x00', a0)
            return result

        return self.reo.sub(subs_func, text)

    def translate(self):
        """ Block macro translation."""
        assert self.prefix == '#'
        s = reader.read()
        before = s
        passthrough = self.has_passthrough()
        if passthrough:
            s = macros.extract_passthroughs(s, '#')
        s = subs_attrs(s)
        if not s:
            return
        s = self.subs(s)
        if passthrough:
            s = macros.restore_passthroughs(s)
        if s:
            trace('macro block', before, s)
            writer.write(s)

    def subs_passthroughs(self, text, passthroughs):
        """ Replace macro attribute lists in text with placeholders.
        Substitute and append the passthrough attribute lists to the
        passthroughs list."""
        def subs_func(mo):
            """Function called to perform inline macro substitution.
            Uses matched macro regular expression object and returns the
            macro text with its passtext replaced by a placeholder."""
            # Don't process escaped macro references.
            if mo.group()[0] == '\\':
                return mo.group()
            d = mo.groupdict()
            if 'passtext' not in d:
                message.warning('passthrough macro %s: missing passtext group' % d.get('name', ''))
                return mo.group()
            passtext = d['passtext']
            if re.search('\x07\\d+\x07', passtext):
                message.warning('nested inline passthrough')
                return mo.group()
            if d.get('subslist'):
                if d['subslist'].startswith(':'):
                    message.error('block macro cannot occur here: %s' % mo.group(), halt=True)
                subslist = parse_options(d['subslist'], SUBS_OPTIONS, 'illegal passthrough macro subs option')
            else:
                subslist = self.subslist
            passtext = Lex.subs_1(passtext, subslist)
            if passtext is None:
                passtext = ''
            if self.prefix == '':
                # Un-escape ] characters in inline macros.
                passtext = passtext.replace('\\]', ']')
            index = len(passthroughs)
            passthroughs.append(passtext)
            # The placeholder is the passthrough's list index delimited by
            # \x07 (BEL) characters.
            placeholder = '\x07' + str(index) + '\x07'
            head = text[mo.start():mo.start('passtext')]
            tail = text[mo.end('passtext'):mo.end()]
            return head + placeholder + tail

        return self.reo.sub(subs_func, text)
4182
4183
class CalloutMap:
    """Tracks which callouts refer to which callout list items."""

    def __init__(self):
        self.comap = {}         # key = list index, value = callouts list.
        self.calloutindex = 0   # Current callout index number.
        self.listnumber = 1     # Current callout list number.

    def listclose(self):
        """Reset the map and advance to the next callout list number.
        Called when callout list is closed."""
        self.comap = {}
        self.calloutindex = 0
        self.listnumber += 1

    def add(self, listindex):
        """Record the next callout index against 'listindex' and return the
        new callout's id."""
        self.calloutindex += 1
        self.comap.setdefault(listindex, []).append(self.calloutindex)
        return self.calloutid(self.listnumber, self.calloutindex)

    @staticmethod
    def calloutid(listnumber, calloutindex):
        """Return the id for callout 'calloutindex' of list 'listnumber'."""
        return 'CO%d-%d' % (listnumber, calloutindex)

    def calloutids(self, listindex):
        """Return the space separated ids of the callouts that refer to
        'listindex'; warn and return '' if there are none."""
        if listindex not in self.comap:
            message.warning('no callouts refer to list item ' + str(listindex))
            return ''
        return ' '.join(
            self.calloutid(self.listnumber, coindex)
            for coindex in self.comap[listindex]
        )

    def validate(self, maxlistindex):
        """Warn about each callout that refers to a list item beyond
        'maxlistindex'."""
        missing = [i for i in list(self.comap.keys()) if i > maxlistindex]
        for listindex in missing:
            message.warning('callout refers to non-existent list item ' + str(listindex))
4226
4227# ---------------------------------------------------------------------------
4228# Input stream Reader and output stream writer classes.
4229# ---------------------------------------------------------------------------
4230
4231
# The UTF-8 byte order mark (bytes EF BB BF) as it appears once decoded.
UTF8_BOM = '\ufeff'
4233
4234
class Reader1:
    """Line oriented AsciiDoc input file reader. Processes include and
    include1 system macros (conditional inclusion is layered on top by the
    Reader subclass). Tabs are expanded and lines are right trimmed."""
    # This class is not used directly, use Reader class instead.
    READ_BUFFER_MIN = 10        # Read buffer low level.

    def __init__(self):
        self.f = None           # Input file object.
        self.fname = None       # Input file name.
        self.next = []          # Read ahead buffer containing [filename,linenumber,linetext] lists.
        self.cursor = None      # Last read() [filename,linenumber,linetext].
        self.tabsize = 8        # Tab expansion number of spaces.
        self.parent = None      # Included reader's parent reader.
        self._lineno = 0        # The last line read from file object f.
        self.line_ranges = None # line ranges to include
        self.current_depth = 0  # Current include depth.
        self.max_depth = 10     # Initial maximum allowed include depth.
        self.bom = None         # Byte order mark (BOM).
        self.infile = None      # Saved document 'infile' attribute.
        self.indir = None       # Saved document 'indir' attribute.

    def open(self, fname):
        """Open 'fname' ('<stdin>' selects standard input), update the
        document 'infile' and 'indir' attributes and prime the read ahead
        buffer. A leading UTF-8 BOM is stripped and remembered in self.bom."""
        self.fname = fname
        message.verbose('reading: ' + fname)
        if fname == '<stdin>':
            self.f = sys.stdin
            self.infile = None
            self.indir = None
        else:
            self.f = open(fname, 'r', encoding='utf-8')
            self.infile = fname
            self.indir = os.path.dirname(fname)
        document.attributes['infile'] = self.infile
        document.attributes['indir'] = self.indir
        self._lineno = 0            # The last line read from file object f.
        self.next = []
        # Pre-fill buffer by reading the first line and then pushing it back.
        if self.read():
            if self.cursor[2].startswith(UTF8_BOM):
                self.cursor[2] = self.cursor[2][len(UTF8_BOM):]
                self.bom = UTF8_BOM
            self.unread(self.cursor)
            self.cursor = None

    def closefile(self):
        """Used by class methods to close nested include files."""
        self.f.close()
        self.next = []

    def close(self):
        """Close the input file and reset the reader to its initial state."""
        self.closefile()
        self.__init__()

    def readline(self):
        """Return the next raw line from the file that falls inside
        self.line_ranges (every line if line_ranges is None), or '' at EOF.

        Each range is either [n] (the single line n) or [first, last] where
        a last of -1 means "to end of file". self._lineno counts every line
        read from the file, including those skipped.
        """
        while True:
            s = self.f.readline()
            if s:
                self._lineno = self._lineno + 1
            else:
                break

            if self.line_ranges is not None:
                for line_range in self.line_ranges:
                    if len(line_range) == 1 and self._lineno == line_range[0]:
                        break
                    elif len(line_range) == 2 and line_range[0] <= self._lineno and (line_range[1] == -1 or self._lineno <= line_range[1]):
                        break
                else:
                    # Line is outside every range: skip it.
                    continue
                break
            else:
                break
        return s

    def read(self, skip=False):
        """Read next line. Return None if EOF. Expand tabs. Strip trailing
        white space. Maintain self.next read ahead buffer. If skip=True then
        conditional exclusion is active (ifdef and ifndef macros) and include
        macros are passed through unprocessed.

        Raises EAsciiDoc if an include macro has an illegal 'tabsize',
        'depth' or 'lines' argument.
        """
        # Top up buffer.
        if len(self.next) <= self.READ_BUFFER_MIN:
            s = self.readline()
            while s:
                if self.tabsize != 0:
                    s = s.expandtabs(self.tabsize)
                s = s.rstrip()
                self.next.append([self.fname, self._lineno, s])
                if len(self.next) > self.READ_BUFFER_MIN:
                    break
                s = self.readline()
        # Return first (oldest) buffer entry.
        if len(self.next) > 0:
            self.cursor = self.next[0]
            del self.next[0]
            result = self.cursor[2]
            # Check for include macro.
            mo = macros.match('+', r'^include[1]?$', result)
            if mo and not skip:
                # Parse include macro attributes.
                attrs = {}
                parse_attributes(mo.group('attrlist'), attrs)
                # A false 'warnings' attribute silences the missing include
                # file warning.
                warnings = attrs.get('warnings', True)
                # Don't process include macro once the maximum depth is reached.
                if self.current_depth >= self.max_depth:
                    message.warning('maximum include depth exceeded')
                    return result
                # Perform attribute substitution on include macro file name.
                fname = subs_attrs(mo.group('target'))
                if not fname:
                    return Reader1.read(self)   # Return next input line.
                if self.fname != '<stdin>':
                    fname = os.path.expandvars(os.path.expanduser(fname))
                    fname = safe_filename(fname, os.path.dirname(self.fname))
                    if not fname:
                        return Reader1.read(self)   # Return next input line.
                    if not os.path.isfile(fname):
                        if warnings:
                            message.warning('include file not found: %s' % fname)
                        return Reader1.read(self)   # Return next input line.
                    if mo.group('name') == 'include1':
                        if not config.dumping:
                            if fname not in config.include1:
                                message.verbose('include1: ' + fname, linenos=False)
                                # Store the include file in memory for later
                                # retrieval by the {include1:} system attribute.
                                with open(fname, encoding='utf-8') as f:
                                    config.include1[fname] = [s.rstrip() for s in f]
                            return '{include1:%s}' % fname
                        else:
                            # This is a configuration dump, just pass the macro
                            # call through.
                            return result
                # Clone self and set as parent (self assumes the role of child).
                parent = Reader1()
                assign(parent, self)
                self.parent = parent
                # Set attributes in child.
                if 'tabsize' in attrs:
                    try:
                        val = int(attrs['tabsize'])
                        if not val >= 0:
                            raise ValueError('not >= 0')
                        self.tabsize = val
                    except ValueError:
                        raise EAsciiDoc('illegal include macro tabsize argument')
                else:
                    self.tabsize = config.tabsize
                if 'depth' in attrs:
                    try:
                        val = int(attrs['depth'])
                        if not val >= 1:
                            raise ValueError('not >= 1')
                        self.max_depth = self.current_depth + val
                    except ValueError:
                        raise EAsciiDoc("include macro: illegal 'depth' argument")
                if 'lines' in attrs:
                    try:
                        # Ranges are separated by ';' if one is present,
                        # otherwise by ','; range bounds by '..'.
                        separator = ';' if ';' in attrs['lines'] else ','
                        self.line_ranges = [
                            [int(x) for x in line_range.split('..')]
                            for line_range in attrs['lines'].split(separator)
                        ]
                    except ValueError:
                        raise EAsciiDoc("include macro: illegal 'lines' argument")
                else:
                    # The child starts as a copy of the including reader, so
                    # without this reset a nested include would be filtered
                    # by the line ranges of the file that includes it.
                    self.line_ranges = None
                # Process included file.
                message.verbose('include: ' + fname, linenos=False)
                self.open(fname)
                self.current_depth = self.current_depth + 1
                result = Reader1.read(self)
        else:
            # Buffer is empty: eof() pops back to the parent reader (if any),
            # in which case reading continues from there.
            if not Reader1.eof(self):
                result = Reader1.read(self)
            else:
                result = None
        return result

    def eof(self):
        """Returns True if all lines have been read. When an included file
        is exhausted it is closed and the parent reader state (and the
        document 'infile'/'indir' attributes) is restored."""
        if len(self.next) == 0:
            # End of current file.
            if self.parent:
                self.closefile()
                assign(self, self.parent)    # Restore parent reader.
                document.attributes['infile'] = self.infile
                document.attributes['indir'] = self.indir
                return Reader1.eof(self)
            else:
                return True
        else:
            return False

    def read_next(self):
        """Like read() but does not advance file pointer."""
        if Reader1.eof(self):
            return None
        else:
            return self.next[0][2]

    def unread(self, cursor):
        """Push the line (filename,linenumber,linetext) tuple back into the read
        buffer. Note that it's up to the caller to restore the previous
        cursor."""
        assert cursor
        self.next.insert(0, cursor)
4441
4442
4443class Reader(Reader1):
4444    """ Wraps (well, sought of) Reader1 class and implements conditional text
4445    inclusion."""
4446    def __init__(self):
4447        Reader1.__init__(self)
4448        self.depth = 0          # if nesting depth.
4449        self.skip = False       # true if we're skipping ifdef...endif.
4450        self.skipname = ''      # Name of current endif macro target.
4451        self.skipto = -1        # The depth at which skipping is re-enabled.
4452
4453    def read_super(self):
4454        result = Reader1.read(self, self.skip)
4455        if result is None and self.skip:
4456            raise EAsciiDoc('missing endif::%s[]' % self.skipname)
4457        return result
4458
    def read(self):
        """Return the next input line after processing conditional inclusion
        (ifdef, ifndef, ifeval, endif) and executable (eval, sys, sys2) system
        macros. Return None at end of input.

        Raises EAsciiDoc for mismatched or malformed conditional macros, for
        ifeval in a safe document and for ifeval conditions that fail to
        evaluate.
        """
        result = self.read_super()
        if result is None:
            return None
        # While skipping, consume lines until the endif that brings the
        # nesting depth back to the level at which skipping started.
        while self.skip:
            mo = macros.match('+', r'ifdef|ifndef|ifeval|endif', result)
            if mo:
                name = mo.group('name')
                target = mo.group('target')
                attrlist = mo.group('attrlist')
                if name == 'endif':
                    self.depth -= 1
                    if self.depth < 0:
                        raise EAsciiDoc('mismatched macro: %s' % result)
                    if self.depth == self.skipto:
                        self.skip = False
                        # A target on the endif must name the macro that
                        # started the skip.
                        if target and self.skipname != target:
                            raise EAsciiDoc('mismatched macro: %s' % result)
                else:
                    if name in ('ifdef', 'ifndef'):
                        if not target:
                            raise EAsciiDoc('missing macro target: %s' % result)
                        # Only the form without an attribute list opens a
                        # nesting level (it is the form closed by endif).
                        if not attrlist:
                            self.depth += 1
                    elif name == 'ifeval':
                        if not attrlist:
                            raise EAsciiDoc('missing ifeval condition: %s' % result)
                        self.depth += 1
            result = self.read_super()
            if result is None:
                return None
        # Not skipping: act on a conditional macro line, if this is one.
        mo = macros.match('+', r'ifdef|ifndef|ifeval|endif', result)
        if mo:
            name = mo.group('name')
            target = mo.group('target')
            attrlist = mo.group('attrlist')
            if name == 'endif':
                self.depth = self.depth - 1
            else:
                if not target and name in ('ifdef', 'ifndef'):
                    raise EAsciiDoc('missing macro target: %s' % result)
                defined = is_attr_defined(target, document.attributes)
                if name == 'ifdef':
                    if attrlist:
                        # Single-line form: the attribute list is the
                        # conditionally included text.
                        if defined:
                            return attrlist
                    else:
                        self.skip = not defined
                elif name == 'ifndef':
                    if attrlist:
                        if not defined:
                            return attrlist
                    else:
                        self.skip = defined
                elif name == 'ifeval':
                    if safe():
                        message.unsafe('ifeval invalid')
                        raise EAsciiDoc('ifeval invalid safe document')
                    if not attrlist:
                        raise EAsciiDoc('missing ifeval condition: %s' % result)
                    cond = False
                    attrlist = subs_attrs(attrlist)
                    if attrlist:
                        try:
                            # NOTE(review): eval() of document supplied text;
                            # the only guard is the safe() check above.
                            cond = eval(attrlist)
                        except Exception as e:
                            raise EAsciiDoc('error evaluating ifeval condition: %s: %s' % (result, str(e)))
                        message.verbose('ifeval: %s: %r' % (attrlist, cond))
                    self.skip = not cond
                if not attrlist or name == 'ifeval':
                    # Opening a conditional block: remember where skipping
                    # started so the matching endif can end it.
                    if self.skip:
                        self.skipto = self.depth
                        self.skipname = target
                    self.depth = self.depth + 1
            # The macro line itself is consumed; continue with the next line.
            result = self.read()
        if result:
            # Expand executable block macros.
            mo = macros.match('+', r'eval|sys|sys2', result)
            if mo:
                action = mo.group('name')
                cmd = mo.group('attrlist')
                result = system(action, cmd, is_macro=True)
                self.cursor[2] = result  # So we don't re-evaluate.
        if result:
            # Un-escape escaped system macros (drop the leading backslash).
            if macros.match('+', r'\\eval|\\sys|\\sys2|\\ifdef|\\ifndef|\\endif|\\include|\\include1', result):
                result = result[1:]
        return result
4547
4548    def eof(self):
4549        return self.read_next() is None
4550
4551    def read_next(self):
4552        save_cursor = self.cursor
4553        result = self.read()
4554        if result is not None:
4555            self.unread(self.cursor)
4556            self.cursor = save_cursor
4557        return result
4558
4559    def read_lines(self, count=1):
4560        """Return tuple containing count lines."""
4561        result = []
4562        i = 0
4563        while i < count and not self.eof():
4564            result.append(self.read())
4565        return tuple(result)
4566
4567    def read_ahead(self, count=1):
4568        """Same as read_lines() but does not advance the file pointer."""
4569        result = []
4570        putback = []
4571        save_cursor = self.cursor
4572        try:
4573            i = 0
4574            while i < count and not self.eof():
4575                result.append(self.read())
4576                putback.append(self.cursor)
4577                i = i + 1
4578            while putback:
4579                self.unread(putback.pop())
4580        finally:
4581            self.cursor = save_cursor
4582        return tuple(result)
4583
4584    @staticmethod
4585    def skip_blank_lines():
4586        reader.read_until(r'\s*\S+')
4587
4588    def read_until(self, terminators, same_file=False):
4589        """Like read() but reads lines up to (but not including) the first line
4590        that matches the terminator regular expression, regular expression
4591        object or list of regular expression objects. If same_file is True then
4592        the terminating pattern must occur in the file the was being read when
4593        the routine was called."""
4594        if same_file:
4595            fname = self.cursor[0]
4596        result = []
4597        if not isinstance(terminators, list):
4598            if isinstance(terminators, str):
4599                terminators = [re.compile(terminators)]
4600            else:
4601                terminators = [terminators]
4602        while not self.eof():
4603            save_cursor = self.cursor
4604            s = self.read()
4605            if not same_file or fname == self.cursor[0]:
4606                for reo in terminators:
4607                    if reo.match(s):
4608                        self.unread(self.cursor)
4609                        self.cursor = save_cursor
4610                        return tuple(result)
4611            result.append(s)
4612        return tuple(result)
4613
4614
class Writer:
    """Writes lines to output file."""
    def __init__(self):
        self.newline = DEFAULT_NEWLINE   # End of line terminator.
        self.f = None                    # Output file object.
        self.fname = None                # Output file name.
        self.lines_out = 0               # Number of lines written.
        self.skip_blank_lines = False    # If True don't output blank lines.

    def open(self, fname, bom=None):
        """
        Open fname for writing ('<stdout>' selects sys.stdout) and reset the
        written line count.
        bom is optional byte order mark.
        http://en.wikipedia.org/wiki/Byte-order_mark
        """
        self.fname = fname
        if fname == '<stdout>':
            self.f = sys.stdout
        else:
            self.f = open(fname, 'w+', encoding='utf-8', newline="")
        # Report this writer's own file name rather than reaching for the
        # module-level `writer` instance.
        message.verbose('writing: ' + self.fname, False)
        if bom:
            self.f.write(bom)
        self.lines_out = 0

    def close(self):
        """Close the output file. stdout is left open; closing a writer that
        was never opened is a no-op."""
        if self.f is not None and self.fname != '<stdout>':
            self.f.close()

    def write_line(self, line=None):
        """Write line followed by self.newline and count it. Blank lines
        (None, empty or whitespace only) are dropped when
        self.skip_blank_lines is set."""
        if not (self.skip_blank_lines and (not line or not line.strip())):
            self.f.write((line or '') + self.newline)
            self.lines_out = self.lines_out + 1

    def write(self, *args, **kwargs):
        """Iterates arguments, writes tuple and list arguments one line per
        element, else writes argument as single line. If no arguments writes
        blank line. If argument is None nothing is written. self.newline is
        appended to each line.
        If a 'trace' keyword argument is given the first argument is passed
        to trace()."""
        if 'trace' in kwargs and len(args) > 0:
            trace(kwargs['trace'], args[0])
        if len(args) == 0:
            # write_line() maintains lines_out itself; incrementing again
            # here counted the blank line twice (and counted it even when
            # skip_blank_lines suppressed it).
            self.write_line()
        else:
            for arg in args:
                if is_array(arg):
                    for s in arg:
                        self.write_line(s)
                elif arg is not None:
                    self.write_line(arg)

    def write_tag(self, tag, content, subs=None, d=None, **kwargs):
        """Write content enveloped by tag.
        Substitutions specified in the 'subs' list are performed on the
        'content'. 'subs' defaults to config.subsnormal."""
        if subs is None:
            subs = config.subsnormal
        stag, etag = subs_tag(tag, d)
        content = Lex.subs(content, subs)
        if 'trace' in kwargs:
            trace(kwargs['trace'], [stag] + content + [etag])
        if stag:
            self.write(stag)
        if content:
            self.write(content)
        if etag:
            self.write(etag)
4682
4683
4684# ---------------------------------------------------------------------------
4685# Configuration file processing.
4686# ---------------------------------------------------------------------------
def _subs_specialwords(mo):
    """Replacement callback used by Config.subs_specialwords().

    'mo' is the match object for a special word. Returns the expanded markup
    template for the word, or the word itself minus its leading backslash if
    the match was escaped."""
    matched = mo.group()
    # The special word is the pattern that produced the match; it keys the
    # name of the markup template section.
    template = config.specialwords[mo.re.pattern]
    if template not in config.sections:
        raise EAsciiDoc('missing special word template [%s]' % template)
    if matched[0] == '\\':
        return matched[1:]   # Return escaped word.
    # Template arguments: the full match as 'words' plus any named groups,
    # leaving out groups that did not participate in the match.
    candidates = {'words': matched}
    candidates.update(mo.groupdict())
    args = {key: value for key, value in candidates.items() if value is not None}
    lines = subs_attrs(config.sections[template], args)
    if len(lines) == 0:
        return ''
    if len(lines) == 1:
        return lines[0]
    return writer.newline.join(lines)
4711
4712
4713class Config:
4714    """Methods to process configuration files."""
4715    # Non-template section name regexp's.
4716    ENTRIES_SECTIONS = ('tags', 'miscellaneous', 'attributes', 'specialcharacters',
4717                        'specialwords', 'macros', 'replacements', 'quotes', 'titles',
4718                        r'paradef-.+', r'listdef-.+', r'blockdef-.+', r'tabledef-.+',
4719                        r'tabletags-.+', r'listtags-.+', 'replacements[23]', r'old_tabledef-.+')
4720
4721    def __init__(self):
4722        self.sections = OrderedDict()   # Keyed by section name containing lists of section lines.
4723        # Command-line options.
4724        self.verbose = False
4725        self.header_footer = True       # -s, --no-header-footer option.
4726        # [miscellaneous] section.
4727        self.tabsize = 8
4728        self.textwidth = 70             # DEPRECATED: Old tables only.
4729        self.newline = DEFAULT_NEWLINE
4730        self.pagewidth = None
4731        self.pageunits = None
4732        self.outfilesuffix = ''
4733        self.subsnormal = SUBS_NORMAL
4734        self.subsverbatim = SUBS_VERBATIM
4735
4736        self.tags = {}          # Values contain (stag,etag) tuples.
4737        self.specialchars = {}  # Values of special character substitutions.
4738        self.specialwords = {}  # Name is special word pattern, value is macro.
4739        self.replacements = OrderedDict()   # Key is find pattern, value is replace pattern.
4740        self.replacements2 = OrderedDict()
4741        self.replacements3 = OrderedDict()
4742        self.specialsections = {}  # Name is special section name pattern, value is corresponding section name.
4743        self.quotes = OrderedDict()    # Values contain corresponding tag name.
4744        self.fname = ''         # Most recently loaded configuration file name.
4745        self.conf_attrs = {}    # Attributes entries from conf files.
4746        self.cmd_attrs = {}     # Attributes from command-line -a options.
4747        self.loaded = []        # Loaded conf files.
4748        self.include1 = {}      # Holds include1::[] files for {include1:}.
4749        self.dumping = False    # True if asciidoc -c option specified.
4750        self.filters = []       # Filter names specified by --filter option.
4751
4752    def init(self, cmd):
4753        """
4754        Check Python version and locate the executable and configuration files
4755        directory.
4756        cmd is the asciidoc command or asciidoc.py path.
4757        """
4758        if sys.version_info[:2] < MIN_PYTHON_VERSION:
4759            message.stderr('FAILED: Python %d.%d or better required' % MIN_PYTHON_VERSION)
4760            sys.exit(1)
4761        if not os.path.exists(cmd):
4762            message.stderr('FAILED: Missing asciidoc command: %s' % cmd)
4763            sys.exit(1)
4764        global APP_FILE
4765        APP_FILE = os.path.realpath(cmd)
4766        global APP_DIR
4767        APP_DIR = os.path.dirname(APP_FILE)
4768        global USER_DIR
4769        USER_DIR = userdir()
4770        if USER_DIR is not None:
4771            USER_DIR = os.path.join(USER_DIR, '.asciidoc')
4772            if not os.path.isdir(USER_DIR):
4773                USER_DIR = None
4774
4775    def load_file(self, fname, dir=None, include=[], exclude=[]):
4776        """
4777        Loads sections dictionary with sections from file fname.
4778        Existing sections are overlaid.
4779        The 'include' list contains the section names to be loaded.
4780        The 'exclude' list contains section names not to be loaded.
4781        Return False if no file was found in any of the locations.
4782        """
4783        def update_section(section):
4784            """ Update section in sections with contents. """
4785            if section and contents:
4786                if section in sections and self.entries_section(section):
4787                    if ''.join(contents):
4788                        # Merge entries.
4789                        sections[section] += contents
4790                    else:
4791                        del sections[section]
4792                else:
4793                    if section.startswith('+'):
4794                        # Append section.
4795                        if section in sections:
4796                            sections[section] += contents
4797                        else:
4798                            sections[section] = contents
4799                    else:
4800                        # Replace section.
4801                        sections[section] = contents
4802        if dir:
4803            fname = os.path.join(dir, fname)
4804        # Silently skip missing configuration file.
4805        if not os.path.isfile(fname):
4806            return False
4807        # Don't load conf files twice (local and application conf files are the
4808        # same if the source file is in the application directory).
4809        if os.path.realpath(fname) in self.loaded:
4810            return True
4811        rdr = Reader()  # Reader processes system macros.
4812        message.linenos = False         # Disable document line numbers.
4813        rdr.open(fname)
4814        message.linenos = None
4815        self.fname = fname
4816        reo = re.compile(r'^\[(?P<section>\+?[^\W\d][\w-]*)\]\s*$')
4817        sections = OrderedDict()
4818        section, contents = '', []
4819        while not rdr.eof():
4820            s = rdr.read()
4821            if s and s[0] == '#':       # Skip comment lines.
4822                continue
4823            if s[:2] == '\\#':          # Un-escape lines starting with '#'.
4824                s = s[1:]
4825            s = s.rstrip()
4826            found = reo.findall(str(s))
4827            if found:
4828                update_section(section)  # Store previous section.
4829                section = found[0].lower()
4830                contents = []
4831            else:
4832                contents.append(s)
4833        update_section(section)         # Store last section.
4834        rdr.close()
4835        if include:
4836            for s in set(sections) - set(include):
4837                del sections[s]
4838        if exclude:
4839            for s in set(sections) & set(exclude):
4840                del sections[s]
4841        attrs = {}
4842        self.load_sections(sections, attrs)
4843        if not include:
4844            # If all sections are loaded mark this file as loaded.
4845            self.loaded.append(os.path.realpath(fname))
4846        document.update_attributes(attrs)  # So they are available immediately.
4847        return True
4848
    def load_sections(self, sections, attrs=None):
        """
        Loads sections dictionary. Each dictionary entry contains a
        list of lines.
        Updates 'attrs' with parsed [attributes] section entries.

        The lists in 'sections' are modified in place (blank lines are
        removed) and are stored in self.sections by reference.
        """
        # Delete trailing blank lines from sections.
        # The scan runs from the last line backwards. For markup template
        # sections it stops at the first non-blank line, so only trailing
        # blank lines go; for entries sections it never stops, so every blank
        # line in the section is removed.
        for k in list(sections.keys()):
            for i in range(len(sections[k]) - 1, -1, -1):
                if not sections[k][i]:
                    del sections[k][i]
                elif not self.entries_section(k):
                    break
        # Update new sections.
        for k, v in list(sections.items()):
            if k.startswith('+'):
                # Append section.
                k = k[1:]
                if k in self.sections:
                    self.sections[k] += v
                else:
                    self.sections[k] = v
            else:
                # Replace section.
                self.sections[k] = v
        self.parse_tags()
        # Internally [miscellaneous] section entries are just attributes.
        d = {}
        parse_entries(sections.get('miscellaneous', ()), d, unquote=True, allow_name_only=True)
        parse_entries(sections.get('attributes', ()), d, unquote=True, allow_name_only=True)
        update_attrs(self.conf_attrs, d)
        if attrs is not None:
            attrs.update(d)
        d = {}
        parse_entries(sections.get('titles', ()), d)
        Title.load(d)
        parse_entries(sections.get('specialcharacters', ()), self.specialchars, escape_delimiter=False)
        parse_entries(sections.get('quotes', ()), self.quotes)
        # These parsers read from the merged self.sections rather than from
        # the 'sections' argument.
        self.parse_specialwords()
        self.parse_replacements()
        self.parse_replacements('replacements2')
        self.parse_replacements('replacements3')
        self.parse_specialsections()
        # Hand the newly loaded sections to the module-level definition
        # containers.
        paragraphs.load(sections)
        lists.load(sections)
        blocks.load(sections)
        tables_OLD.load(sections)
        tables.load(sections)
        macros.load(sections.get('macros', ()))
4898
4899    @staticmethod
4900    def get_load_dirs():
4901        """
4902        Return list of well known paths with conf files.
4903        """
4904        result = []
4905        if localapp():
4906            # Load from folders in asciidoc executable directory.
4907            result.append(APP_DIR)
4908        else:
4909            # Load from global configuration directory.
4910            result.append(CONF_DIR)
4911        # Load configuration files from ~/.asciidoc if it exists.
4912        if USER_DIR is not None:
4913            result.append(USER_DIR)
4914        return result
4915
4916    def find_in_dirs(self, filename, dirs=None):
4917        """
4918        Find conf files from dirs list.
4919        Return list of found file paths.
4920        Return empty list if not found in any of the locations.
4921        """
4922        result = []
4923        if dirs is None:
4924            dirs = self.get_load_dirs()
4925        for d in dirs:
4926            f = os.path.join(d, filename)
4927            if os.path.isfile(f):
4928                result.append(f)
4929        return result
4930
4931    def load_from_dirs(self, filename, dirs=None, include=[]):
4932        """
4933        Load conf file from dirs list.
4934        If dirs not specified try all the well known locations.
4935        Return False if no file was successfully loaded.
4936        """
4937        count = 0
4938        for f in self.find_in_dirs(filename, dirs):
4939            if self.load_file(f, include=include):
4940                count += 1
4941        return count != 0
4942
4943    def load_backend(self, dirs=None):
4944        """
4945        Load the backend configuration files from dirs list.
4946        If dirs not specified try all the well known locations.
4947        If a <backend>.conf file was found return it's full path name,
4948        if not found return None.
4949        """
4950        result = None
4951        if dirs is None:
4952            dirs = self.get_load_dirs()
4953        conf = document.backend + '.conf'
4954        conf2 = document.backend + '-' + document.doctype + '.conf'
4955        # First search for filter backends.
4956        for d in [os.path.join(d, 'backends', document.backend) for d in dirs]:
4957            if self.load_file(conf, d):
4958                result = os.path.join(d, conf)
4959            self.load_file(conf2, d)
4960        if not result:
4961            # Search in the normal locations.
4962            for d in dirs:
4963                if self.load_file(conf, d):
4964                    result = os.path.join(d, conf)
4965                self.load_file(conf2, d)
4966        return result
4967
4968    def load_filters(self, dirs=None):
4969        """
4970        Load filter configuration files from 'filters' directory in dirs list.
4971        If dirs not specified try all the well known locations.  Suppress
4972        loading if a file named __noautoload__ is in same directory as the conf
4973        file unless the filter has been specified with the --filter
4974        command-line option (in which case it is loaded unconditionally).
4975        """
4976        if dirs is None:
4977            dirs = self.get_load_dirs()
4978        for d in dirs:
4979            # Load filter .conf files.
4980            filtersdir = os.path.join(d, 'filters')
4981            for dirpath, dirnames, filenames in os.walk(filtersdir):
4982                subdirs = dirpath[len(filtersdir):].split(os.path.sep)
4983                # True if processing a filter specified by a --filter option.
4984                filter_opt = len(subdirs) > 1 and subdirs[1] in self.filters
4985                if '__noautoload__' not in filenames or filter_opt:
4986                    for f in filenames:
4987                        if re.match(r'^.+\.conf$', f):
4988                            self.load_file(f, dirpath)
4989
4990    def find_config_dir(self, *dirnames):
4991        """
4992        Return path of configuration directory.
4993        Try all the well known locations.
4994        Return None if directory not found.
4995        """
4996        for d in [os.path.join(d, *dirnames) for d in self.get_load_dirs()]:
4997            if os.path.isdir(d):
4998                return d
4999        return None
5000
5001    def set_theme_attributes(self):
5002        theme = document.attributes.get('theme')
5003        if theme and 'themedir' not in document.attributes:
5004            themedir = self.find_config_dir('themes', theme)
5005            if themedir:
5006                document.attributes['themedir'] = themedir
5007                iconsdir = os.path.join(themedir, 'icons')
5008                if 'data-uri' in document.attributes and os.path.isdir(iconsdir):
5009                    document.attributes['iconsdir'] = iconsdir
5010            else:
5011                message.warning('missing theme: %s' % theme, linenos=False)
5012
5013    def load_miscellaneous(self, d):
5014        """Set miscellaneous configuration entries from dictionary 'd'."""
5015        def set_if_int_ge(name, d, min_value):
5016            if name in d:
5017                try:
5018                    val = int(d[name])
5019                    if not val >= min_value:
5020                        raise ValueError("not >= " + str(min_value))
5021                    setattr(self, name, val)
5022                except ValueError:
5023                    raise EAsciiDoc('illegal [miscellaneous] %s entry' % name)
5024        set_if_int_ge('tabsize', d, 0)
5025        set_if_int_ge('textwidth', d, 1)  # DEPRECATED: Old tables only.
5026
5027        if 'pagewidth' in d:
5028            try:
5029                val = float(d['pagewidth'])
5030                self.pagewidth = val
5031            except ValueError:
5032                raise EAsciiDoc('illegal [miscellaneous] pagewidth entry')
5033
5034        if 'pageunits' in d:
5035            self.pageunits = d['pageunits']
5036        if 'outfilesuffix' in d:
5037            self.outfilesuffix = d['outfilesuffix']
5038        if 'newline' in d:
5039            # Convert escape sequences to their character values.
5040            self.newline = literal_eval('"' + d['newline'] + '"')
5041        if 'subsnormal' in d:
5042            self.subsnormal = parse_options(d['subsnormal'], SUBS_OPTIONS,
5043                                            'illegal [%s] %s: %s' % ('miscellaneous', 'subsnormal', d['subsnormal']))
5044        if 'subsverbatim' in d:
5045            self.subsverbatim = parse_options(d['subsverbatim'], SUBS_OPTIONS, 'illegal [%s] %s: %s'
5046                                              % ('miscellaneous', 'subsverbatim', d['subsverbatim']))
5047
5048    def validate(self):
5049        """Check the configuration for internal consistency. Called after all
5050        configuration files have been loaded."""
5051        message.linenos = False     # Disable document line numbers.
5052        # Heuristic to validate that at least one configuration file was loaded.
5053        if not self.specialchars or not self.tags or not lists:
5054            raise EAsciiDoc('incomplete configuration files')
5055        # Check special characters are only one character long.
5056        for k in list(self.specialchars.keys()):
5057            if len(k) != 1:
5058                raise EAsciiDoc('[specialcharacters] must be a single character: %s' % k)
5059        # Check all special words have a corresponding inline macro body.
5060        for macro in list(self.specialwords.values()):
5061            if not is_name(macro):
5062                raise EAsciiDoc('illegal special word name: %s' % macro)
5063            if macro not in self.sections:
5064                message.warning('missing special word macro: [%s]' % macro)
5065        # Check all text quotes have a corresponding tag.
5066        for q in list(self.quotes.keys())[:]:
5067            tag = self.quotes[q]
5068            if not tag:
5069                del self.quotes[q]  # Un-define quote.
5070            else:
5071                if tag[0] == '#':
5072                    tag = tag[1:]
5073                if tag not in self.tags:
5074                    message.warning('[quotes] %s missing tag definition: %s' % (q, tag))
5075        # Check all specialsections section names exist.
5076        for k, v in list(self.specialsections.items()):
5077            if not v:
5078                del self.specialsections[k]
5079            elif v not in self.sections:
5080                message.warning('missing specialsections section: [%s]' % v)
5081        paragraphs.validate()
5082        lists.validate()
5083        blocks.validate()
5084        tables_OLD.validate()
5085        tables.validate()
5086        macros.validate()
5087        message.linenos = None
5088
5089    def entries_section(self, section_name):
5090        """
5091        Return True if conf file section contains entries, not a markup
5092        template.
5093        """
5094        for name in self.ENTRIES_SECTIONS:
5095            if re.match(name, section_name):
5096                return True
5097        return False
5098
5099    def dump(self):
5100        """Dump configuration to stdout."""
5101        # Header.
5102        hdr = ''
5103        hdr = hdr + '#' + writer.newline
5104        hdr = hdr + '# Generated by AsciiDoc %s for %s %s.%s' % \
5105            (VERSION, document.backend, document.doctype, writer.newline)
5106        t = time.asctime(time.localtime(time.time()))
5107        hdr = hdr + '# %s%s' % (t, writer.newline)
5108        hdr = hdr + '#' + writer.newline
5109        sys.stdout.write(hdr)
5110        # Dump special sections.
5111        # Dump only the configuration file and command-line attributes.
5112        # [miscellaneous] entries are dumped as part of the [attributes].
5113        d = {}
5114        d.update(self.conf_attrs)
5115        d.update(self.cmd_attrs)
5116        dump_section('attributes', d)
5117        Title.dump()
5118        dump_section('quotes', self.quotes)
5119        dump_section('specialcharacters', self.specialchars)
5120        d = {}
5121        for k, v in list(self.specialwords.items()):
5122            if v in d:
5123                d[v] = '%s "%s"' % (d[v], k)   # Append word list.
5124            else:
5125                d[v] = '"%s"' % k
5126        dump_section('specialwords', d)
5127        dump_section('replacements', self.replacements)
5128        dump_section('replacements2', self.replacements2)
5129        dump_section('replacements3', self.replacements3)
5130        dump_section('specialsections', self.specialsections)
5131        d = {}
5132        for k, v in list(self.tags.items()):
5133            d[k] = '%s|%s' % v
5134        dump_section('tags', d)
5135        paragraphs.dump()
5136        lists.dump()
5137        blocks.dump()
5138        tables_OLD.dump()
5139        tables.dump()
5140        macros.dump()
5141        # Dump remaining sections.
5142        for k in list(self.sections.keys()):
5143            if not self.entries_section(k):
5144                sys.stdout.write('[%s]%s' % (k, writer.newline))
5145                for line in self.sections[k]:
5146                    sys.stdout.write('%s%s' % (line, writer.newline))
5147                sys.stdout.write(writer.newline)
5148
5149    def subs_section(self, section, d):
5150        """Section attribute substitution using attributes from
5151        document.attributes and 'd'.  Lines containing undefined
5152        attributes are deleted."""
5153        if section in self.sections:
5154            return subs_attrs(self.sections[section], d)
5155        else:
5156            message.warning('missing section: [%s]' % section)
5157            return ()
5158
5159    def parse_tags(self):
5160        """Parse [tags] section entries into self.tags dictionary."""
5161        d = {}
5162        parse_entries(self.sections.get('tags', ()), d)
5163        for k, v in list(d.items()):
5164            if v is None:
5165                if k in self.tags:
5166                    del self.tags[k]
5167            elif v == '':
5168                self.tags[k] = (None, None)
5169            else:
5170                mo = re.match(r'(?P<stag>.*)\|(?P<etag>.*)', v)
5171                if mo:
5172                    self.tags[k] = (mo.group('stag'), mo.group('etag'))
5173                else:
5174                    raise EAsciiDoc('[tag] %s value malformed' % k)
5175
5176    def tag(self, name, d=None):
5177        """Returns (starttag,endtag) tuple named name from configuration file
5178        [tags] section. Raise error if not found. If a dictionary 'd' is
5179        passed then merge with document attributes and perform attribute
5180        substitution on tags."""
5181        if name not in self.tags:
5182            raise EAsciiDoc('missing tag: %s' % name)
5183        stag, etag = self.tags[name]
5184        if d is not None:
5185            # TODO: Should we warn if substitution drops a tag?
5186            if stag:
5187                stag = subs_attrs(stag, d)
5188            if etag:
5189                etag = subs_attrs(etag, d)
5190        if stag is None:
5191            stag = ''
5192        if etag is None:
5193            etag = ''
5194        return (stag, etag)
5195
5196    def parse_specialsections(self):
5197        """Parse specialsections section to self.specialsections dictionary."""
5198        # TODO: This is virtually the same as parse_replacements() and should
5199        # be factored to single routine.
5200        d = {}
5201        parse_entries(self.sections.get('specialsections', ()), d, unquote=True)
5202        for pat, sectname in list(d.items()):
5203            pat = strip_quotes(pat)
5204            if not is_re(pat):
5205                raise EAsciiDoc('[specialsections] entry is not a valid regular expression: %s' % pat)
5206            if sectname is None:
5207                if pat in self.specialsections:
5208                    del self.specialsections[pat]
5209            else:
5210                self.specialsections[pat] = sectname
5211
5212    def parse_replacements(self, sect='replacements'):
5213        """Parse replacements section into self.replacements dictionary."""
5214        d = OrderedDict()
5215        parse_entries(self.sections.get(sect, ()), d, unquote=True)
5216        for pat, rep in list(d.items()):
5217            if not self.set_replacement(pat, rep, getattr(self, sect)):
5218                raise EAsciiDoc('[%s] entry in %s is not a valid '
5219                                'regular expression: %s' % (sect, self.fname, pat))
5220
5221    @staticmethod
5222    def set_replacement(pat, rep, replacements):
5223        """Add pattern and replacement to replacements dictionary."""
5224        pat = strip_quotes(pat)
5225        if not is_re(pat):
5226            return False
5227        if rep is None:
5228            if pat in replacements:
5229                del replacements[pat]
5230        else:
5231            replacements[pat] = strip_quotes(rep)
5232        return True
5233
5234    def subs_replacements(self, s, sect='replacements'):
5235        """Substitute patterns from self.replacements in 's'."""
5236        result = s
5237        for pat, rep in list(getattr(self, sect).items()):
5238            result = re.sub(pat, rep, result)
5239        return result
5240
5241    def parse_specialwords(self):
5242        """Parse special words section into self.specialwords dictionary."""
5243        reo = re.compile(r'(?:\s|^)(".+?"|[^"\s]+)(?=\s|$)')
5244        for line in self.sections.get('specialwords', ()):
5245            e = parse_entry(line)
5246            if not e:
5247                raise EAsciiDoc('[specialwords] entry in %s is malformed: %s' % (self.fname, line))
5248            name, wordlist = e
5249            if not is_name(name):
5250                raise EAsciiDoc('[specialwords] name in %s is illegal: %s' % (self.fname, name))
5251            if wordlist is None:
5252                # Un-define all words associated with 'name'.
5253                for k, v in list(self.specialwords.items()):
5254                    if v == name:
5255                        del self.specialwords[k]
5256            else:
5257                words = reo.findall(wordlist)
5258                for word in words:
5259                    word = strip_quotes(word)
5260                    if not is_re(word):
5261                        raise EAsciiDoc('[specialwords] entry in %s '
5262                                        'is not a valid regular expression: %s' % (self.fname, word))
5263                    self.specialwords[word] = name
5264
5265    def subs_specialchars(self, s):
5266        """Perform special character substitution on string 's'."""
5267        """It may seem like a good idea to escape special characters with a '\'
5268        character, the reason we don't is because the escape character itself
5269        then has to be escaped and this makes including code listings
5270        problematic. Use the predefined {amp},{lt},{gt} attributes instead."""
5271        result = ''
5272        for ch in s:
5273            result = result + self.specialchars.get(ch, ch)
5274        return result
5275
5276    def subs_specialchars_reverse(self, s):
5277        """Perform reverse special character substitution on string 's'."""
5278        result = s
5279        for k, v in list(self.specialchars.items()):
5280            result = result.replace(v, k)
5281        return result
5282
5283    def subs_specialwords(self, s):
5284        """Search for word patterns from self.specialwords in 's' and
5285        substitute using corresponding macro."""
5286        result = s
5287        for word in list(self.specialwords.keys()):
5288            result = re.sub(word, _subs_specialwords, result)
5289        return result
5290
5291    def expand_templates(self, entries):
5292        """Expand any template::[] macros in a list of section entries."""
5293        result = []
5294        for line in entries:
5295            mo = macros.match('+', r'template', line)
5296            if mo:
5297                s = mo.group('attrlist')
5298                if s in self.sections:
5299                    result += self.expand_templates(self.sections[s])
5300                else:
5301                    message.warning('missing section: [%s]' % s)
5302                    result.append(line)
5303            else:
5304                result.append(line)
5305        return result
5306
5307    def expand_all_templates(self):
5308        for k, v in list(self.sections.items()):
5309            self.sections[k] = self.expand_templates(v)
5310
5311    def section2tags(self, section, d={}, skipstart=False, skipend=False):
5312        """Perform attribute substitution on 'section' using document
5313        attributes plus 'd' attributes. Return tuple (stag,etag) containing
5314        pre and post | placeholder tags. 'skipstart' and 'skipend' are
5315        used to suppress substitution."""
5316        assert section is not None
5317        if section in self.sections:
5318            body = self.sections[section]
5319        else:
5320            message.warning('missing section: [%s]' % section)
5321            body = ()
5322        # Split macro body into start and end tag lists.
5323        stag = []
5324        etag = []
5325        in_stag = True
5326        for s in body:
5327            if in_stag:
5328                mo = re.match(r'(?P<stag>.*)\|(?P<etag>.*)', s)
5329                if mo:
5330                    if mo.group('stag'):
5331                        stag.append(mo.group('stag'))
5332                    if mo.group('etag'):
5333                        etag.append(mo.group('etag'))
5334                    in_stag = False
5335                else:
5336                    stag.append(s)
5337            else:
5338                etag.append(s)
5339        # Do attribute substitution last so {brkbar} can be used to escape |.
5340        # But don't do attribute substitution on title -- we've already done it.
5341        title = d.get('title')
5342        if title:
5343            d['title'] = chr(0)  # Replace with unused character.
5344        if not skipstart:
5345            stag = subs_attrs(stag, d)
5346        if not skipend:
5347            etag = subs_attrs(etag, d)
5348        # Put the {title} back.
5349        if title:
5350            stag = [x.replace(chr(0), title) for x in stag]
5351            etag = [x.replace(chr(0), title) for x in etag]
5352            d['title'] = title
5353        return (stag, etag)
5354
5355
5356# ---------------------------------------------------------------------------
5357# Deprecated old table classes follow.
5358# Naming convention is an _OLD name suffix.
5359# These will be removed from future versions of AsciiDoc
5360
def join_lines_OLD(lines):
    """Return a new list of 'lines' in which every line ending with the
    backslash line continuation character has been merged (minus the
    backslash) with the line that follows it."""
    joined = []
    pending = None  # Text accumulated from continued lines, else None.
    for line in lines:
        if line and line.endswith('\\'):
            pending = (pending or '') + line[:-1]
        elif pending is not None:
            joined.append(pending + line)
            pending = None
        else:
            joined.append(line)
    if pending is not None:
        # Input ended on a continued line: emit what was collected.
        joined.append(pending)
    return joined
5381
5382
class Column_OLD:
    """Properties of a single column of an old-style table."""
    def __init__(self):
        # All properties start out unset:
        #   colalign   -- 'left', 'right' or 'center'.
        #   rulerwidth -- column width taken from the table ruler.
        #   colwidth   -- output width in page units.
        self.colalign = self.rulerwidth = self.colwidth = None
5389
5390
class Table_OLD(AbstractBlock):
    """Deprecated old-syntax table block.

    The table source starts with a ruler line that defines the columns and
    is followed by rows grouped into an optional header, a body and an
    optional footer, each group terminated by an underline of fill
    characters."""
    COL_STOP = r"(`|'|\.)"  # RE matching a ruler column stop character.
    ALIGNMENTS = {'`': 'left', "'": 'right', '.': 'center'}  # Column stop to alignment.
    FORMATS = ('fixed', 'csv', 'dsv')  # Legal 'format' values.

    def __init__(self):
        AbstractBlock.__init__(self)
        self.CONF_ENTRIES += ('template', 'fillchar', 'format', 'colspec',
                              'headrow', 'footrow', 'bodyrow', 'headdata',
                              'footdata', 'bodydata')
        # Configuration parameters.
        self.fillchar = None
        self.format = None    # 'fixed','csv','dsv'
        self.colspec = None
        self.headrow = None
        self.footrow = None
        self.bodyrow = None
        self.headdata = None
        self.footdata = None
        self.bodydata = None
        # Calculated parameters.
        self.underline = None     # RE matching current table underline.
        self.isnumeric = False    # True if numeric ruler.
        self.tablewidth = None    # Optional table width scale factor.
        self.columns = []         # List of Columns.
        # Other.
        self.check_msg = ''       # Message set by previous self.validate() call.

    def load(self, name, entries):
        """Update table definition from section entries in 'entries'.
        Raises EAsciiDoc on a malformed fillchar or an illegal format."""
        AbstractBlock.load(self, name, entries)
        for k, v in list(entries.items()):
            if k == 'fillchar':
                if v and len(v) == 1:
                    self.fillchar = v
                else:
                    raise EAsciiDoc('malformed table fillchar: %s' % v)
            elif k == 'format':
                if v in Table_OLD.FORMATS:
                    self.format = v
                else:
                    raise EAsciiDoc('illegal table format: %s' % v)
            elif k in ('colspec', 'headrow', 'footrow', 'bodyrow',
                       'headdata', 'footdata', 'bodydata'):
                # Markup template entries are stored as given.
                setattr(self, k, v)

    def dump(self):
        """Write the table definition entries to stdout (after those written
        by AbstractBlock.dump()), followed by a blank line. Optional entries
        are only written when set."""
        AbstractBlock.dump(self)

        def write(s):
            sys.stdout.write('%s%s' % (s, writer.newline))

        write('fillchar=' + self.fillchar)
        write('format=' + self.format)
        if self.colspec:
            write('colspec=' + self.colspec)
        if self.headrow:
            write('headrow=' + self.headrow)
        if self.footrow:
            write('footrow=' + self.footrow)
        write('bodyrow=' + self.bodyrow)
        if self.headdata:
            write('headdata=' + self.headdata)
        if self.footdata:
            write('footdata=' + self.footdata)
        write('bodydata=' + self.bodydata)
        write('')

    def validate(self):
        """Check table definition and set self.check_msg if invalid else set
        self.check_msg to blank string."""
        AbstractBlock.validate(self)
        # Check global table parameters.
        if config.textwidth is None:
            self.check_msg = 'missing [miscellaneous] textwidth entry'
        elif config.pagewidth is None:
            self.check_msg = 'missing [miscellaneous] pagewidth entry'
        elif config.pageunits is None:
            self.check_msg = 'missing [miscellaneous] pageunits entry'
        elif self.headrow is None:
            self.check_msg = 'missing headrow entry'
        elif self.footrow is None:
            self.check_msg = 'missing footrow entry'
        elif self.bodyrow is None:
            self.check_msg = 'missing bodyrow entry'
        elif self.headdata is None:
            self.check_msg = 'missing headdata entry'
        elif self.footdata is None:
            self.check_msg = 'missing footdata entry'
        elif self.bodydata is None:
            self.check_msg = 'missing bodydata entry'
        else:
            # No errors.
            self.check_msg = ''

    def isnext(self):
        """Delegate to AbstractBlock.isnext()."""
        return AbstractBlock.isnext(self)

    def parse_ruler(self, ruler):
        """Parse ruler calculating underline and ruler column widths.
        Sets self.tablewidth, self.isnumeric and self.underline and appends
        one Column_OLD per ruler column to self.columns.
        Raises EAsciiDoc if the ruler is malformed."""
        fc = re.escape(self.fillchar)
        # Strip and save optional tablewidth from end of ruler.
        mo = re.match(r'^(.*' + fc + r'+)([\d\.]+)$', ruler)
        if mo:
            ruler = mo.group(1)
            self.tablewidth = float(mo.group(2))
            self.attributes['tablewidth'] = str(float(self.tablewidth))
        else:
            self.tablewidth = None
            self.attributes['tablewidth'] = '100.0'
        # Guess whether column widths are specified numerically or not.
        if ruler[1] != self.fillchar:
            # If the first column does not start with a fillchar then numeric.
            self.isnumeric = True
        elif ruler[1:] == self.fillchar * len(ruler[1:]):
            # The case of one column followed by fillchars is numeric.
            self.isnumeric = True
        else:
            self.isnumeric = False
        # Underlines must be 3 or more fillchars.
        self.underline = r'^' + fc + r'{3,}$'
        # COL_STOP is a capturing group so the split result alternates
        # between column stop characters and the text following them.
        splits = re.split(self.COL_STOP, ruler)[1:]
        # Build self.columns.
        for i in range(0, len(splits), 2):
            c = Column_OLD()
            c.colalign = self.ALIGNMENTS[splits[i]]
            s = splits[i + 1]
            if self.isnumeric:
                # Strip trailing fillchars.
                s = re.sub(fc + r'+$', '', s)
                if s == '':
                    c.rulerwidth = None
                else:
                    try:
                        val = int(s)
                        if not val > 0:
                            raise ValueError('not > 0')
                        c.rulerwidth = val
                    except ValueError:
                        raise EAsciiDoc('malformed ruler: bad width')
            else:   # Calculate column width from inter-fillchar intervals.
                if not re.match(r'^' + fc + r'+$', s):
                    raise EAsciiDoc('malformed ruler: illegal fillchars')
                c.rulerwidth = len(s) + 1
            self.columns.append(c)
        # Fill in unspecified ruler widths: a column without a width takes
        # the width of the preceding column (1 if it is the first column).
        if self.isnumeric:
            prevwidth = 1
            for c in self.columns:
                if c.rulerwidth is None:
                    c.rulerwidth = prevwidth
                prevwidth = c.rulerwidth

    def build_colspecs(self):
        """Generate colwidths and colspecs. This can only be done after the
        table arguments have been parsed since we use the table format.
        Raises EAsciiDoc if the total ruler width is not positive."""
        self.attributes['cols'] = len(self.columns)
        # Calculate total ruler width.
        totalwidth = sum(c.rulerwidth for c in self.columns)
        if totalwidth <= 0:
            raise EAsciiDoc('zero width table')
        # Calculate marked up colwidths from rulerwidths.
        for c in self.columns:
            # Convert ruler width to output page width.
            width = float(c.rulerwidth)
            if self.format == 'fixed' and self.tablewidth is None:
                # Size proportional to ruler width.
                colfraction = width / config.textwidth
            else:
                # Size proportional to page width.
                colfraction = width / totalwidth
            c.colwidth = colfraction * config.pagewidth  # To page units.
            if self.tablewidth is not None:
                c.colwidth = c.colwidth * self.tablewidth   # Scale factor.
                if self.tablewidth > 1:
                    c.colwidth = c.colwidth / 100  # tablewidth is in percent.
        # Build colspecs.
        if self.colspec:
            cols = []
            # Column numbers start at 1, the same numbering subs_row() uses.
            for colnumber, c in enumerate(self.columns, 1):
                self.attributes['colalign'] = c.colalign
                self.attributes['colwidth'] = str(int(c.colwidth))
                self.attributes['colnumber'] = str(colnumber)
                s = subs_attrs(self.colspec, self.attributes)
                if not s:
                    message.warning('colspec dropped: contains undefined attribute')
                else:
                    cols.append(s)
            self.attributes['colspecs'] = writer.newline.join(cols)

    def split_rows(self, rows):
        """Return a two item tuple containing a list of lines up to but not
        including the next underline (continued lines are joined) and the
        lines after the underline.
        Raises EAsciiDoc if there are no lines before the underline or if
        'rows' contains no underline."""
        reo = re.compile(self.underline)
        i = 0
        # Stop at the end of 'rows' so that a missing underline is reported
        # below instead of raising IndexError.
        while i < len(rows) and not reo.match(rows[i]):
            i = i + 1
        if i == 0:
            raise EAsciiDoc('missing table rows')
        if i >= len(rows):
            raise EAsciiDoc('closing [%s] underline expected' % self.defname)
        return (join_lines_OLD(rows[:i]), rows[i + 1:])

    def parse_rows(self, rows, rtag, dtag):
        """Parse rows list using the row and data tags. Returns a substituted
        list of output lines.
        Raises EAsciiDoc if self.format is not one of the legal formats."""
        result = []
        # Source rows are parsed as single block, rather than line by line, to
        # allow the CSV reader to handle multi-line rows.
        if self.format == 'fixed':
            rows = self.parse_fixed(rows)
        elif self.format == 'csv':
            rows = self.parse_csv(rows)
        elif self.format == 'dsv':
            rows = self.parse_dsv(rows)
        else:
            # Unparsed rows must not reach subs_row().
            raise EAsciiDoc('illegal table format: %s' % self.format)
        # Substitute and indent all data in all rows.
        stag, etag = subs_tag(rtag, self.attributes)
        for row in rows:
            result.append('  ' + stag)
            for data in self.subs_row(row, dtag):
                result.append('    ' + data)
            result.append('  ' + etag)
        return result

    def subs_row(self, data, dtag):
        """Substitute the list of source row data elements using the data tag.
        Returns a substituted list of output table data items.
        Missing trailing items are treated as blank and surplus items are
        dropped; a warning is issued in both cases."""
        result = []
        if len(data) < len(self.columns):
            message.warning('fewer row data items then table columns')
        if len(data) > len(self.columns):
            message.warning('more row data items than table columns')
        for i in range(len(self.columns)):
            if i > len(data) - 1:
                d = ''  # Fill missing column data with blanks.
            else:
                d = data[i]
            c = self.columns[i]
            self.attributes['colalign'] = c.colalign
            self.attributes['colwidth'] = str(int(c.colwidth))
            self.attributes['colnumber'] = str(i + 1)
            stag, etag = subs_tag(dtag, self.attributes)
            # Insert AsciiDoc line break (' +') where row data has newlines
            # ('\n').  This is really only useful when the table format is csv
            # and the output markup is HTML. It's also a bit dubious in that it
            # assumes the user has not modified the shipped line break pattern.
            subs = self.get_subs()[0]
            if 'replacements2' in subs:
                # Insert line breaks in cell data.
                d = re.sub(r'(?m)\n', r' +\n', d)
                d = d.split('\n')    # So writer.newline is written.
            else:
                d = [d]
            result = result + [stag] + Lex.subs(d, subs) + [etag]
        return result

    def parse_fixed(self, rows):
        """Parse the list of source table rows. Each row item in the returned
        list contains a list of cell data elements.
        Cells are cut from each row at the ruler column widths and stripped
        of surrounding white space."""
        result = []
        for row in rows:
            data = []
            start = 0
            for c in self.columns:
                end = start + c.rulerwidth
                if c is self.columns[-1]:
                    # Text in last column can continue forever.
                    data.append(row[start:].strip())
                else:
                    data.append(row[start:end].strip())
                start = end
            result.append(data)
        return result

    @staticmethod
    def parse_csv(rows):
        """Parse the list of source table rows. Each row item in the returned
        list contains a list of cell data elements.
        Raises EAsciiDoc if the CSV reader fails."""
        result = []
        rdr = csv.reader(io.StringIO(DEFAULT_NEWLINE.join(rows)), skipinitialspace=True)
        try:
            for row in rdr:
                result.append(row)
        except Exception as e:
            # The loop variable holds the last good row (or nothing at all
            # if the first row failed), so report the reader position and
            # the underlying error instead.
            raise EAsciiDoc('csv parse error: line %d: %s' % (rdr.line_num, e))
        return result

    def parse_dsv(self, rows):
        """Parse the list of source table rows. Each row item in the returned
        list contains a list of cell data elements.
        Cells are delimited by the 'separator' attribute (default ':').
        Raises EAsciiDoc if the separator is not a single character."""
        separator = self.attributes.get('separator', ':')
        # Evaluate as a string literal so escapes such as \t can be used.
        separator = literal_eval('"' + separator + '"')
        if len(separator) != 1:
            raise EAsciiDoc('malformed dsv separator: %s' % separator)
        # TODO: If separator is preceded by an odd number of backslashes then
        # it is escaped and should not delimit.
        result = []
        for row in rows:
            # Skip blank lines
            if row == '':
                continue
            # Un-escape escaped characters.
            # NOTE: the row is evaluated as the body of a double-quoted
            # string literal, so a row that does not form a valid literal
            # (e.g. one ending with a single backslash) makes literal_eval()
            # raise its own exception rather than EAsciiDoc.
            row = literal_eval('"' + row.replace('"', '\\"') + '"')
            data = row.split(separator)
            data = [s.strip() for s in data]
            result.append(data)
        return result

    def translate(self):
        """Read an old-style table from the reader and write the resulting
        markup to the writer. A deprecation message is always issued; the
        table is consumed but skipped with a warning if the table definition
        was marked invalid by validate().
        Raises EAsciiDoc on illegal attributes or a missing closing
        underline."""
        message.deprecated('old tables syntax')
        AbstractBlock.translate(self)
        # Reset instance specific properties.
        self.underline = None
        self.columns = []
        attrs = {}
        BlockTitle.consume(attrs)
        # Add relevant globals to table substitutions.
        attrs['pagewidth'] = str(config.pagewidth)
        attrs['pageunits'] = config.pageunits
        # Mix in document attribute list.
        AttributeList.consume(attrs)
        # Validate overridable attributes.
        for k, v in list(attrs.items()):
            if k == 'format':
                if v not in self.FORMATS:
                    raise EAsciiDoc('illegal [%s] %s: %s' % (self.defname, k, v))
                self.format = v
            elif k == 'tablewidth':
                try:
                    self.tablewidth = float(attrs['tablewidth'])
                except Exception:
                    raise EAsciiDoc('illegal [%s] %s: %s' % (self.defname, k, v))
        self.merge_attributes(attrs)
        # Parse table ruler.
        ruler = reader.read()
        assert re.match(self.delimiter, ruler)
        self.parse_ruler(ruler)
        # Read the entire table.
        table = []
        while True:
            line = reader.read_next()
            # Table terminated by underline followed by a blank line or EOF.
            if len(table) > 0 and re.match(self.underline, table[-1]):
                if line in ('', None):
                    break
            if line is None:
                raise EAsciiDoc('closing [%s] underline expected' % self.defname)
            table.append(reader.read())
        # EXPERIMENTAL: The number of lines in the table, requested by Benjamin Klum.
        self.attributes['rows'] = str(len(table))
        if self.check_msg:  # Skip if table definition was marked invalid.
            message.warning('skipping [%s] table: %s' % (self.defname, self.check_msg))
            return
        self.push_blockname('table')
        # Generate colwidths and colspecs.
        self.build_colspecs()
        # Generate headrows, footrows, bodyrows.
        # Headrow, footrow and bodyrow data replaces same named attributes in
        # the table markup template. In order to ensure this data does not get
        # a second attribute substitution (which would interfere with any
        # already substituted inline passthroughs) unique placeholders are used
        # (the tab character does not appear elsewhere since it is expanded on
        # input) which are replaced after template attribute substitution.
        headrows = footrows = []
        # A single group of rows is the body; when more groups follow, the
        # first is the header, the second the body and the third the footer.
        bodyrows, table = self.split_rows(table)
        if table:
            headrows = bodyrows
            bodyrows, table = self.split_rows(table)
            if table:
                footrows, table = self.split_rows(table)
        if headrows:
            headrows = self.parse_rows(headrows, self.headrow, self.headdata)
            headrows = writer.newline.join(headrows)
            self.attributes['headrows'] = '\x07headrows\x07'
        if footrows:
            footrows = self.parse_rows(footrows, self.footrow, self.footdata)
            footrows = writer.newline.join(footrows)
            self.attributes['footrows'] = '\x07footrows\x07'
        bodyrows = self.parse_rows(bodyrows, self.bodyrow, self.bodydata)
        bodyrows = writer.newline.join(bodyrows)
        self.attributes['bodyrows'] = '\x07bodyrows\x07'
        table = subs_attrs(config.sections[self.template], self.attributes)
        table = writer.newline.join(table)
        # Before we finish replace the table head, foot and body place holders
        # with the real data.
        if headrows:
            table = table.replace('\x07headrows\x07', headrows, 1)
        if footrows:
            table = table.replace('\x07footrows\x07', footrows, 1)
        table = table.replace('\x07bodyrows\x07', bodyrows, 1)
        writer.write(table, trace='table')
        self.pop_blockname()
5805
5806
class Tables_OLD(AbstractBlocks):
    """List of tables."""
    BLOCK_TYPE = Table_OLD
    PREFIX = 'old_tabledef-'

    def __init__(self):
        AbstractBlocks.__init__(self)

    def load(self, sections):
        """Delegate to AbstractBlocks.load()."""
        AbstractBlocks.load(self, sections)

    def validate(self):
        """Check the loaded table definitions: fill unspecified parameters
        from the default table definition, build each table's delimiter
        pattern plus the combined self.delimiters pattern and validate
        every definition.
        Raises EAsciiDoc if there is no default table definition or if a
        table has a missing or illegal fill character."""
        # Does not call AbstractBlocks.validate().
        # Check we have a default table definition,
        for b in self.blocks:
            if b.defname == 'old_tabledef-default':
                default = b
                break
        else:
            raise EAsciiDoc('missing section: [old_tabledef-default]')
        # Set default table defaults.
        if default.format is None:
            default.format = 'fixed'
        # Propagate defaults to unspecified table parameters.
        inherited = ('fillchar', 'format', 'template', 'colspec',
                     'headrow', 'footrow', 'bodyrow',
                     'headdata', 'footdata', 'bodydata')
        for b in self.blocks:
            if b is not default:
                for attr in inherited:
                    if getattr(b, attr) is None:
                        setattr(b, attr, getattr(default, attr))
        # Check all tables have valid fill character.
        for b in self.blocks:
            if not b.fillchar or len(b.fillchar) != 1:
                raise EAsciiDoc('[%s] missing or illegal fillchar' % b.defname)
        # Build combined tables delimiter patterns and assign defaults.
        delimiters = []
        for b in self.blocks:
            # Ruler is:
            #   (ColStop,(ColWidth,FillChar+)?)+, FillChar+, TableWidth?
            b.delimiter = r'^(' + Table_OLD.COL_STOP \
                + r'(\d*|' + re.escape(b.fillchar) + r'*)' \
                + r')+' \
                + re.escape(b.fillchar) + r'+' \
                + r'([\d\.]*)$'
            delimiters.append(b.delimiter)
            # Header and footer markup falls back to the body markup.
            if not b.headrow:
                b.headrow = b.bodyrow
            if not b.footrow:
                b.footrow = b.bodyrow
            if not b.headdata:
                b.headdata = b.bodydata
            if not b.footdata:
                b.footdata = b.bodydata
        self.delimiters = re_join(delimiters)
        # Check table definitions are valid.
        for b in self.blocks:
            b.validate()
            if config.verbose:
                if b.check_msg:
                    message.warning('[%s] table definition: %s' % (b.defname, b.check_msg))
5883
5884
5885# End of deprecated old table classes.
5886# ---------------------------------------------------------------------------
5887
5888# ---------------------------------------------------------------------------
5889# filter and theme plugin commands.
5890# ---------------------------------------------------------------------------
def die(msg):
    """Write 'msg' with message.stderr() then exit with status 1."""
    message.stderr(msg)
    raise SystemExit(1)
5894
5895
def extract_zip(zip_file, destdir):
    """
    Unzip Zip file to destination directory.
    Directory entries are skipped; the directories of extracted files are
    created as required. Files are created with the permissions stored in
    the Zip file, if any.
    Throws exception if error occurs (ValueError if an archive member would
    be written outside the destination directory).
    """
    # The trailing separator stops '/dest' from also matching '/destination'.
    dest_prefix = os.path.join(os.path.abspath(destdir), '')
    with zipfile.ZipFile(zip_file, 'r') as zipo:
        for zi in zipo.infolist():
            if zi.filename.endswith('/'):
                continue
            d, basename = os.path.split(zi.filename)
            directory = os.path.normpath(os.path.join(destdir, d))
            outfile = os.path.join(directory, basename)
            # Refuse absolute and '..' member names that would escape from
            # the destination directory.
            if not os.path.abspath(outfile).startswith(dest_prefix):
                raise ValueError('illegal file path in Zip file: %s' % zi.filename)
            if not os.path.isdir(directory):
                os.makedirs(directory)
            perms = (zi.external_attr >> 16) & 0o777
            message.verbose('extracting: %s' % outfile)
            # Truncate so that overwriting a longer existing file does not
            # leave its old tail behind.
            flags = os.O_CREAT | os.O_WRONLY | os.O_TRUNC
            if sys.platform == 'win32':
                flags |= os.O_BINARY
            if perms == 0:
                # Zip files created under Windows do not include permissions.
                fh = os.open(outfile, flags)
            else:
                fh = os.open(outfile, flags, perms)
            try:
                os.write(fh, zipo.read(zi.filename))
            finally:
                os.close(fh)
5927
5928
def create_zip(zip_file, src, skip_hidden=False):
    """
    Write a new Zip file containing src: the file itself if src is a file,
    otherwise all files in the src directory and its subdirectories, named
    relative to src.
    If skip_hidden is True files and directories whose names start
    with . are left out.
    Throws exception if error occurs (ValueError if src is neither a file
    nor a directory).
    """
    with zipfile.ZipFile(zip_file, 'w') as archive:
        if os.path.isfile(src):
            name = os.path.basename(src)
            message.verbose('archiving: %s' % name)
            archive.write(src, name, zipfile.ZIP_DEFLATED)
            return
        if not os.path.isdir(src):
            raise ValueError('src must specify directory or file: %s' % src)
        base = os.path.abspath(src)
        if not base.endswith(os.path.sep):
            base += os.path.sep
        for root, dirs, files in os.walk(base):
            # Path of the current directory relative to the source directory.
            rel_root = os.path.abspath(root)[len(base):]
            if skip_hidden:
                # Pruning 'dirs' in place stops os.walk() descending into them.
                for hidden in [x for x in dirs if x.startswith('.')]:
                    message.verbose('skipping: %s' % os.path.join(rel_root, hidden))
                    dirs.remove(hidden)
            for f in files:
                member = os.path.join(rel_root, f)
                if skip_hidden and f.startswith('.'):
                    message.verbose('skipping: %s' % member)
                    continue
                message.verbose('archiving: %s' % member)
                archive.write(os.path.join(root, f), member, zipfile.ZIP_DEFLATED)
5966
5967
class Plugin:
    """
    --backend, --filter and --theme option plugin management commands.

    Every command is a static method that takes the list of remaining
    command-line arguments. Plugin.type must be set to the plugin type
    before a command is called.
    """
    CMDS = ('install', 'remove', 'list', 'build')

    type = None     # 'backend', 'filter' or 'theme'.

    @staticmethod
    def get_dir():
        """
        Return plugins path (.asciidoc/backends, .asciidoc/filters or
        .asciidoc/themes) in user's home directory or None if user home not
        defined.
        """
        result = userdir()
        if result:
            result = os.path.join(result, '.asciidoc', Plugin.type + 's')
        return result

    @staticmethod
    def install(args):
        """
        Install plugin Zip file.
        args[0] is plugin zip file path.
        args[1] is optional destination plugins directory.
        The plugin name is the leading run of word characters in the Zip
        file name; the plugin is extracted to <plugins directory>/<name>.
        """
        if len(args) not in (1, 2):
            die('invalid number of arguments: --%s install %s' % (Plugin.type, ' '.join(args)))
        zip_file = args[0]
        if not os.path.isfile(zip_file):
            die('file not found: %s' % zip_file)
        reo = re.match(r'^\w+', os.path.split(zip_file)[1])
        if not reo:
            die('file name does not start with legal %s name: %s' % (Plugin.type, zip_file))
        plugin_name = reo.group()
        if len(args) == 2:
            plugins_dir = args[1]
            if not os.path.isdir(plugins_dir):
                die('directory not found: %s' % plugins_dir)
        else:
            plugins_dir = Plugin.get_dir()
            if not plugins_dir:
                die('user home directory is not defined')
        plugin_dir = os.path.join(plugins_dir, plugin_name)
        if os.path.exists(plugin_dir):
            die('%s is already installed: %s' % (Plugin.type, plugin_dir))
        try:
            os.makedirs(plugin_dir)
        except Exception as e:
            die('failed to create %s directory: %s' % (Plugin.type, str(e)))
        try:
            extract_zip(zip_file, plugin_dir)
        except Exception as e:
            # Do not leave a partially extracted plugin behind.
            if os.path.isdir(plugin_dir):
                shutil.rmtree(plugin_dir)
            die('failed to extract %s: %s' % (Plugin.type, str(e)))

    @staticmethod
    def remove(args):
        """
        Delete plugin directory.
        args[0] is plugin name.
        args[1] is optional plugins directory (defaults to the directory
        returned by Plugin.get_dir() i.e. ~/.asciidoc/<type>s).
        """
        if len(args) not in (1, 2):
            die('invalid number of arguments: --%s remove %s' % (Plugin.type, ' '.join(args)))
        plugin_name = args[0]
        if not re.match(r'^\w+$', plugin_name):
            die('illegal %s name: %s' % (Plugin.type, plugin_name))
        if len(args) == 2:
            d = args[1]
            if not os.path.isdir(d):
                die('directory not found: %s' % d)
        else:
            d = Plugin.get_dir()
            if not d:
                die('user directory is not defined')
        plugin_dir = os.path.join(d, plugin_name)
        if not os.path.isdir(plugin_dir):
            die('cannot find %s: %s' % (Plugin.type, plugin_dir))
        try:
            message.verbose('removing: %s' % plugin_dir)
            shutil.rmtree(plugin_dir)
        except Exception as e:
            die('failed to delete %s: %s' % (Plugin.type, str(e)))

    @staticmethod
    def list(args):
        """
        List all plugin directories (global and local).
        args is not used.
        """
        for load_dir in config.get_load_dirs():
            plugins_dir = os.path.join(load_dir, Plugin.type + 's')
            if not os.path.isdir(plugins_dir):
                continue
            entries = [os.path.join(plugins_dir, o) for o in os.listdir(plugins_dir)]
            for plugin_dir in sorted(filter(os.path.isdir, entries)):
                # Each entry already includes the plugins directory; joining
                # it on again would duplicate the prefix of a relative path.
                message.stdout(plugin_dir)

    @staticmethod
    def build(args):
        """
        Create plugin Zip file.
        args[0] is Zip file name.
        args[1] is plugin directory (or a single plugin file).
        Hidden files and directories are not archived.
        """
        if len(args) != 2:
            die('invalid number of arguments: --%s build %s' % (Plugin.type, ' '.join(args)))
        zip_file = args[0]
        plugin_source = args[1]
        if not (os.path.isdir(plugin_source) or os.path.isfile(plugin_source)):
            die('plugin source not found: %s' % plugin_source)
        try:
            create_zip(zip_file, plugin_source, skip_hidden=True)
        except Exception as e:
            die('failed to create %s: %s' % (zip_file, str(e)))
6081
6082
6083# ---------------------------------------------------------------------------
6084# Application code.
6085# ---------------------------------------------------------------------------
6086# Constants
6087# ---------
6088APP_FILE = None             # This file's full path.
6089APP_DIR = None              # This file's directory.
6090USER_DIR = None             # ~/.asciidoc
6091# Global configuration files directory (set by Makefile build target).
6092CONF_DIR = '/usr/local/etc/asciidoc'
6093HELP_FILE = 'help.conf'     # Default (English) help file.
6094
6095# Globals
6096# -------
6097document = Document()       # The document being processed.
6098config = Config()           # Configuration file reader.
6099reader = Reader()           # Input stream line reader.
6100writer = Writer()           # Output stream line writer.
6101message = Message()         # Message functions.
6102paragraphs = Paragraphs()   # Paragraph definitions.
6103lists = Lists()             # List definitions.
6104blocks = DelimitedBlocks()  # DelimitedBlock definitions.
6105tables_OLD = Tables_OLD()   # Table_OLD definitions.
6106tables = Tables()           # Table definitions.
6107macros = Macros()           # Macro definitions.
6108calloutmap = CalloutMap()   # Coordinates callouts and callout list.
6109trace = Trace()             # Implements trace attribute processing.
6110
6111# Used by asciidocapi.py #
6112# List of message strings written to stderr.
6113messages = message.messages
6114
6115
def asciidoc(backend, doctype, confiles, infile, outfile, options):
    """
    Convert the AsciiDoc source file infile to the output file outfile.

    backend is the backend name passed to document.parse_header() (can be
    None).
    doctype is None, 'article', 'manpage' or 'book'.
    confiles is a list of configuration file names specified on the
    command-line.
    infile is the source file name or '<stdin>'.
    outfile is the output file name or '<stdout>'; if None the name is
    derived from infile and the backend name (or config.outfilesuffix).
    options is a list of processing option flags: '-c' (dump configuration),
    '-e' (do not load implicit configuration files), '-s' (suppress header
    and footer) and '-v' (verbose).

    If an error occurs the output file (if it exists) is deleted, an error
    message is written using message.stderr() and sys.exit(1) is called.
    Exceptions that are not derived from Exception (e.g. KeyboardInterrupt)
    are re-raised after the output file has been deleted.
    """
    def load_conffiles(include=None, exclude=None):
        # Load conf files specified on the command-line and by the conf-files attribute.
        # None defaults avoid sharing mutable default lists between calls.
        include = [] if include is None else include
        exclude = [] if exclude is None else exclude
        files = document.attributes.get('conf-files', '')
        files = [f.strip() for f in files.split('|') if f.strip()]
        files += confiles
        for f in files:
            if os.path.isfile(f):
                config.load_file(f, include=include, exclude=exclude)
            else:
                raise EAsciiDoc('missing configuration file: %s' % f)
    try:
        document.attributes['python'] = sys.executable
        for f in config.filters:
            if not config.find_config_dir('filters', f):
                raise EAsciiDoc('missing filter: %s' % f)
        if doctype not in (None, 'article', 'manpage', 'book'):
            raise EAsciiDoc('illegal document type')
        # Set processing options.
        for o in options:
            if o == '-c':
                config.dumping = True
            if o == '-s':
                config.header_footer = False
            if o == '-v':
                config.verbose = True
        document.update_attributes()
        if '-e' not in options:
            # Load asciidoc.conf files in two passes: the first for attributes
            # the second for everything. This is so that locally set attributes
            # are available in the global asciidoc.conf
            if not config.load_from_dirs('asciidoc.conf', include=['attributes']):
                raise EAsciiDoc('configuration file asciidoc.conf missing')
            load_conffiles(include=['attributes'])
            config.load_from_dirs('asciidoc.conf')
            if infile != '<stdin>':
                indir = os.path.dirname(infile)
                config.load_file('asciidoc.conf', indir, include=['attributes', 'titles', 'specialchars'])
        else:
            load_conffiles(include=['attributes', 'titles', 'specialchars'])
        document.update_attributes()
        # Check the infile exists.
        if infile != '<stdin>':
            if not os.path.isfile(infile):
                raise EAsciiDoc('input file %s missing' % infile)
        document.infile = infile
        AttributeList.initialize()
        # Open input file and parse document header.
        reader.tabsize = config.tabsize
        reader.open(infile)
        has_header = document.parse_header(doctype, backend)
        # doctype is now finalized.
        document.attributes['doctype-' + document.doctype] = ''
        config.set_theme_attributes()
        # Load backend configuration files.
        if '-e' not in options:
            f = document.backend + '.conf'
            conffile = config.load_backend()
            if not conffile:
                raise EAsciiDoc('missing backend conf file: %s' % f)
            document.attributes['backend-confdir'] = os.path.dirname(conffile)
        # backend is now known.
        document.attributes['backend-' + document.backend] = ''
        document.attributes[document.backend + '-' + document.doctype] = ''
        doc_conffiles = []
        if '-e' not in options:
            # Load filters and language file.
            config.load_filters()
            document.load_lang()
            if infile != '<stdin>':
                # Load local conf files (files in the source file directory).
                # indir was set above under the same two conditions.
                config.load_file('asciidoc.conf', indir)
                config.load_backend([indir])
                config.load_filters([indir])
                # Load document specific configuration files.
                f = os.path.splitext(infile)[0]
                doc_conffiles = [
                    f for f in (f + '.conf', f + '-' + document.backend + '.conf')
                    if os.path.isfile(f)
                ]
                for f in doc_conffiles:
                    config.load_file(f)
        load_conffiles()
        # Build asciidoc-args attribute.
        args = ''
        # Add custom conf file arguments.
        for f in doc_conffiles + confiles:
            args += ' --conf-file "%s"' % f
        # Add command-line and header attributes.
        # Command-line attributes take precedence over header attributes.
        attrs = {}
        attrs.update(AttributeEntry.attributes)
        attrs.update(config.cmd_attrs)
        if 'title' in attrs:    # Don't pass the header title.
            del attrs['title']
        for k, v in attrs.items():
            if v:
                args += ' --attribute "%s=%s"' % (k, v)
            else:
                args += ' --attribute "%s"' % k
        document.attributes['asciidoc-args'] = args
        # Build outfile name.
        if outfile is None:
            outfile = os.path.splitext(infile)[0] + '.' + document.backend
            if config.outfilesuffix:
                # Change file extension.
                outfile = os.path.splitext(outfile)[0] + config.outfilesuffix
        document.outfile = outfile
        # Document header attributes override conf file attributes.
        document.attributes.update(AttributeEntry.attributes)
        document.update_attributes()
        # Set the default embedded icons directory.
        if 'data-uri' in document.attributes and not os.path.isdir(document.attributes['iconsdir']):
            document.attributes['iconsdir'] = os.path.join(document.attributes['asciidoc-confdir'], 'images/icons')
        # Configuration is fully loaded.
        config.expand_all_templates()
        # Check configuration for consistency.
        config.validate()
        # Initialize top level block name.
        if document.attributes.get('blockname'):
            AbstractBlock.blocknames.append(document.attributes['blockname'])
        paragraphs.initialize()
        lists.initialize()
        if config.dumping:
            config.dump()
        else:
            writer.newline = config.newline
            try:
                writer.open(outfile, reader.bom)
                try:
                    document.translate(has_header)  # Generate the output.
                finally:
                    writer.close()
            finally:
                reader.closefile()
    except BaseException as e:
        # Cleanup: do not leave a partially written output file behind.
        if outfile and outfile != '<stdout>' and os.path.isfile(outfile):
            os.unlink(outfile)
        # KeyboardInterrupt, SystemExit etc. are passed on to the caller.
        if not isinstance(e, Exception):
            raise
        # Build and print error description.
        msg = 'FAILED: '
        if reader.cursor:
            msg = message.format('', msg)
        if isinstance(e, EAsciiDoc):
            message.stderr('%s%s' % (msg, str(e)))
        else:
            if __name__ == '__main__':
                message.stderr(msg + 'unexpected error:')
                message.stderr('-' * 60)
                traceback.print_exc(file=sys.stderr)
                message.stderr('-' * 60)
            else:
                message.stderr('%sunexpected error: %s' % (msg, str(e)))
        sys.exit(1)
6275
6276
def usage(msg=''):
    """Write msg (if not empty) then the 'default' help topic to stderr."""
    if msg:
        message.stderr(msg)
    show_help(topic='default', f=sys.stderr)
6281
6282
def show_help(topic, f=None):
    """
    Print the help file section whose name starts with topic to file
    object f (sys.stdout if f is None).
    Exits with status 1 if no help sections can be loaded or if no section
    matches topic; prints a message to stderr if topic is ambiguous.
    """
    out = sys.stdout if f is None else f
    # Select help file (language specific if the lang attribute was set on
    # the command-line).
    lang = config.cmd_attrs.get('lang')
    help_file = ('help-' + lang + '.conf') if lang and lang != 'en' else HELP_FILE
    config.load_from_dirs(help_file)
    if not config.sections:
        # Default to English if specified language help files not found.
        help_file = HELP_FILE
        config.load_from_dirs(help_file)
    if not config.sections:
        message.stderr('no help topics found')
        sys.exit(1)
    # Find the [topic] section; topic is matched literally as a name prefix.
    topic_re = re.compile(re.escape(topic))
    matches = [name for name in config.sections if topic_re.match(name)]
    if not matches:
        if topic != 'topics':
            message.stderr('help topic not found: [%s] in %s' % (topic, help_file))
        message.stderr('available help topics: %s' % ', '.join(config.sections.keys()))
        sys.exit(1)
    elif len(matches) > 1:
        message.stderr('ambiguous help topic: %s' % topic)
    else:
        for text in config.sections[matches[0]]:
            print(text, file=out)
6317
6318
6319# Used by asciidocapi.py #
def execute(cmd, opts, args):
    """
    Run asciidoc for the given command-line options and arguments.
    cmd is asciidoc command or asciidoc.py path.
    opts and args conform to values returned by getopt.getopt().
    The source (args[0]) and the --out-file value can also be file objects
    (API usage) in which case sys.stdin/sys.stdout are temporarily replaced.
    Raises SystemExit if an error occurs.

    Doctests:

    1. Check execution:

       >>> infile = io.StringIO('Hello *{author}*')
       >>> outfile = io.StringIO()
       >>> opts = []
       >>> opts.append(('--backend','html4'))
       >>> opts.append(('--no-header-footer',None))
       >>> opts.append(('--attribute','author=Joe Bloggs'))
       >>> opts.append(('--out-file',outfile))
       >>> execute(__file__, opts, [infile])
       >>> print(outfile.getvalue())
       <p>Hello <strong>Joe Bloggs</strong></p>

       >>>

    """
    config.init(cmd)
    if len(args) > 1:
        usage('Too many arguments')
        sys.exit(1)
    backend = doctype = outfile = None
    confiles, options = [], []
    help_option = False
    for opt, val in opts:
        # --section-numbers and --theme are shorthand for attribute options.
        if opt in ('-n', '--section-numbers'):
            opt, val = '-a', 'numbered'
        elif opt == '--theme':
            opt, val = '-a', 'theme=' + val
        if opt in ('--help', '-h'):
            help_option = True
        elif opt == '--unsafe':
            # DEPRECATED: --unsafe option.
            document.safe = False
        elif opt == '--safe':
            document.safe = True
        elif opt == '--version':
            print(('asciidoc %s' % VERSION))
            sys.exit(0)
        elif opt in ('-b', '--backend'):
            backend = val
        elif opt in ('-c', '--dump-conf'):
            options.append('-c')
        elif opt in ('-d', '--doctype'):
            doctype = val
        elif opt in ('-e', '--no-conf'):
            options.append('-e')
        elif opt in ('-f', '--conf-file'):
            confiles.append(val)
        elif opt == '--filter':
            config.filters.append(val)
        elif opt in ('-a', '--attribute'):
            entry = parse_entry(val, allow_name_only=True)
            if not entry:
                usage('Illegal -a option: %s' % val)
                sys.exit(1)
            name, value = entry
            # A @ suffix denotes don't override existing document attributes.
            if value and value[-1] == '@':
                document.attributes[name] = value[:-1]
            else:
                config.cmd_attrs[name] = value
        elif opt in ('-o', '--out-file'):
            outfile = val
        elif opt in ('-s', '--no-header-footer'):
            options.append('-s')
        elif opt in ('-v', '--verbose'):
            options.append('-v')
    if help_option:
        # The last argument (if any) names the help topic.
        show_help(args[-1] if len(args) != 0 else 'default')
        sys.exit(0)
    if len(args) == 0:
        if len(opts) == 0:
            usage()
            sys.exit(0)
        usage('No source file specified')
        sys.exit(1)
    saved_stdin, saved_stdout = sys.stdin, sys.stdout
    try:
        infile = args[0]
        if infile == '-':
            infile = '<stdin>'
        elif isinstance(infile, str):
            infile = os.path.abspath(infile)
        else:
            # Input file is file object from API call.
            sys.stdin = infile
            infile = '<stdin>'
        if outfile == '-':
            outfile = '<stdout>'
        elif isinstance(outfile, str):
            outfile = os.path.abspath(outfile)
        elif outfile is not None:
            # Output file is file object from API call.
            sys.stdout = outfile
            outfile = '<stdout>'
        elif infile == '<stdin>':
            # No output file specified: stdin input defaults to stdout output.
            outfile = '<stdout>'
        # Do the work.
        asciidoc(backend, doctype, confiles, infile, outfile, options)
        if document.has_errors:
            sys.exit(1)
    finally:
        # Always restore the real standard streams.
        sys.stdin, sys.stdout = saved_stdin, saved_stdout
6439
6440
if __name__ == '__main__':
    # Process command line options.
    try:
        # DEPRECATED: --unsafe option.
        opts, args = getopt.getopt(sys.argv[1:], 'a:b:cd:ef:hno:svw:',
                                   ['attribute=', 'backend=', 'conf-file=', 'doctype=', 'dump-conf',
                                    'help', 'no-conf', 'no-header-footer', 'out-file=',
                                    'section-numbers', 'verbose', 'version', 'safe', 'unsafe',
                                    'doctest', 'filter=', 'theme='])
    except getopt.GetoptError:
        message.stderr('illegal command options')
        sys.exit(1)
    opt_names = [opt[0] for opt in opts]
    if '--doctest' in opt_names:
        # Run module doctests.
        import doctest
        options = doctest.NORMALIZE_WHITESPACE | doctest.ELLIPSIS
        failures, tries = doctest.testmod(optionflags=options)
        if failures == 0:
            message.stderr('All doctests passed')
            sys.exit(0)
        else:
            sys.exit(1)
    # Look for plugin management commands: a --backend, --filter or --theme
    # option whose value is one of Plugin.CMDS.
    plugin = cmd = None
    count = 0
    for o, v in opts:
        if o in ('-b', '--backend', '--filter', '--theme') and v in Plugin.CMDS:
            # Only remember the option that actually carries a management
            # command, so an ordinary option such as '-b html5' appearing
            # later on the command-line cannot overwrite it.
            plugin = 'backend' if o == '-b' else o[2:]
            cmd = v
            count += 1
    if count > 1:
        die('--backend, --filter and --theme options are mutually exclusive')
    if count == 1:
        # Execute plugin management commands.
        Plugin.type = plugin
        config.init(sys.argv[0])
        config.verbose = bool({'-v', '--verbose'} & set(opt_names))
        getattr(Plugin, cmd)(args)
    else:
        # Execute asciidoc.
        try:
            execute(sys.argv[0], opts, args)
        except KeyboardInterrupt:
            sys.exit(1)
6493