1#!/usr/local/bin/python3.8
2
3"""
4a2x - A toolchain manager for AsciiDoc (converts Asciidoc text files to other
5      file formats)
6
7Free use of this software is granted under the terms of the MIT license.
8
9Copyright (C) 2002-2013 Stuart Rackham.
10Copyright (C) 2013-2020 AsciiDoc Contributors.
11
12Permission is hereby granted, free of charge, to any person obtaining a copy
13of this software and associated documentation files (the "Software"), to deal
14in the Software without restriction, including without limitation the rights
15to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
16copies of the Software, and to permit persons to whom the Software is
17furnished to do so, subject to the following conditions:
18
19The above copyright notice and this permission notice shall be included in all
20copies or substantial portions of the Software.
21
22THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
23IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
24FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
25AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
26LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
27OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
28SOFTWARE.
29"""
30
31import os
32import fnmatch
33from html.parser import HTMLParser
34import re
35import shutil
36import subprocess
37import sys
38import traceback
39from urllib.parse import urlparse
40import zipfile
41import xml.dom.minidom
42import mimetypes
43
# Program name used as the prefix of console messages: this script's file
# name with its directory and extension removed.
PROG = os.path.basename(os.path.splitext(__file__)[0])
# Version string of this program.
VERSION = '9.1.1'

# AsciiDoc global configuration file directory.
# NOTE: CONF_DIR is "fixed up" by Makefile -- don't rename or change syntax.
CONF_DIR = '/usr/local/etc/asciidoc'
50
51
52######################################################################
53# Default configuration file parameters.
54######################################################################
55
# Optional environment variable dictionary passed to
# executing programs. If set to None the existing
# environment is used.
# (shell() hands this value to subprocess.Popen as its env argument.)
ENV = None

# External executables.
# Each name is interpolated into a shell command line; asciidoc is
# additionally searched for on the PATH by A2X.load_conf().
ASCIIDOC = 'asciidoc'
XSLTPROC = 'xsltproc'
DBLATEX = 'dblatex'         # pdf generation.
FOP = 'fop'                 # pdf generation (--fop option).
W3M = 'w3m'                 # primary text file generator.
LYNX = 'lynx'               # alternate text file generator.
XMLLINT = 'xmllint'         # Set to '' to disable.
EPUBCHECK = 'epubcheck'     # Set to '' to disable.
# External executable default options.
# A2X.execute() appends the ASCIIDOC, BACKEND, DBLATEX, FOP and XSLTPROC
# option strings to the corresponding command options.
ASCIIDOC_OPTS = ''
BACKEND_OPTS = ''
DBLATEX_OPTS = ''
FOP_OPTS = ''
LYNX_OPTS = '-dump'
W3M_OPTS = '-dump -cols 70 -T text/html -no-graph'
XSLTPROC_OPTS = ''
78
79######################################################################
80# End of configuration file parameters.
81######################################################################
82
83
84#####################################################################
85# Utility functions
86#####################################################################
87
# Object whose .verbose and .dry_run attributes are consulted by the utility
# functions below. It starts out as None -- presumably it is assigned the
# parsed command options before any of them is called (not visible here).
OPTIONS = None  # These functions read verbose and dry_run command options.
89
90
def errmsg(msg):
    """Write msg to stderr, prefixed with the program name.

    The text itself ends with a newline, so the message is followed by a
    blank line.
    """
    text = '%s: %s\n' % (PROG, msg)
    print(text, file=sys.stderr)
93
94
def warning(msg):
    """Report msg on stderr as a warning."""
    text = 'WARNING: %s' % msg
    errmsg(text)
97
98
def infomsg(msg):
    """Write msg to stdout, prefixed with the program name."""
    text = '%s: %s' % (PROG, msg)
    print(text)
101
102
def die(msg, exit_code=1):
    """Report msg on stderr as an error, then exit with exit_code."""
    text = 'ERROR: %s' % msg
    errmsg(text)
    sys.exit(exit_code)
106
107
def trace():
    """Print the current exception traceback to stderr between two rules."""
    rule = '-'*60
    errmsg(rule)
    traceback.print_exc(file=sys.stderr)
    errmsg(rule)
113
114
def verbose(msg):
    """Print msg only when the verbose or dry_run option is set."""
    if not (OPTIONS.verbose or OPTIONS.dry_run):
        return
    infomsg(msg)
118
119
class AttrDict(dict):
    """
    Like a dictionary except values can be accessed as attributes i.e. obj.foo
    can be used in addition to obj['foo'].
    If self._default has been set then it will be returned if a non-existent
    attribute is accessed (instead of raising an AttributeError).
    Attribute assignment and deletion operate on the dictionary items.
    """
    def __getattr__(self, key):
        # Only called when normal attribute lookup fails, so dict methods
        # are never shadowed by items.
        try:
            return self[key]
        except KeyError as k:
            if '_default' in self:
                return self['_default']
            # Name the missing attribute so the error is diagnosable.
            raise AttributeError(key) from k

    def __setattr__(self, key, value):
        self[key] = value

    def __delattr__(self, key):
        try:
            del self[key]
        except KeyError as k:
            raise AttributeError(key) from k

    def __repr__(self):
        return '<AttrDict ' + dict.__repr__(self) + '>'

    def __getstate__(self):
        # Pickle state is a plain dict copy of the items.
        return dict(self)

    def __setstate__(self, value):
        for k, v in value.items():
            self[k] = v
154
155
def isexecutable(file_name):
    """Return True if file_name is an existing regular file that the
    current process is allowed to execute."""
    if not os.path.isfile(file_name):
        return False
    return os.access(file_name, os.X_OK)
158
159
def find_executable(file_name):
    '''
    Locate the executable file_name.
    A name that includes a directory is checked as given; a bare name is
    looked up in each directory of the PATH environment variable (or
    os.defpath when PATH is unset) and its real path is returned.
    On Windows a name without an extension is tried with .cmd, .bat and
    .exe appended, in that order.
    Return the path name or None if not found.
    '''
    def locate(name):
        if os.path.split(name)[0] != '':
            # Name includes a directory so the PATH is not searched.
            return name if isexecutable(name) else None
        search_path = os.environ.get('PATH', os.defpath)
        for directory in search_path.split(os.pathsep):
            candidate = os.path.join(directory, name)
            if isexecutable(candidate):
                return os.path.realpath(candidate)
        return None

    if os.name != 'nt' or os.path.splitext(file_name)[1] != '':
        return locate(file_name)
    for ext in ('.cmd', '.bat', '.exe'):
        found = locate(file_name + ext)
        if found:
            return found
    return None
185
186
def write_file(filename, data, mode='w', encoding='utf-8'):
    """Write data to filename, replacing or appending according to mode.

    Text modes encode with encoding (UTF-8 by default). For binary modes
    (a 'b' in mode) the encoding is ignored, because open() rejects an
    encoding argument in binary mode; data must then be bytes.
    """
    if 'b' in mode:
        encoding = None
    with open(filename, mode=mode, encoding=encoding) as f:
        f.write(data)
190
191
def read_file(filename, mode='r', encoding='utf-8'):
    """Return the entire contents of filename.

    Text modes decode with encoding (UTF-8 by default) and return str. For
    binary modes (a 'b' in mode) the encoding is ignored, because open()
    rejects an encoding argument in binary mode, and bytes are returned.
    """
    if 'b' in mode:
        encoding = None
    with open(filename, mode=mode, encoding=encoding) as f:
        return f.read()
195
196
def shell_cd(path):
    """Change the working directory to path (only logged in a dry run)."""
    verbose('chdir %s' % path)
    if OPTIONS.dry_run:
        return
    os.chdir(path)
201
202
def shell_makedirs(path):
    """Create directory path and any missing parents unless it already
    exists (only logged in a dry run)."""
    if not os.path.isdir(path):
        verbose('creating %s' % path)
        if not OPTIONS.dry_run:
            os.makedirs(path)
209
210
def shell_copy(src, dst):
    """Copy file src to dst (only logged in a dry run)."""
    verbose('copying "%s" to "%s"' % (src, dst))
    if OPTIONS.dry_run:
        return
    shutil.copy(src, dst)
215
216
def shell_rm(path):
    """Delete file path if it exists (only logged in a dry run)."""
    if os.path.exists(path):
        verbose('deleting %s' % path)
        if not OPTIONS.dry_run:
            os.unlink(path)
223
224
def shell_rmtree(path):
    """Delete directory tree path if it exists (only logged in a dry run)."""
    if os.path.isdir(path):
        verbose('deleting %s' % path)
        if not OPTIONS.dry_run:
            shutil.rmtree(path)
231
232
def shell(cmd, raise_error=True):
    '''
    Run the command line cmd through the system shell and return the tuple
    (stdoutdata, stderrdata, returncode), the output being text.
    If raise_error is True a non-zero exit status terminates the
    application.
    In a dry run the command is only logged and None is returned.
    '''
    if os.name == 'nt':
        # TODO: this is probably unnecessary, see:
        # http://groups.google.com/group/asciidoc/browse_frm/thread/9442ee0c419f1242
        # Windows doesn't like running scripts directly so explicitly
        # specify python interpreter.
        # Extract first (quoted or unquoted) argument.
        first_arg = (re.match(r'^\s*"\s*(?P<arg0>[^"]+)\s*"', cmd) or
                     re.match(r'^\s*(?P<arg0>[^ ]+)', cmd))
        if first_arg.group('arg0').endswith('.py'):
            cmd = 'python ' + cmd
        # Remove redundant quoting -- this is not just cosmetic,
        # quoting seems to dramatically decrease the allowed command
        # length in Windows XP.
        cmd = re.sub(r'"([^ ]+?)"', r'\1', cmd)
    verbose('executing: %s' % cmd)
    if OPTIONS.dry_run:
        return None
    try:
        process = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                                   stderr=subprocess.PIPE, shell=True,
                                   env=ENV, universal_newlines=True)
    except OSError as e:
        die('failed: %s: %s' % (cmd, e))
    out, err = process.communicate()
    if OPTIONS.verbose:
        print(out)
        print(err)
    status = process.returncode
    if raise_error and status != 0:
        die('%s returned non-zero exit status %d' % (cmd, status))
    return (out, err, status)
270
271
def find_resources(files, tagname, attrname, filter=None):
    '''
    Search all files and return a sorted, duplicate-free list of local URIs
    from attrname attribute values in tagname tags.
    Handles HTML open and XHTML closed tags.
    Non-local URIs are skipped, as are tags that lack the attribute or give
    it no value.
    files can be a file name or a list of file names.
    The filter function takes a dictionary of tag attributes and returns True
    if the URI is to be included.
    Each file is decoded using the encoding named in its XML declaration or
    Content-Type meta tag, falling back to UTF-8.
    In a dry run the files are not read and the result is empty.
    '''
    class FindResources(HTMLParser):
        # Nested parser class shares locals with enclosing function.
        def handle_startendtag(self, tag, attrs):
            self.handle_starttag(tag, attrs)

        def handle_starttag(self, tag, attrs):
            if tag != tagname:
                return
            attrs = dict(attrs)
            if filter is not None and not filter(attrs):
                return
            # A matching tag without the attribute has no URI to collect
            # (indexing the dictionary here would raise KeyError).
            value = attrs.get(attrname)
            if not value:
                return
            # Accept only local URIs.
            uri = urlparse(value)
            if uri[0] in ('', 'file') and not uri[1] and uri[2]:
                result.append(uri[2])

    if isinstance(files, str):
        files = [files]
    result = []
    for filename in files:
        verbose('finding resources in: %s' % filename)
        if OPTIONS.dry_run:
            continue
        parser = FindResources()
        with open(filename, 'rb') as open_file:
            contents = open_file.read()
        # Raw bytes literals: \A and \? are regex syntax, not string escapes.
        mo = re.search(br'\A<\?xml.* encoding="(.*?)"', contents)
        if mo is None:
            mo = re.search(br'<meta http\-equiv="Content\-Type" content="text\/html; charset=(.*?)">', contents)
        contents = contents.decode(mo.group(1).decode('utf-8') if mo else 'utf-8')
        parser.feed(contents)
        parser.close()
    # Drop duplicate values and sort.
    return sorted(set(result))
314
315
316# NOT USED.
def copy_files(files, src_dir, dst_dir):
    '''
    Copy each relative file name in files from src_dir to dst_dir, creating
    destination sub-directories as needed.
    Absolute names and files that already exist in dst_dir are skipped; a
    missing source file only produces a warning.
    '''
    for name in files:
        relative = os.path.normpath(name)
        if os.path.isabs(relative):
            continue
        src = os.path.join(src_dir, relative)
        dst = os.path.join(dst_dir, relative)
        if os.path.exists(dst):
            continue
        if not os.path.isfile(src):
            warning('missing file: %s' % src)
            continue
        shell_makedirs(os.path.dirname(dst))
        shell_copy(src, dst)
334
335
def find_files(path, pattern):
    '''
    Walk the directory tree rooted at path and return the normalized path
    names of all files whose base name matches the fnmatch pattern.
    '''
    return [
        os.path.normpath(os.path.join(dirpath, name))
        for dirpath, _subdirs, names in os.walk(path)
        for name in names
        if fnmatch.fnmatch(name, pattern)
    ]
346
347
def exec_xsltproc(xsl_file, xml_file, dst_dir, opts=''):
    '''
    Run xsltproc on xml_file with stylesheet xsl_file and options opts, using
    dst_dir as the working directory. The previous working directory is
    restored afterwards, even when the command fails.
    '''
    previous_dir = os.getcwd()
    shell_cd(dst_dir)
    try:
        command = '"%s" %s "%s" "%s"' % (XSLTPROC, opts, xsl_file, xml_file)
        shell(command)
    finally:
        shell_cd(previous_dir)
355
356
def get_source_options(asciidoc_file):
    '''
    Look for a2x command options in AsciiDoc source file.
    Options are taken from every line that starts with '//' followed by
    optional white space and 'a2x:'; the remainder of each such line is
    collected, in file order.
    Return the list of options (empty if the file does not exist).
    Option lines that are not pure ASCII are skipped with a warning.
    Limitation: options cannot contain double-quote characters.
    '''
    def parse_options(options):
        # Split options on spaces; a double-quoted section keeps its spaces
        # and ends the current option at the closing quote.
        parsed = []
        inquotes = False
        opt = ''
        for c in options:
            if c == '"':
                if inquotes:
                    parsed.append(opt)
                    opt = ''
                    inquotes = False
                else:
                    inquotes = True
            elif c == ' ':
                if inquotes:
                    opt += c
                elif opt:
                    parsed.append(opt)
                    opt = ''
            else:
                opt += c
        if opt:
            parsed.append(opt)
        return parsed

    if not os.path.isfile(asciidoc_file):
        return []
    options = ''
    # Read as bytes so a source file in any encoding can be scanned.
    with open(asciidoc_file, 'rb') as f:
        for line_number, line in enumerate(f, 1):
            # Raw bytes literal: \s is regex syntax, not a string escape.
            mo = re.search(br'^//\s*a2x:', line)
            if mo:
                try:
                    options += ' ' + line[mo.end():].strip().decode('ascii')
                except UnicodeDecodeError as e:
                    warning(
                        "Could not decode option to %s " % e.encoding +
                        "on line %s in %s" % (line_number, asciidoc_file)
                    )
    return parse_options(options)
403
404
405#####################################################################
406# Application class
407#####################################################################
408
409class A2X(AttrDict):
410    '''
411    a2x options and conversion functions.
412    '''
413
414    def execute(self):
415        '''
416        Process a2x command.
417        '''
418        self.process_options()
419        # Append configuration file options.
420        self.asciidoc_opts += ' ' + ASCIIDOC_OPTS
421        self.dblatex_opts += ' ' + DBLATEX_OPTS
422        self.fop_opts += ' ' + FOP_OPTS
423        self.xsltproc_opts += ' ' + XSLTPROC_OPTS
424        self.backend_opts += ' ' + BACKEND_OPTS
425        # Execute to_* functions.
426        if self.backend:
427            self.to_backend()
428        else:
429            self.__getattribute__('to_'+self.format)()
430        if not (self.keep_artifacts or self.format == 'docbook' or self.skip_asciidoc):
431            shell_rm(self.dst_path('.xml'))
432
433    def load_conf(self):
434        '''
435        Load a2x configuration file from default locations and --conf-file
436        option.
437        '''
438        global ASCIIDOC
439        CONF_FILE = 'a2x.conf'
440        a2xdir = os.path.dirname(os.path.realpath(__file__))
441        conf_files = []
442        # From a2x.py directory.
443        conf_files.append(os.path.join(a2xdir, CONF_FILE))
444        # If the asciidoc executable and conf files are in the a2x directory
445        # then use the local copy of asciidoc and skip the global a2x conf.
446        asciidoc = os.path.join(a2xdir, 'asciidoc.py')
447        asciidoc_conf = os.path.join(a2xdir, 'asciidoc.conf')
448        if os.path.isfile(asciidoc) and os.path.isfile(asciidoc_conf):
449            self.asciidoc = asciidoc
450        else:
451            self.asciidoc = None
452            # From global conf directory.
453            conf_files.append(os.path.join(CONF_DIR, CONF_FILE))
454        # From $HOME directory.
455        home_dir = os.environ.get('HOME')
456        if home_dir is not None:
457            conf_files.append(os.path.join(home_dir, '.asciidoc', CONF_FILE))
458        # If asciidoc is not local to a2x then search the PATH.
459        if not self.asciidoc:
460            self.asciidoc = find_executable(ASCIIDOC)
461            if not self.asciidoc:
462                die('unable to find asciidoc: %s' % ASCIIDOC)
463        # From backend plugin directory.
464        if self.backend is not None:
465            stdout = shell(self.asciidoc + ' --backend list')[0]
466            backends = [(i, os.path.split(i)[1]) for i in stdout.splitlines()]
467            backend_dir = [i[0] for i in backends if i[1] == self.backend]
468            if len(backend_dir) == 0:
469                die('missing %s backend' % self.backend)
470            if len(backend_dir) > 1:
471                die('more than one %s backend' % self.backend)
472            verbose('found %s backend directory: %s' %
473                    (self.backend, backend_dir[0]))
474            conf_files.append(os.path.join(backend_dir[0], 'a2x-backend.py'))
475        # From --conf-file option.
476        if self.conf_file is not None:
477            if not os.path.isfile(self.conf_file):
478                die('missing configuration file: %s' % self.conf_file)
479            conf_files.append(self.conf_file)
480        # From --xsl-file option.
481        if self.xsl_file is not None:
482            if not os.path.isfile(self.xsl_file):
483                die('missing XSL file: %s' % self.xsl_file)
484            self.xsl_file = os.path.abspath(self.xsl_file)
485        # Load ordered files.
486        for f in conf_files:
487            if os.path.isfile(f):
488                verbose('loading configuration file: %s' % f)
489                exec(open(f).read(), globals())
490
491    def process_options(self):
492        '''
493        Validate and command options and set defaults.
494        '''
495        if not os.path.isfile(self.asciidoc_file):
496            die('missing ASCIIDOC_FILE: %s' % self.asciidoc_file)
497        self.asciidoc_file = os.path.abspath(self.asciidoc_file)
498        if os.path.splitext(self.asciidoc_file)[1].lower() == '.xml':
499            self.skip_asciidoc = True
500        else:
501            self.skip_asciidoc = False
502        if not self.destination_dir:
503            self.destination_dir = os.path.dirname(self.asciidoc_file)
504        else:
505            if not os.path.isdir(self.destination_dir):
506                die('missing --destination-dir: %s' % self.destination_dir)
507            self.destination_dir = os.path.abspath(self.destination_dir)
508            if self.format not in ('chunked', 'epub', 'htmlhelp', 'xhtml', 'manpage'):
509                warning('--destination-dir option is only applicable to HTML and manpage based outputs')
510        self.resource_dirs = []
511        self.resource_files = []
512        if self.resource_manifest:
513            if not os.path.isfile(self.resource_manifest):
514                die('missing --resource-manifest: %s' % self.resource_manifest)
515            with open(self.resource_manifest) as f:
516                for r in f:
517                    self.resources.append(r.strip())
518        for r in self.resources:
519            r = os.path.expanduser(r)
520            r = os.path.expandvars(r)
521            if r.endswith('/') or r.endswith('\\'):
522                if os.path.isdir(r):
523                    self.resource_dirs.append(r)
524                else:
525                    die('missing resource directory: %s' % r)
526            elif os.path.isdir(r):
527                self.resource_dirs.append(r)
528            elif r.startswith('.') and '=' in r:
529                ext, mimetype = r.split('=')
530                mimetypes.add_type(mimetype, ext)
531            else:
532                self.resource_files.append(r)
533        for p in (os.path.dirname(self.asciidoc), CONF_DIR):
534            for d in ('images', 'stylesheets'):
535                d = os.path.join(p, d)
536                if os.path.isdir(d):
537                    self.resource_dirs.append(d)
538        verbose('resource files: %s' % self.resource_files)
539        verbose('resource directories: %s' % self.resource_dirs)
540        if not self.doctype and self.format == 'manpage':
541            self.doctype = 'manpage'
542        if self.doctype:
543            self.asciidoc_opts += ' --doctype %s' % self.doctype
544        for attr in self.attributes:
545            self.asciidoc_opts += ' --attribute "%s"' % attr
546        self.xsltproc_opts += ' --nonet --path /usr/local/share/xsl/docbook/manpages/'
547        if self.verbose:
548            self.asciidoc_opts += ' --verbose'
549            self.dblatex_opts += ' -V'
550        if self.icons or self.icons_dir:
551            params = [
552                'callout.graphics 1',
553                'navig.graphics 1',
554                'admon.textlabel 0',
555                'admon.graphics 1',
556            ]
557            if self.icons_dir:
558                params += [
559                    'admon.graphics.path "%s/"' % self.icons_dir,
560                    'callout.graphics.path "%s/callouts/"' % self.icons_dir,
561                    'navig.graphics.path "%s/"' % self.icons_dir,
562                ]
563        else:
564            params = [
565                'callout.graphics 0',
566                'navig.graphics 0',
567                'admon.textlabel 1',
568                'admon.graphics 0',
569            ]
570        if self.stylesheet:
571            params += ['html.stylesheet "%s"' % self.stylesheet]
572        if self.format == 'htmlhelp':
573            params += ['htmlhelp.chm "%s"' % self.basename('.chm'),
574                       'htmlhelp.hhp "%s"' % self.basename('.hhp'),
575                       'htmlhelp.hhk "%s"' % self.basename('.hhk'),
576                       'htmlhelp.hhc "%s"' % self.basename('.hhc')]
577        if self.doctype == 'book':
578            params += ['toc.section.depth 1']
579            # Books are chunked at chapter level.
580            params += ['chunk.section.depth 0']
581        for o in params:
582            if o.split()[0]+' ' not in self.xsltproc_opts:
583                self.xsltproc_opts += ' --stringparam ' + o
584        if self.fop_opts:
585            self.fop = True
586
587    def dst_path(self, ext):
588        '''
589        Return name of file or directory in the destination directory with
590        the same name as the asciidoc source file but with extension ext.
591        '''
592        return os.path.join(self.destination_dir, self.basename(ext))
593
594    def basename(self, ext):
595        '''
596        Return the base name of the asciidoc source file but with extension
597        ext.
598        '''
599        return os.path.basename(os.path.splitext(self.asciidoc_file)[0]) + ext
600
601    def asciidoc_conf_file(self, path):
602        '''
603        Return full path name of file in asciidoc configuration files directory.
604        Search first the directory containing the asciidoc executable then
605        the global configuration file directory.
606        '''
607        f = os.path.join(os.path.dirname(self.asciidoc), path)
608        if not os.path.isfile(f):
609            f = os.path.join(CONF_DIR, path)
610            if not os.path.isfile(f):
611                die('missing configuration file: %s' % f)
612        return os.path.normpath(f)
613
614    def xsl_stylesheet(self, file_name=None):
615        '''
616        Return full path name of file in asciidoc docbook-xsl configuration
617        directory.
618        If an XSL file was specified with the --xsl-file option then it is
619        returned.
620        '''
621        if self.xsl_file is not None:
622            return self.xsl_file
623        if not file_name:
624            file_name = self.format + '.xsl'
625        return self.asciidoc_conf_file(os.path.join('docbook-xsl', file_name))
626
627    def copy_resources(self, html_files, src_dir, dst_dir, resources=[]):
628        '''
629        Search html_files for images and CSS resource URIs (html_files can
630        be a list of file names or a single file name).
631        Copy them from the src_dir to the dst_dir.
632        If not found in src_dir then recursively search all specified
633        resource directories.
634        Optional additional resources files can be passed in the resources
635        list.
636        '''
637        resources = resources[:]
638        resources += find_resources(
639            html_files,
640            'link',
641            'href',
642            lambda attrs: attrs.get('type') == 'text/css'
643        )
644        resources += find_resources(html_files, 'img', 'src')
645        resources += self.resource_files
646        resources = list(set(resources))    # Drop duplicates.
647        resources.sort()
648        for f in resources:
649            if '=' in f:
650                src, dst = f.split('=')
651                if not dst:
652                    dst = src
653            else:
654                src = dst = f
655            src = os.path.normpath(src)
656            dst = os.path.normpath(dst)
657            if os.path.isabs(dst):
658                die('absolute resource file name: %s' % dst)
659            if dst.startswith(os.pardir):
660                die('resource file outside destination directory: %s' % dst)
661            src = os.path.join(src_dir, src)
662            dst = os.path.join(dst_dir, dst)
663            if not os.path.isfile(src):
664                for d in self.resource_dirs:
665                    d = os.path.join(src_dir, d)
666                    found = find_files(d, os.path.basename(src))
667                    if found:
668                        src = found[0]
669                        break
670                else:
671                    if not os.path.isfile(dst):
672                        die('missing resource: %s' % src)
673                    continue
674            # Arrive here if resource file has been found.
675            if os.path.normpath(src) != os.path.normpath(dst):
676                dstdir = os.path.dirname(dst)
677                shell_makedirs(dstdir)
678                shell_copy(src, dst)
679
680    def to_backend(self):
681        '''
682        Convert AsciiDoc source file to a backend output file using the global
683        'to_<backend name>' function (loaded from backend plugin a2x-backend.py
684        file).
685        Executes the global function in an A2X class instance context.
686        '''
687        eval('to_%s(self)' % self.backend)
688
689    def to_docbook(self):
690        '''
691        Use asciidoc to convert asciidoc_file to DocBook.
692        args is a string containing additional asciidoc arguments.
693        '''
694        docbook_file = self.dst_path('.xml')
695        if self.skip_asciidoc:
696            if not os.path.isfile(docbook_file):
697                die('missing docbook file: %s' % docbook_file)
698            return
699        shell('"%s" --backend docbook -a "a2x-format=%s" %s --out-file "%s" "%s"' %
700             (self.asciidoc, self.format, self.asciidoc_opts, docbook_file, self.asciidoc_file))
701        if not self.no_xmllint and XMLLINT:
702            xmllint_options = ['--nonet', '--noout', '--valid']
703            if 'SGML_CATALOG_FILES' in os.environ:
704                xmllint_options.append('--catalogs')
705            shell('"%s" %s "%s"' % (XMLLINT, " ".join(xmllint_options), docbook_file))
706
707    def to_xhtml(self):
708        self.to_docbook()
709        docbook_file = self.dst_path('.xml')
710        xhtml_file = self.dst_path('.html')
711        opts = '%s --output "%s"' % (self.xsltproc_opts, xhtml_file)
712        exec_xsltproc(self.xsl_stylesheet(), docbook_file, self.destination_dir, opts)
713        src_dir = os.path.dirname(self.asciidoc_file)
714        self.copy_resources(xhtml_file, src_dir, self.destination_dir)
715
716    def to_manpage(self):
717        self.to_docbook()
718        docbook_file = self.dst_path('.xml')
719        opts = self.xsltproc_opts
720        exec_xsltproc(self.xsl_stylesheet(), docbook_file, self.destination_dir, opts)
721
722    def to_pdf(self):
723        if self.fop:
724            self.exec_fop()
725        else:
726            self.exec_dblatex()
727
728    def exec_fop(self):
729        self.to_docbook()
730        docbook_file = self.dst_path('.xml')
731        xsl = self.xsl_stylesheet('fo.xsl')
732        fo = self.dst_path('.fo')
733        pdf = self.dst_path('.pdf')
734        opts = '%s --output "%s"' % (self.xsltproc_opts, fo)
735        exec_xsltproc(xsl, docbook_file, self.destination_dir, opts)
736        shell('"%s" %s -fo "%s" -pdf "%s"' % (FOP, self.fop_opts, fo, pdf))
737        if not self.keep_artifacts:
738            shell_rm(fo)
739
740    def exec_dblatex(self):
741        self.to_docbook()
742        docbook_file = self.dst_path('.xml')
743        xsl = self.asciidoc_conf_file(os.path.join('dblatex','asciidoc-dblatex.xsl'))
744        sty = self.asciidoc_conf_file(os.path.join('dblatex','asciidoc-dblatex.sty'))
745        shell('"%s" -t %s -p "%s" -s "%s" %s "%s"' %
746             (DBLATEX, self.format, xsl, sty, self.dblatex_opts, docbook_file))
747
    def to_dvi(self):
        '''Generate DVI output; dblatex receives the format via its -t option.'''
        self.exec_dblatex()
750
    def to_ps(self):
        '''Generate PostScript output; dblatex receives the format via its -t option.'''
        self.exec_dblatex()
753
    def to_tex(self):
        '''Generate TeX output; dblatex receives the format via its -t option.'''
        self.exec_dblatex()
756
    def to_htmlhelp(self):
        '''Generate HTML Help files; shares the chunked conversion, which
        selects the .htmlhelp output directory from the format.'''
        self.to_chunked()
759
760    def to_chunked(self):
761        self.to_docbook()
762        docbook_file = self.dst_path('.xml')
763        opts = self.xsltproc_opts
764        xsl_file = self.xsl_stylesheet()
765        if self.format == 'chunked':
766            dst_dir = self.dst_path('.chunked')
767        elif self.format == 'htmlhelp':
768            dst_dir = self.dst_path('.htmlhelp')
769        if 'base.dir ' not in opts:
770            opts += ' --stringparam base.dir "%s/"' % os.path.basename(dst_dir)
771        # Create content.
772        shell_rmtree(dst_dir)
773        shell_makedirs(dst_dir)
774        exec_xsltproc(xsl_file, docbook_file, self.destination_dir, opts)
775        html_files = find_files(dst_dir, '*.html')
776        src_dir = os.path.dirname(self.asciidoc_file)
777        self.copy_resources(html_files, src_dir, dst_dir)
778
779    def update_epub_manifest(self, opf_file):
780        '''
781        Scan the OEBPS directory for any files that have not been registered in
782        the OPF manifest then add them to the manifest.
783        '''
784        opf_dir = os.path.dirname(opf_file)
785        resource_files = []
786        for (p, dirs, files) in os.walk(os.path.dirname(opf_file)):
787            for f in files:
788                f = os.path.join(p, f)
789                if os.path.isfile(f):
790                    assert f.startswith(opf_dir)
791                    f = '.' + f[len(opf_dir):]
792                    f = os.path.normpath(f)
793                    if f not in ['content.opf']:
794                        resource_files.append(f)
795        opf = xml.dom.minidom.parseString(read_file(opf_file))
796        manifest_files = []
797        manifest = opf.getElementsByTagName('manifest')[0]
798        for el in manifest.getElementsByTagName('item'):
799            f = el.getAttribute('href')
800            f = os.path.normpath(f)
801            manifest_files.append(f)
802        count = 0
803        for f in resource_files:
804            if f not in manifest_files:
805                count += 1
806                verbose('adding to manifest: %s' % f)
807                item = opf.createElement('item')
808                item.setAttribute('href', f.replace(os.path.sep, '/'))
809                item.setAttribute('id', 'a2x-%d' % count)
810                mimetype = mimetypes.guess_type(f)[0]
811                if mimetype is None:
812                    die('unknown mimetype: %s' % f)
813                item.setAttribute('media-type', mimetype)
814                manifest.appendChild(item)
815        if count > 0:
816            write_file(opf_file, opf.toxml())
817
818    def to_epub(self):
819        self.to_docbook()
820        xsl_file = self.xsl_stylesheet()
821        docbook_file = self.dst_path('.xml')
822        epub_file = self.dst_path('.epub')
823        build_dir = epub_file + '.d'
824        shell_rmtree(build_dir)
825        shell_makedirs(build_dir)
826        # Create content.
827        exec_xsltproc(xsl_file, docbook_file, build_dir, self.xsltproc_opts)
828        # Copy resources referenced in the OPF and resources referenced by the
829        # generated HTML (in theory DocBook XSL should ensure they are
830        # identical but this is not always the case).
831        src_dir = os.path.dirname(self.asciidoc_file)
832        dst_dir = os.path.join(build_dir, 'OEBPS')
833        opf_file = os.path.join(dst_dir, 'content.opf')
834        opf_resources = find_resources(opf_file, 'item', 'href')
835        html_files = find_files(dst_dir, '*.html')
836        self.copy_resources(html_files, src_dir, dst_dir, opf_resources)
837        # Register any unregistered resources.
838        self.update_epub_manifest(opf_file)
839        # Build epub archive.
840        cwd = os.getcwd()
841        shell_cd(build_dir)
842        try:
843            if not self.dry_run:
844                zip_archive = zipfile.ZipFile(epub_file, 'w')
845                try:
846                    # Create and add uncompressed mimetype file.
847                    verbose('archiving: mimetype')
848                    write_file('mimetype', 'application/epub+zip')
849                    zip_archive.write('mimetype', compress_type=zipfile.ZIP_STORED)
850                    # Compress all remaining files.
851                    for (p, dirs, files) in os.walk('.'):
852                        for f in files:
853                            f = os.path.normpath(os.path.join(p,f))
854                            if f != 'mimetype':
855                                verbose('archiving: %s' % f)
856                                zip_archive.write(f, compress_type=zipfile.ZIP_DEFLATED)
857                finally:
858                    zip_archive.close()
859            verbose('created archive: %s' % epub_file)
860        finally:
861            shell_cd(cwd)
862        if not self.keep_artifacts:
863            shell_rmtree(build_dir)
864        if self.epubcheck and EPUBCHECK:
865            if not find_executable(EPUBCHECK):
866                warning('epubcheck skipped: unable to find executable: %s' % EPUBCHECK)
867            else:
868                shell('"%s" "%s"' % (EPUBCHECK, epub_file))
869
870    def to_text(self):
871        text_file = self.dst_path('.text')
872        html_file = self.dst_path('.text.html')
873        if self.lynx:
874            shell('"%s" %s --conf-file "%s" -b html4 -a "a2x-format=%s" -o "%s" "%s"' %
875                 (self.asciidoc, self.asciidoc_opts, self.asciidoc_conf_file('text.conf'),
876                  self.format, html_file, self.asciidoc_file))
877            cmd = '"%s" %s "%s" > "%s"' % (LYNX, LYNX_OPTS, html_file, text_file)
878            shell(cmd)
879        else:
880            # Use w3m(1).
881            self.to_docbook()
882            docbook_file = self.dst_path('.xml')
883            opts = '%s --output "%s"' % (self.xsltproc_opts, html_file)
884            exec_xsltproc(self.xsl_stylesheet(), docbook_file,
885                    self.destination_dir, opts)
886            cmd = '"%s" %s "%s" > "%s"' % (W3M, W3M_OPTS, html_file, text_file)
887            shell(cmd)
888        if not self.keep_artifacts:
889            shell_rm(html_file)
890
891
892#####################################################################
893# Script main line.
894#####################################################################
895
if __name__ == '__main__':
    # Command-line entry point: declare the options, parse them together with
    # the options returned by get_source_options(), configure an A2X instance
    # and run the conversion.
    description = '''A toolchain manager for AsciiDoc (converts Asciidoc text files to other file formats)'''
    from optparse import OptionParser
    parser = OptionParser(usage='usage: %prog [OPTIONS] SOURCE_FILE',
        version='%s %s' % (PROG, VERSION),
        description=description)
    parser.add_option('-a', '--attribute',
        action='append', dest='attributes', default=[], metavar='ATTRIBUTE',
        help='set asciidoc attribute value')
    parser.add_option('--asciidoc-opts',
        action='append', dest='asciidoc_opts', default=[],
        metavar='ASCIIDOC_OPTS', help='asciidoc options')
    # DEPRECATED
    parser.add_option('--copy',
        action='store_true', dest='copy', default=False,
        help='DEPRECATED: does nothing')
    parser.add_option('--conf-file',
        dest='conf_file', default=None, metavar='CONF_FILE',
        help='configuration file')
    parser.add_option('-D', '--destination-dir',
        action='store', dest='destination_dir', default=None, metavar='PATH',
        help='output directory (defaults to SOURCE_FILE directory)')
    parser.add_option('-d', '--doctype',
        action='store', dest='doctype', metavar='DOCTYPE',
        choices=('article', 'manpage', 'book'),
        help='article, manpage, book')
    parser.add_option('-b', '--backend',
        action='store', dest='backend', metavar='BACKEND',
        help='name of backend plugin')
    parser.add_option('--epubcheck',
        action='store_true', dest='epubcheck', default=False,
        help='check EPUB output with epubcheck')
    parser.add_option('-f', '--format',
        action='store', dest='format', metavar='FORMAT', default='pdf',
        choices=('chunked', 'epub', 'htmlhelp', 'manpage', 'pdf', 'text',
                 'xhtml', 'dvi', 'ps', 'tex', 'docbook'),
        help='chunked, epub, htmlhelp, manpage, pdf, text, xhtml, dvi, ps, tex, docbook')
    parser.add_option('--icons',
        action='store_true', dest='icons', default=False,
        help='use admonition, callout and navigation icons')
    parser.add_option('--icons-dir',
        action='store', dest='icons_dir',
        default=None, metavar='PATH',
        help='admonition and navigation icon directory')
    parser.add_option('-k', '--keep-artifacts',
        action='store_true', dest='keep_artifacts', default=False,
        help='do not delete temporary build files')
    parser.add_option('--lynx',
        action='store_true', dest='lynx', default=False,
        help='use lynx to generate text files')
    parser.add_option('-L', '--no-xmllint',
        action='store_true', dest='no_xmllint', default=False,
        help='do not check asciidoc output with xmllint')
    parser.add_option('-n', '--dry-run',
        action='store_true', dest='dry_run', default=False,
        help='just print the commands that would have been executed')
    parser.add_option('-r', '--resource',
        action='append', dest='resources', default=[],
        metavar='PATH',
        help='resource file or directory containing resource files')
    parser.add_option('-m', '--resource-manifest',
        action='store', dest='resource_manifest', default=None, metavar='FILE',
        help='read resources from FILE')
    # DEPRECATED (appends to the same destination as --resource)
    parser.add_option('--resource-dir',
        action='append', dest='resources', default=[],
        metavar='PATH',
        help='DEPRECATED: use --resource')
    # DEPRECATED
    parser.add_option('-s', '--skip-asciidoc',
        action='store_true', dest='skip_asciidoc', default=False,
        help='DEPRECATED: redundant')
    parser.add_option('--stylesheet',
        action='store', dest='stylesheet', default=None,
        metavar='STYLESHEET',
        help='HTML CSS stylesheet file name')
    # DEPRECATED
    parser.add_option('--safe',
        action='store_true', dest='safe', default=False,
        help='DEPRECATED: does nothing')
    parser.add_option('--dblatex-opts',
        action='append', dest='dblatex_opts', default=[],
        metavar='DBLATEX_OPTS', help='dblatex options')
    parser.add_option('--backend-opts',
        action='append', dest='backend_opts', default=[],
        metavar='BACKEND_OPTS', help='backend plugin options')
    parser.add_option('--fop',
        action='store_true', dest='fop', default=False,
        help='use FOP to generate PDF files')
    parser.add_option('--fop-opts',
        action='append', dest='fop_opts', default=[],
        metavar='FOP_OPTS', help='options for FOP pdf generation')
    parser.add_option('--xsltproc-opts',
        action='append', dest='xsltproc_opts', default=[],
        metavar='XSLTPROC_OPTS', help='xsltproc options for XSL stylesheets')
    parser.add_option('--xsl-file',
        action='store', dest='xsl_file', metavar='XSL_FILE',
        help='custom XSL stylesheet')
    parser.add_option('-v', '--verbose',
        action='count', dest='verbose', default=0,
        help='increase verbosity')
    # Invoked without any argument: print the help text (optparse exits after
    # handling --help).
    if len(sys.argv) == 1:
        parser.parse_args(['--help'])
    # Options returned by get_source_options() for the last argument are
    # placed ahead of the real command-line arguments, so the command line is
    # parsed after them.
    source_options = get_source_options(sys.argv[-1])
    argv = source_options + sys.argv[1:]
    opts, args = parser.parse_args(argv)
    if len(args) != 1:
        parser.error('incorrect number of arguments')
    # Options that may be repeated are collected as lists; flatten each into
    # a single space-separated string.
    opts.asciidoc_opts = ' '.join(opts.asciidoc_opts)
    opts.dblatex_opts = ' '.join(opts.dblatex_opts)
    opts.fop_opts = ' '.join(opts.fop_opts)
    opts.xsltproc_opts = ' '.join(opts.xsltproc_opts)
    opts.backend_opts = ' '.join(opts.backend_opts)
    # Convert optparse.Values to a dict. vars() reads the attribute dict
    # directly, avoiding a repr()/eval() round trip of user-supplied values.
    opts = dict(vars(opts))
    a2x = A2X(opts)
    OPTIONS = a2x           # verbose and dry_run used by utility functions.
    verbose('args: %r' % argv)
    a2x.asciidoc_file = args[0]
    try:
        a2x.load_conf()
        a2x.execute()
    except KeyboardInterrupt:
        sys.exit(1)
1019