xref: /linux/Documentation/sphinx/kfigure.py (revision 2da68a77)
1# -*- coding: utf-8; mode: python -*-
2# pylint: disable=C0103, R0903, R0912, R0915
3u"""
4    scalable figure and image handling
5    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
6
7    Sphinx extension which implements scalable image handling.
8
9    :copyright:  Copyright (C) 2016  Markus Heiser
10    :license:    GPL Version 2, June 1991 see Linux/COPYING for details.
11
12    The build for image formats depends on image's source format and output's
13    destination format. This extension implements methods to simplify image
14    handling from the author's POV. Directives like ``kernel-figure`` implement
15    methods *to* always get the best output-format even if some tools are not
16    installed. For more details take a look at ``convert_image(...)`` which is
17    the core of all conversions.
18
19    * ``.. kernel-image``: for image handling / a ``.. image::`` replacement
20
21    * ``.. kernel-figure``: for figure handling / a ``.. figure::`` replacement
22
23    * ``.. kernel-render``: for render markup / a concept to embed *render*
24      markups (or languages). Supported markups (see ``RENDER_MARKUP_EXT``)
25
26      - ``DOT``: render embedded Graphviz's **DOT**
27      - ``SVG``: render embedded Scalable Vector Graphics (**SVG**)
28      - ... *developable*
29
30    Used tools:
31
32    * ``dot(1)``: Graphviz (https://www.graphviz.org). If Graphviz is not
33      available, the DOT language is inserted as literal-block.
34      For conversion to PDF, ``rsvg-convert(1)`` of librsvg
35      (https://gitlab.gnome.org/GNOME/librsvg) is used when available.
36
37    * SVG to PDF: To generate PDF, you need at least one of these tools:
38
39      - ``convert(1)``: ImageMagick (https://www.imagemagick.org)
40      - ``inkscape(1)``: Inkscape (https://inkscape.org/)
41
42    List of customizations:
43
44    * generate PDF from SVG / used by PDF (LaTeX) builder
45
46    * generate SVG (html-builder) and PDF (latex-builder) from DOT files.
47      DOT: see https://www.graphviz.org/content/dot-language
48
49    """
50
51import os
52from os import path
53import subprocess
54from hashlib import sha1
55import re
56from docutils import nodes
57from docutils.statemachine import ViewList
58from docutils.parsers.rst import directives
59from docutils.parsers.rst.directives import images
60import sphinx
61from sphinx.util.nodes import clean_astext
62import kernellog
63
# Pick the ``Figure`` base class matching the installed Sphinx version.
major, minor, patch = sphinx.version_info[:3]
if (major, minor) >= (1, 4):
    # patches.Figure landed in Sphinx 1.4.  Compare the version as a tuple:
    # testing ``major == 1`` alone would send Sphinx 2.x and later back to
    # the plain docutils Figure.
    from sphinx.directives.patches import Figure  # pylint: disable=C0413
else:
    Figure = images.Figure

__version__  = '1.0.0'
73
74# simple helper
75# -------------
76
def which(cmd):
    """Search the ``PATH`` environment for the executable ``cmd``.

    The folders listed in ``PATH`` (or in ``os.defpath`` if ``PATH`` is unset
    or empty) are probed in order.  The pathname of the first regular file
    named ``cmd`` that the current user may execute is returned.  If nothing
    is found, ``None`` is returned.
    """
    envpath = os.environ.get('PATH', None) or os.defpath
    for folder in envpath.split(os.pathsep):
        if not folder:
            # an empty PATH entry must not be turned into '/' + cmd
            continue
        fname = path.join(folder, cmd)
        # a plain file is not enough, the caller is going to run the match
        if path.isfile(fname) and os.access(fname, os.X_OK):
            return fname
    return None
88
def mkdir(folder, mode=0o775):
    """Create ``folder`` including missing parent folders.

    * ``folder`` pathname of the folder to create
    * ``mode``   permission bits handed over to ``os.makedirs``

    An already existing folder is not an error.  ``exist_ok`` is used instead
    of an ``isdir`` test followed by the creation, so two processes creating
    the same folder at the same time can not make each other fail.
    """
    os.makedirs(folder, mode, exist_ok=True)
92
def file2literal(fname):
    """Return the content of file ``fname`` as a ``literal_block`` node."""
    with open(fname, "r") as src:
        text = src.read()
    # the file content serves as raw source and as displayed text
    return nodes.literal_block(text, text)
98
def isNewer(path1, path2):
    """Tell whether ``path1`` is newer than ``path2``.

    Returns ``True`` if ``path1`` exists and its ``st_ctime`` is greater than
    the ``st_ctime`` of ``path2``, otherwise ``False``.
    """
    if not path.exists(path1):
        return False
    ctime1 = os.stat(path1).st_ctime
    ctime2 = os.stat(path2).st_ctime
    return ctime1 > ctime2
107
def pass_handle(self, node):           # pylint: disable=W0613
    """Node handler which does nothing and returns ``None``."""
    return None
110
111# setup conversion tools and sphinx extension
112# -------------------------------------------
113
# Pathnames of the conversion tools, ``None`` as long as a tool is not set:
#
# - dot_cmd:          Graphviz's dot(1)
# - convert_cmd:      ImageMagick's convert(1)
# - rsvg_convert_cmd: librsvg's rsvg-convert(1)
# - inkscape_cmd:     Inkscape's inkscape(1)
dot_cmd = convert_cmd = rsvg_convert_cmd = inkscape_cmd = None

# Tool properties:
#
# - dot_Tpdf:         dot(1) -Tpdf should be used
# - inkscape_ver_one: Inkscape prior to 1.0 uses different command options
dot_Tpdf = inkscape_ver_one = False
129
130
def setup(app):
    """Register the event handlers, directives and nodes of this extension.

    Returns the extension metadata (version and parallel build flags).
    """
    # check toolchain first
    app.connect('builder-inited', setupTools)

    # Each node gets the same visitor for all of these builders, the
    # departure handler does nothing.
    builder_names = ('html', 'latex', 'texinfo', 'text', 'man')
    registry = (
        # image handling
        ("kernel-image", KernelImage, kernel_image, visit_kernel_image),
        # figure handling
        ("kernel-figure", KernelFigure, kernel_figure, visit_kernel_figure),
        # render handling
        ('kernel-render', KernelRender, kernel_render, visit_kernel_render),
    )
    for directive_name, directive, node_class, visitor in registry:
        app.add_directive(directive_name, directive)
        handlers = {name: (visitor, pass_handle) for name in builder_names}
        app.add_node(node_class, **handlers)

    app.connect('doctree-read', add_kernel_figure_to_std_domain)

    return {
        'version': __version__,
        'parallel_read_safe': True,
        'parallel_write_safe': True,
    }
169
170
def setupTools(app):
    u"""
    Check available build tools and log some *verbose* messages.

    This function is called once, when the builder is initiated.  It sets the
    module globals ``dot_cmd``, ``dot_Tpdf``, ``convert_cmd``,
    ``rsvg_convert_cmd``, ``inkscape_cmd`` and ``inkscape_ver_one``.

    A tool which is found in ``PATH`` but whose probe command can not be run
    is reported by a warning and treated as *not installed*, instead of
    aborting the build with an exception.
    """
    global dot_cmd, dot_Tpdf, convert_cmd, rsvg_convert_cmd   # pylint: disable=W0603
    global inkscape_cmd, inkscape_ver_one  # pylint: disable=W0603
    kernellog.verbose(app, "kfigure: check installed tools ...")

    dot_cmd = which('dot')
    convert_cmd = which('convert')
    rsvg_convert_cmd = which('rsvg-convert')
    inkscape_cmd = which('inkscape')

    if dot_cmd:
        kernellog.verbose(app, "use dot(1) from: " + dot_cmd)

        # The output of 'dot -Thelp' is searched for 'pdf'.  A non-zero exit
        # code is tolerated, the output is taken from the exception then.
        try:
            dot_Thelp_list = subprocess.check_output([dot_cmd, '-Thelp'],
                                    stderr=subprocess.STDOUT)
        except subprocess.CalledProcessError as err:
            dot_Thelp_list = err.output
        except OSError as err:
            kernellog.warn(app, "can not execute %s (%s), ignoring dot(1)"
                           % (dot_cmd, err))
            dot_cmd = None
            dot_Thelp_list = b''

        dot_Tpdf = bool(re.search(b'pdf', dot_Thelp_list))
    else:
        kernellog.warn(app, "dot(1) not found, for better output quality install "
                       "graphviz from https://www.graphviz.org")

    inkscape_ver = b''
    if inkscape_cmd:
        try:
            inkscape_ver = subprocess.check_output([inkscape_cmd, '--version'],
                                                   stderr=subprocess.DEVNULL)
        except (subprocess.CalledProcessError, OSError) as err:
            # fall back to the convert(1) / rsvg-convert(1) setup below
            kernellog.warn(app, "can not get version of %s (%s), ignoring inkscape(1)"
                           % (inkscape_cmd, err))
            inkscape_cmd = None

    if inkscape_cmd:
        kernellog.verbose(app, "use inkscape(1) from: " + inkscape_cmd)
        inkscape_ver_one = bool(re.search(b'Inkscape 1', inkscape_ver))
        # with Inkscape at hand, none of the other converters is used
        convert_cmd = None
        rsvg_convert_cmd = None
        dot_Tpdf = False

    else:
        if convert_cmd:
            kernellog.verbose(app, "use convert(1) from: " + convert_cmd)
        else:
            kernellog.verbose(app,
                "Neither inkscape(1) nor convert(1) found.\n"
                "For SVG to PDF conversion, "
                "install either Inkscape (https://inkscape.org/) (preferred) or\n"
                "ImageMagick (https://www.imagemagick.org)")

        if rsvg_convert_cmd:
            kernellog.verbose(app, "use rsvg-convert(1) from: " + rsvg_convert_cmd)
            kernellog.verbose(app, "use 'dot -Tsvg' and rsvg-convert(1) for DOT -> PDF conversion")
            dot_Tpdf = False
        else:
            kernellog.verbose(app,
                "rsvg-convert(1) not found.\n"
                "  SVG rendering of convert(1) is done by ImageMagick-native renderer.")
            if dot_Tpdf:
                kernellog.verbose(app, "use 'dot -Tpdf' for DOT -> PDF conversion")
            else:
                kernellog.verbose(app, "use 'dot -Tsvg' and convert(1) for DOT -> PDF conversion")
233
234
235# integrate conversion tools
236# --------------------------
237
# Markup name of the ``kernel-render`` directive mapped to the file extension
# of the markup.  The extension must be handled by the convert_image(..)
# function's *in_ext* input.
RENDER_MARKUP_EXT = dict(
    DOT = '.dot',
    SVG = '.svg',
)
244
def convert_image(img_node, translator, src_fname=None):
    """Convert a image node for the builder.

    Different builder prefer different image formats, e.g. *latex* builder
    prefer PDF while *html* builder prefer SVG format for images.

    This function handles output image formats in dependence of source the
    format (of the image) and the translator's output format.

    * ``img_node``   image node, its ``uri`` and ``candidates`` are rewritten
      to the converted file
    * ``translator`` the translator visiting the node, its ``builder`` gives
      the output format and the folders
    * ``src_fname``  pathname of the source file, if ``None`` the ``uri`` is
      looked up in the builder's ``srcdir`` first and in ``outdir`` second

    If no conversion is possible or the conversion fails, the image node is
    replaced by a literal block with the content of the source file.
    """
    builder = translator.builder
    app = builder.app

    fname, in_ext = path.splitext(path.basename(img_node['uri']))
    if src_fname is None:
        src_fname = path.join(builder.srcdir, img_node['uri'])
        if not path.exists(src_fname):
            src_fname = path.join(builder.outdir, img_node['uri'])

    dst_fname = None

    # in kernel builds, use 'make SPHINXOPTS=-v' to see verbose messages

    kernellog.verbose(app, 'assert best format for: ' + img_node['uri'])

    if in_ext == '.dot':

        if not dot_cmd:
            kernellog.verbose(app,
                              "dot from graphviz not available / include DOT raw.")
            img_node.replace_self(file2literal(src_fname))

        elif builder.format == 'latex':
            dst_fname = path.join(builder.outdir, fname + '.pdf')
            img_node['uri'] = fname + '.pdf'
            img_node['candidates'] = {'*': fname + '.pdf'}

        elif builder.format == 'html':
            dst_fname = path.join(
                builder.outdir,
                builder.imagedir,
                fname + '.svg')
            img_node['uri'] = path.join(
                builder.imgpath, fname + '.svg')
            img_node['candidates'] = {
                '*': path.join(builder.imgpath, fname + '.svg')}

        else:
            # all other builder formats will include DOT as raw
            img_node.replace_self(file2literal(src_fname))

    elif in_ext == '.svg':

        if builder.format == 'latex':
            if not inkscape_cmd and convert_cmd is None:
                kernellog.warn(app,
                                  "no SVG to PDF conversion available / include SVG raw."
                                  "\nIncluding large raw SVGs can cause xelatex error."
                                  "\nInstall Inkscape (preferred) or ImageMagick.")
                img_node.replace_self(file2literal(src_fname))
            else:
                dst_fname = path.join(builder.outdir, fname + '.pdf')
                img_node['uri'] = fname + '.pdf'
                img_node['candidates'] = {'*': fname + '.pdf'}

    if dst_fname:
        # the builder needs not to copy one more time, so pop it if exists.
        builder.images.pop(img_node['uri'], None)
        # outdir is not necessarily a plain str, len() needs one
        _name = dst_fname[len(str(builder.outdir)) + 1:]

        if isNewer(dst_fname, src_fname):
            kernellog.verbose(app,
                              "convert: {out}/%s already exists and is newer" % _name)

        else:
            ok = False
            mkdir(path.dirname(dst_fname))

            if in_ext == '.dot':
                kernellog.verbose(app, 'convert DOT to: {out}/' + _name)
                if builder.format == 'latex' and not dot_Tpdf:
                    # two steps: DOT -> SVG -> PDF.  The second step is only
                    # run if the first one produced the SVG.
                    svg_fname = path.join(builder.outdir, fname + '.svg')
                    ok = (dot2format(app, src_fname, svg_fname)
                          and svg2pdf_by_rsvg(app, svg_fname, dst_fname))

                else:
                    ok = dot2format(app, src_fname, dst_fname)

            elif in_ext == '.svg':
                kernellog.verbose(app, 'convert SVG to: {out}/' + _name)
                ok = svg2pdf(app, src_fname, dst_fname)

            if not ok:
                img_node.replace_self(file2literal(src_fname))
339
340
def dot2format(app, dot_fname, out_fname):
    """Converts DOT file to ``out_fname`` using ``dot(1)``.

    * ``dot_fname`` pathname of the input DOT file, including extension ``.dot``
    * ``out_fname`` pathname of the output file, including format extension

    The *format extension* depends on the ``dot`` command (see ``man dot``
    option ``-Txxx``). Normally you will use one of the following extensions:

    - ``.ps`` for PostScript,
    - ``.svg`` or ``svgz`` for Structured Vector Graphics,
    - ``.fig`` for XFIG graphics and
    - ``.png`` or ``gif`` for common bitmap graphics.

    Returns ``True`` on success and ``False`` if ``dot(1)`` exits with an
    error.  On error a warning is logged and the incomplete output file is
    removed.
    """
    out_format = path.splitext(out_fname)[1][1:]
    cmd = [dot_cmd, '-T%s' % out_format, dot_fname]

    # dot(1) writes the result to its stdout, which is the output file
    with open(out_fname, "wb") as out:
        exit_code = subprocess.call(cmd, stdout = out)

    if exit_code != 0:
        kernellog.warn(app,
                      "Error #%d when calling: %s" % (exit_code, " ".join(cmd)))
        # Do not leave a truncated file behind: it would be newer than the
        # DOT source and could be mistaken for a valid conversion result.
        try:
            os.remove(out_fname)
        except OSError:
            # best effort only, the failure has been reported above
            pass
    return exit_code == 0
366
def svg2pdf(app, svg_fname, pdf_fname):
    """Converts SVG to PDF with ``inkscape(1)`` or ``convert(1)`` command.

    Uses ``inkscape(1)`` from Inkscape (https://inkscape.org/) or ``convert(1)``
    from ImageMagick (https://www.imagemagick.org) for conversion.
    Returns ``True`` on success and ``False`` if an error occurred or if none
    of both tools is available.

    * ``svg_fname`` pathname of the input SVG file with extension (``.svg``)
    * ``pdf_fname`` pathname of the output PDF file with extension (``.pdf``)

    """
    if inkscape_cmd:
        cmd_name = 'inkscape(1)'
        if inkscape_ver_one:
            cmd = [inkscape_cmd, '-o', pdf_fname, svg_fname]
        else:
            cmd = [inkscape_cmd, '-z', '--export-pdf=%s' % pdf_fname, svg_fname]
    elif convert_cmd:
        cmd_name = 'convert(1)'
        cmd = [convert_cmd, svg_fname, pdf_fname]
    else:
        # without this guard the command list would start with None and
        # subprocess would raise a TypeError
        kernellog.warn(app, "no SVG to PDF conversion available for: %s"
                       % svg_fname)
        return False

    try:
        warning_msg = subprocess.check_output(cmd, stderr=subprocess.STDOUT)
        exit_code = 0
    except subprocess.CalledProcessError as err:
        warning_msg = err.output
        exit_code = err.returncode

    # the output of the tool is not guaranteed to be valid UTF-8
    msg = warning_msg.decode('utf-8', errors='replace') if warning_msg else ''

    if exit_code != 0:
        kernellog.warn(app, "Error #%d when calling: %s" % (exit_code, " ".join(cmd)))
        if msg:
            kernellog.warn(app, "Warning msg from %s: %s"
                           % (cmd_name, msg))
    elif msg:
        kernellog.verbose(app, "Warning msg from %s (likely harmless):\n%s"
                          % (cmd_name, msg))

    return exit_code == 0
406
def svg2pdf_by_rsvg(app, svg_fname, pdf_fname):
    """Convert SVG to PDF, preferring the ``rsvg-convert(1)`` command.

    * ``svg_fname`` pathname of input SVG file, including extension ``.svg``
    * ``pdf_fname`` pathname of output PDF file, including extension ``.pdf``

    The input SVG file should be the one generated by ``dot2format()``.

    Without ``rsvg-convert(1)`` the job is handed over to ``svg2pdf()`` and
    its result is returned.  Otherwise ``True`` is returned if
    ``rsvg-convert(1)`` exits with code 0, else ``False``.

    """
    if rsvg_convert_cmd is None:
        return svg2pdf(app, svg_fname, pdf_fname)

    cmd = [rsvg_convert_cmd, '--format=pdf', '-o', pdf_fname, svg_fname]
    # use stdout and stderr from parent
    exit_code = subprocess.call(cmd)
    failed = exit_code != 0
    if failed:
        kernellog.warn(app, "Error #%d when calling: %s" % (exit_code, " ".join(cmd)))
    return not failed
431
432
433# image handling
434# ---------------------
435
def visit_kernel_image(self, node):    # pylint: disable=W0613
    """Visitor of the ``kernel_image`` Node.

    The first child of ``node`` is the ``image`` node, it is passed to
    ``convert_image(...)`` together with the visiting translator.
    """
    convert_image(node[0], self)
443
class kernel_image(nodes.image):
    """Node of the ``kernel-image`` directive, wraps an ``image`` node."""
447
class KernelImage(images.Image):
    u"""KernelImage directive

    Behaves like the ``.. image::`` directive, but rejects *remote URI* and
    *glob* pattern.  The image node built by the base class is wrapped into a
    kernel_image node. See ``visit_kernel_image``.
    """

    def run(self):
        """Build the image node and wrap it into a ``kernel_image`` node."""
        uri = self.arguments[0]
        is_glob = uri.endswith('.*')
        is_remote = '://' in uri
        if is_glob or is_remote:
            raise self.severe(
                'Error in "%s: %s": glob pattern and remote images are not allowed'
                % (self.name, uri))

        result = images.Image.run(self)
        # results other than one single image node are passed through
        if len(result) == 2 or isinstance(result[0], nodes.system_message):
            return result

        (image_node,) = result
        # wrap image node into a kernel_image node / see visitors
        return [kernel_image('', image_node)]
469
470# figure handling
471# ---------------------
472
def visit_kernel_figure(self, node):   # pylint: disable=W0613
    """Visitor of the ``kernel_figure`` Node.

    The first child of ``node`` is taken as the figure, the first child of
    the figure as the ``image`` node which is passed to
    ``convert_image(...)``.
    """
    figure_node = node[0]
    convert_image(figure_node[0], self)
480
class kernel_figure(nodes.figure):
    """Node of the ``kernel-figure`` directive, wraps a ``figure`` node."""
483
class KernelFigure(Figure):
    u"""KernelFigure directive

    Behaves like the ``.. figure::`` directive, but rejects *remote URI* and
    *glob* pattern.  The figure node built by the base class is wrapped into
    a kernel_figure node. See ``visit_kernel_figure``.
    """

    def run(self):
        """Build the figure node and wrap it into a ``kernel_figure`` node."""
        uri = self.arguments[0]
        is_glob = uri.endswith('.*')
        is_remote = '://' in uri
        if is_glob or is_remote:
            raise self.severe(
                'Error in "%s: %s":'
                ' glob pattern and remote images are not allowed'
                % (self.name, uri))

        result = Figure.run(self)
        # results other than one single figure node are passed through
        if len(result) == 2 or isinstance(result[0], nodes.system_message):
            return result

        (figure_node,) = result
        # wrap figure node into a kernel_figure node / see visitors
        return [kernel_figure('', figure_node)]
506
507
508# render handling
509# ---------------------
510
def visit_kernel_render(self, node):
    """Visitor of the ``kernel_render`` Node.

    If the markup is known and can be rendered, the text of the
    ``literal_block`` child node is saved into a file below the builder's
    image folder.  The ``literal_block`` is replaced by a new ``image`` node
    pointing to that file and the image node is handled by
    ``convert_image(...)``.  Otherwise the node is left untouched.
    """
    app = self.builder.app
    srclang = node.get('srclang')

    kernellog.verbose(app, 'visit kernel-render node lang: "%s"' % (srclang))

    tmp_ext = RENDER_MARKUP_EXT.get(srclang, None)
    if tmp_ext is None:
        kernellog.warn(app, 'kernel-render: "%s" unknown / include raw.' % (srclang))
        return

    if tmp_ext == '.dot' and not dot_cmd:
        kernellog.verbose(app, "dot from graphviz not available / include raw.")
        return

    literal_block = node[0]
    code = literal_block.astext()

    # the file name is derived from the markup, equal markup shares one file
    digest = sha1(code.encode('utf-8')).hexdigest()
    img_file = '%s-%s' % (srclang, digest) + tmp_ext

    tmp_fname = path.join(self.builder.outdir, self.builder.imagedir, img_file)
    if not path.isfile(tmp_fname):
        mkdir(path.dirname(tmp_fname))
        with open(tmp_fname, "w") as out:
            out.write(code)

    img_uri = path.join(self.builder.imgpath, img_file)
    img_node = nodes.image(node.rawsource, **node.attributes)
    img_node['uri'] = img_uri
    img_node['candidates'] = {'*': img_uri}

    literal_block.replace_self(img_node)
    convert_image(img_node, self, tmp_fname)
554
555
class kernel_render(nodes.General, nodes.Inline, nodes.Element):
    """Node of the ``kernel-render`` directive, see ``visit_kernel_render``."""
559
class KernelRender(Figure):
    u"""KernelRender directive

    Render content by external tool.  Has all the options known from the
    *figure*  directive, plus option ``caption``.  If ``caption`` has a
    value, a figure node with the *caption* is inserted. If not, a image node is
    inserted.

    The KernelRender directive wraps the text of the directive into a
    literal_block node and wraps it into a kernel_render node. See
    ``visit_kernel_render``.
    """
    has_content = True
    required_arguments = 1
    optional_arguments = 0
    final_argument_whitespace = False

    # earn options from 'figure'
    option_spec = Figure.option_spec.copy()
    option_spec['caption'] = directives.unchanged

    def run(self):
        """Return the nodes of the directive as a flat list."""
        result = self.build_node()
        # On errors build_node() already returns a list of system messages;
        # wrapping that list again would hand a nested list to the caller.
        if isinstance(result, list):
            return result
        return [result]

    def build_node(self):
        """Build the node tree of the directive.

        Returns a ``kernel_render`` node, or a ``figure`` node wrapping it if
        option ``caption`` is set.  If the source language is unknown or the
        directive has no content, a list with one warning message is returned.
        """

        srclang = self.arguments[0].strip()
        if srclang not in RENDER_MARKUP_EXT:
            return [self.state_machine.reporter.warning(
                'Unknown source language "%s", use one of: %s.' % (
                    srclang, ",".join(RENDER_MARKUP_EXT.keys())),
                line=self.lineno)]

        code = '\n'.join(self.content)
        if not code.strip():
            return [self.state_machine.reporter.warning(
                'Ignoring "%s" directive without content.' % (
                    self.name),
                line=self.lineno)]

        node = kernel_render()
        node['alt'] = self.options.get('alt','')
        node['srclang'] = srclang
        literal_node = nodes.literal_block(code, code)
        node += literal_node

        caption = self.options.get('caption')
        if caption:
            # parse caption's content
            parsed = nodes.Element()
            self.state.nested_parse(
                ViewList([caption], source=''), self.content_offset, parsed)
            caption_node = nodes.caption(
                parsed[0].rawsource, '', *parsed[0].children)
            caption_node.source = parsed[0].source
            caption_node.line = parsed[0].line

            figure_node = nodes.figure('', node)
            # the options of the directive become attributes of the figure
            for k, v in self.options.items():
                figure_node[k] = v
            figure_node += caption_node

            node = figure_node

        return node
625
def add_kernel_figure_to_std_domain(app, doctree):
    """Add kernel-figure anchors to 'std' domain.

    The ``StandardDomain.process_doc(..)`` method does not know how to resolve
    the caption (label) of ``kernel-figure`` directive (it only knows about
    standard nodes, e.g. table, figure etc.). Without any additional handling
    this will result in a 'undefined label' for kernel-figures.

    This handler walks the explicit names of ``doctree``.  For each name
    pointing to a ``kernel_figure`` node with a caption, the tuple (docname,
    label ID, caption text) is stored in the labels of the 'std' domain.
    """

    env = app.env
    labels = env.domains["std"].data["labels"]
    docname = env.docname

    for name, explicit in doctree.nametypes.items():
        if not explicit:
            continue
        labelid = doctree.nameids[name]
        if labelid is None:
            continue

        target = doctree.ids[labelid]
        if target.tagname != 'kernel_figure':
            continue

        # iterate the node returned by next_node() and take the first caption
        for child in target.next_node():
            if child.tagname == 'caption':
                # add label to std domain
                labels[name] = docname, labelid, clean_astext(child)
                break
655                    break
656