1"""
2    sphinx.transforms.post_transforms.images
3    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
4
5    Docutils transforms used by Sphinx.
6
7    :copyright: Copyright 2007-2021 by the Sphinx team, see AUTHORS.
8    :license: BSD, see LICENSE for details.
9"""
10
11import os
12import re
13from math import ceil
14from typing import Any, Dict, List, Optional, Tuple
15
16from docutils import nodes
17
18from sphinx.application import Sphinx
19from sphinx.locale import __
20from sphinx.transforms import SphinxTransform
21from sphinx.util import epoch_to_rfc1123, logging, requests, rfc1123_to_epoch, sha1
22from sphinx.util.images import get_image_extension, guess_mimetype, parse_data_uri
23from sphinx.util.osutil import ensuredir
24
logger = logging.getLogger(__name__)

# Longest basename that ImageDownloader keeps verbatim for a fetched file;
# an empty or longer basename is replaced by a SHA-1 hex digest.
MAX_FILENAME_LEN = 32
# Characters that get replaced by "_" when local image file names are derived
# from URIs or source paths.
CRITICAL_PATH_CHAR_RE = re.compile('[:;<>|*" ]')
29
30
class BaseImageConverter(SphinxTransform):
    """Common skeleton for transforms that rewrite ``image`` nodes.

    Subclasses override :meth:`match` to select the nodes they care about
    and :meth:`handle` to process each selected node.
    """

    def apply(self, **kwargs: Any) -> None:
        """Call :meth:`handle` for every image node accepted by :meth:`match`."""
        for image_node in self.document.traverse(nodes.image):
            if not self.match(image_node):
                continue
            self.handle(image_node)

    def match(self, node: nodes.image) -> bool:
        """Tell whether *node* should be handled; the base class accepts all."""
        return True

    def handle(self, node: nodes.image) -> None:
        """Process *node*; the base class does nothing."""
        return None

    @property
    def imagedir(self) -> str:
        """The ``images`` directory located under the application's doctreedir."""
        doctree_root = self.app.doctreedir
        return os.path.join(doctree_root, 'images')
46
47
class ImageDownloader(BaseImageConverter):
    """Download remote images so that the builder can use a local copy.

    A node is handled when its URI contains ``://``, the builder declares at
    least one supported image type and the builder does not support remote
    images itself.  Downloaded files are stored below :attr:`imagedir`.
    """
    default_priority = 100

    def match(self, node: nodes.image) -> bool:
        """Return True if *node* refers to a remote image that must be fetched."""
        if self.app.builder.supported_image_types == []:
            return False
        elif self.app.builder.supported_remote_images:
            return False
        else:
            return '://' in node['uri']

    def handle(self, node: nodes.image) -> None:
        """Fetch the image behind ``node['uri']`` and point *node* at the file.

        On success the node's ``uri`` and ``candidates`` refer to the local
        path, and the path is registered in ``env.images`` and
        ``env.original_image_uri``.  Any exception raised on the way is
        reported as a warning instead of being propagated.
        """
        try:
            uri = node['uri']

            # Derive the local file name from the last URI component, without
            # the query string.  Fall back to a digest when nothing usable is
            # left or the name is too long.
            basename = os.path.basename(uri)
            if '?' in basename:
                basename = basename.split('?')[0]
            if basename == '' or len(basename) > MAX_FILENAME_LEN:
                filename, ext = os.path.splitext(uri)
                basename = sha1(filename.encode()).hexdigest() + ext
            basename = CRITICAL_PATH_CHAR_RE.sub("_", basename)

            # Always name the per-image directory after a digest of the URI.
            # Building it from the raw URI let short URIs inject characters
            # that are invalid on some filesystems (e.g. the ":" of a port)
            # or ".." segments that point outside of ``imagedir``.
            dirname = sha1(uri.encode()).hexdigest()
            dirpath = os.path.join(self.imagedir, dirname)
            ensuredir(dirpath)
            path = os.path.join(dirpath, basename)

            headers = {}
            if os.path.exists(path):
                # Let the server answer without a body if our copy is current.
                timestamp = ceil(os.stat(path).st_mtime)  # type: float
                headers['If-Modified-Since'] = epoch_to_rfc1123(timestamp)

            r = requests.get(uri, headers=headers)
            if r.status_code >= 400:
                logger.warning(__('Could not fetch remote image: %s [%d]'),
                               uri, r.status_code)
            else:
                self.app.env.original_image_uri[path] = uri

                # Only a 200 response carries new content to store.
                if r.status_code == 200:
                    with open(path, 'wb') as f:
                        f.write(r.content)

                # Mirror the server's modification time on the local file so
                # that the next If-Modified-Since header is based on it.
                last_modified = r.headers.get('last-modified')
                if last_modified:
                    timestamp = rfc1123_to_epoch(last_modified)
                    os.utime(path, (timestamp, timestamp))

                mimetype = guess_mimetype(path, default='*')
                if mimetype != '*' and os.path.splitext(basename)[1] == '':
                    # append a suffix if URI does not contain suffix
                    ext = get_image_extension(mimetype)
                    newpath = os.path.join(dirpath, basename + ext)
                    os.replace(path, newpath)
                    self.app.env.original_image_uri.pop(path)
                    self.app.env.original_image_uri[newpath] = uri
                    path = newpath
                node['candidates'].pop('?')
                node['candidates'][mimetype] = path
                node['uri'] = path
                self.app.env.images.add_file(self.env.docname, path)
        except Exception as exc:
            logger.warning(__('Could not fetch remote image: %s [%s]'), node['uri'], exc)
112
113
class DataURIExtractor(BaseImageConverter):
    """Write images embedded as ``data:`` URIs to files below :attr:`imagedir`.

    A node is handled when its URI starts with ``data:``, the builder
    declares at least one supported image type and the builder does not
    support data URI images itself.
    """
    default_priority = 150

    def match(self, node: nodes.image) -> bool:
        """Return True if *node* holds a data URI that has to be extracted."""
        # ``supported_remote_images`` is used as a boolean flag elsewhere in
        # this module, so comparing it with ``[]`` could never be true.  Test
        # the list of supported image types instead, as the other ``match``
        # implementations in this module do.
        if not self.app.builder.supported_image_types:
            return False
        elif self.app.builder.supported_data_uri_images is True:
            return False
        else:
            return node['uri'].startswith('data:')

    def handle(self, node: nodes.image) -> None:
        """Store the payload of the data URI in a file and point *node* at it.

        The file is named after the SHA-1 digest of the image data plus the
        extension belonging to its mimetype.  When no extension is known for
        the mimetype, a warning is emitted and *node* is left untouched.
        """
        image = parse_data_uri(node['uri'])
        ext = get_image_extension(image.mimetype)
        if ext is None:
            logger.warning(__('Unknown image format: %s...'), node['uri'][:32],
                           location=node)
            return

        ensuredir(os.path.join(self.imagedir, 'embeded'))
        digest = sha1(image.data).hexdigest()
        path = os.path.join(self.imagedir, 'embeded', digest + ext)
        self.app.env.original_image_uri[path] = node['uri']

        with open(path, 'wb') as f:
            f.write(image.data)

        # Tolerate a missing '?' entry: nothing in this method would catch
        # the KeyError that a plain ``pop('?')`` raises in that case.
        node['candidates'].pop('?', None)
        node['candidates'][image.mimetype] = path
        node['uri'] = path
        self.app.env.images.add_file(self.env.docname, path)
145
146
def get_filename_for(filename: str, mimetype: str) -> str:
    """Return the basename of *filename* with the extension for *mimetype*.

    Characters matched by ``CRITICAL_PATH_CHAR_RE`` are replaced by ``_``
    and the original extension, if any, is dropped before the new one is
    appended.
    """
    sanitized = CRITICAL_PATH_CHAR_RE.sub("_", os.path.basename(filename))
    stem, _old_ext = os.path.splitext(sanitized)
    return stem + get_image_extension(mimetype)
151
152
class ImageConverter(BaseImageConverter):
    """A base class for image converters.

    An image converter is a kind of Docutils transform module.  It is used to
    convert image files which are not supported by a builder to an
    appropriate format for that builder.

    For example, :py:class:`LaTeX builder <.LaTeXBuilder>` supports PDF,
    PNG and JPEG as image formats.  However it does not support SVG images.
    For such cases, using image converters allows embedding these
    unsupported images into the document.  One of the image converters,
    :ref:`sphinx.ext.imgconverter <sphinx.ext.imgconverter>`, can convert
    an SVG image to PNG format using Imagemagick internally.

    There are three steps to make your custom image converter:

    1. Make a subclass of ``ImageConverter`` class
    2. Override ``conversion_rules``, ``is_available()`` and ``convert()``
    3. Register your image converter to Sphinx using
       :py:meth:`.Sphinx.add_post_transform`
    """
    default_priority = 200

    #: Whether the converter is available or not.  Will be filled at the
    #: first call of the build.  The result is shared in the same process.
    #:
    #: .. todo:: This should be refactored not to store the state without class
    #:           variable.
    available = None  # type: Optional[bool]

    #: The conversion rules the image converter supports.
    #: It is represented as a list of pairs of source image format (mimetype)
    #: and destination one::
    #:
    #:     conversion_rules = [
    #:         ('image/svg+xml', 'image/png'),
    #:         ('image/gif', 'image/png'),
    #:         ('application/pdf', 'image/png'),
    #:     ]
    conversion_rules = []  # type: List[Tuple[str, str]]

    def __init__(self, *args: Any, **kwargs: Any) -> None:
        super().__init__(*args, **kwargs)

    def match(self, node: nodes.image) -> bool:
        """Return True if *node* needs a conversion this converter can do."""
        supported = self.app.builder.supported_image_types
        if not supported:
            return False
        if set(node['candidates']) & set(supported):
            # builder supports the image; no need to convert
            return False
        if self.available is None:
            # store the value to the class variable to share it during the build
            self.__class__.available = self.is_available()

        if not self.available:
            return False
        return bool(self.get_conversion_rule(node))

    def get_conversion_rule(self, node: nodes.image) -> Optional[Tuple[str, str]]:
        """Return the first applicable ``(source, destination)`` mimetype pair.

        Every mimetype guessed for *node* is paired with every image type
        the builder supports; the first pair that is listed in
        :attr:`conversion_rules` wins.  Returns ``None`` if there is none.
        """
        for candidate in self.guess_mimetypes(node):
            for supported in self.app.builder.supported_image_types:
                rule = (candidate, supported)
                if rule in self.conversion_rules:
                    return rule

        return None

    def is_available(self) -> bool:
        """Return whether the image converter is available or not."""
        raise NotImplementedError()

    def guess_mimetypes(self, node: nodes.image) -> List[str]:
        """Return the mimetypes that *node* may be converted from.

        Nodes with a ``'?'`` candidate yield no mimetype, nodes with a
        ``'*'`` candidate yield the mimetype guessed from their URI, and all
        other nodes yield the keys of their ``candidates`` mapping.
        """
        if '?' in node['candidates']:
            return []
        elif '*' in node['candidates']:
            return [guess_mimetype(node['uri'])]
        else:
            # Materialize the keys so the declared list type is honoured.
            return list(node['candidates'])

    def handle(self, node: nodes.image) -> None:
        """Convert the image of *node* and point the node at the result.

        The converted file is written to :attr:`imagedir`.  The node and the
        environment are only updated when :meth:`convert` returns a true
        value.
        """
        rule = self.get_conversion_rule(node)
        if rule is None:
            # Nothing to do; match() normally rules this case out beforehand.
            return
        _from, _to = rule

        if _from in node['candidates']:
            srcpath = node['candidates'][_from]
        else:
            srcpath = node['candidates']['*']

        filename = get_filename_for(srcpath, _to)
        ensuredir(self.imagedir)
        destpath = os.path.join(self.imagedir, filename)

        abs_srcpath = os.path.join(self.app.srcdir, srcpath)
        if self.convert(abs_srcpath, destpath):
            if '*' in node['candidates']:
                node['candidates']['*'] = destpath
            else:
                node['candidates'][_to] = destpath
            node['uri'] = destpath

            self.env.original_image_uri[destpath] = srcpath
            self.env.images.add_file(self.env.docname, destpath)

    def convert(self, _from: str, _to: str) -> bool:
        """Convert an image file to the expected format.

        *_from* is a path of the source image file, and *_to* is a path
        of the destination file.
        """
        raise NotImplementedError()
267
268
def setup(app: Sphinx) -> Dict[str, Any]:
    """Register the image post-transforms of this module with *app*.

    Adds ``ImageDownloader`` and ``DataURIExtractor``, in that order, and
    returns the extension metadata dictionary.
    """
    for transform in (ImageDownloader, DataURIExtractor):
        app.add_post_transform(transform)

    metadata = {
        'version': 'builtin',
        'parallel_read_safe': True,
        'parallel_write_safe': True,
    }  # type: Dict[str, Any]
    return metadata
278