"""
    sphinx.transforms.post_transforms.images
    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

    Docutils transforms used by Sphinx.

    :copyright: Copyright 2007-2021 by the Sphinx team, see AUTHORS.
    :license: BSD, see LICENSE for details.
"""

import os
import re
from math import ceil
from typing import Any, Dict, List, Optional, Tuple

from docutils import nodes

from sphinx.application import Sphinx
from sphinx.locale import __
from sphinx.transforms import SphinxTransform
from sphinx.util import epoch_to_rfc1123, logging, requests, rfc1123_to_epoch, sha1
from sphinx.util.images import get_image_extension, guess_mimetype, parse_data_uri
from sphinx.util.osutil import ensuredir

logger = logging.getLogger(__name__)

# Basenames / directory names longer than this are replaced by a SHA-1 digest.
MAX_FILENAME_LEN = 32
# Characters replaced by "_" when a file name is derived from a URI or path.
CRITICAL_PATH_CHAR_RE = re.compile('[:;<>|*" ]')


class BaseImageConverter(SphinxTransform):
    """Common driver for the image post-transforms of this module.

    ``apply()`` walks every ``nodes.image`` in the document and passes the
    ones accepted by :meth:`match` to :meth:`handle`.  Subclasses override
    those two hooks.
    """

    def apply(self, **kwargs: Any) -> None:
        """Run :meth:`handle` on every image node accepted by :meth:`match`."""
        for node in self.document.traverse(nodes.image):
            if self.match(node):
                self.handle(node)

    def match(self, node: nodes.image) -> bool:
        """Return whether *node* should be handled (always ``True`` here)."""
        return True

    def handle(self, node: nodes.image) -> None:
        """Process a matched *node* (no-op here)."""
        pass

    @property
    def imagedir(self) -> str:
        """The ``images`` directory below the application's doctree dir."""
        return os.path.join(self.app.doctreedir, 'images')


class ImageDownloader(BaseImageConverter):
    """Fetch images referenced by URL and point the node at the local copy."""

    default_priority = 100

    def match(self, node: nodes.image) -> bool:
        """Match URIs containing ``://`` when the builder needs a local file.

        Nothing is matched when the builder declares no supported image
        types, or when it supports remote images itself.
        """
        if self.app.builder.supported_image_types == []:
            return False
        elif self.app.builder.supported_remote_images:
            return False
        else:
            return '://' in node['uri']

    def handle(self, node: nodes.image) -> None:
        """Download the image of *node* into :attr:`imagedir`.

        On success the node's ``uri`` and ``candidates`` are rewritten to the
        downloaded file, and the file is registered in ``env.images`` and
        ``env.original_image_uri``.  Any exception raised on the way is
        reported as a warning instead of propagating.
        """
        try:
            # Derive a local file name from the URI, dropping any query string.
            basename = os.path.basename(node['uri'])
            if '?' in basename:
                basename = basename.split('?')[0]
            if basename == '' or len(basename) > MAX_FILENAME_LEN:
                # Unusable or overlong name: use a digest, keep the suffix.
                filename, ext = os.path.splitext(node['uri'])
                basename = sha1(filename.encode()).hexdigest() + ext
            basename = re.sub(CRITICAL_PATH_CHAR_RE, "_", basename)

            # Derive a sub-directory from the whole URI.
            dirname = node['uri'].replace('://', '/').translate({ord("?"): "/",
                                                                 ord("&"): "/"})
            if len(dirname) > MAX_FILENAME_LEN:
                dirname = sha1(dirname.encode()).hexdigest()
            ensuredir(os.path.join(self.imagedir, dirname))
            path = os.path.join(self.imagedir, dirname, basename)

            headers = {}
            if os.path.exists(path):
                # A copy already exists: make the request conditional on its
                # modification time.
                timestamp = ceil(os.stat(path).st_mtime)  # type: float
                headers['If-Modified-Since'] = epoch_to_rfc1123(timestamp)

            r = requests.get(node['uri'], headers=headers)
            if r.status_code >= 400:
                logger.warning(__('Could not fetch remote image: %s [%d]'),
                               node['uri'], r.status_code)
            else:
                self.app.env.original_image_uri[path] = node['uri']

                # Only a 200 response carries a body to store; for any other
                # non-error status the file on disk is left as it is.
                if r.status_code == 200:
                    with open(path, 'wb') as f:
                        f.write(r.content)

                last_modified = r.headers.get('last-modified')
                if last_modified:
                    # Mirror the server's timestamp onto the local file so
                    # that it feeds the next If-Modified-Since header.
                    timestamp = rfc1123_to_epoch(last_modified)
                    os.utime(path, (timestamp, timestamp))

                mimetype = guess_mimetype(path, default='*')
                if mimetype != '*' and os.path.splitext(basename)[1] == '':
                    # append a suffix if URI does not contain suffix
                    ext = get_image_extension(mimetype)
                    newpath = os.path.join(self.imagedir, dirname, basename + ext)
                    os.replace(path, newpath)
                    self.app.env.original_image_uri.pop(path)
                    self.app.env.original_image_uri[newpath] = node['uri']
                    path = newpath
                node['candidates'].pop('?')
                node['candidates'][mimetype] = path
                node['uri'] = path
                self.app.env.images.add_file(self.env.docname, path)
        except Exception as exc:
            logger.warning(__('Could not fetch remote image: %s [%s]'),
                           node['uri'], exc)


class DataURIExtractor(BaseImageConverter):
    """Write ``data:`` URI images to files and point the node at them."""

    default_priority = 150

    def match(self, node: nodes.image) -> bool:
        """Match ``data:`` URIs when the builder cannot embed them itself.

        Nothing is matched when the builder declares no supported image
        types, or when it supports data URI images.
        """
        # The emptiness test is made on ``supported_image_types`` (a list),
        # the same guard ImageDownloader.match() uses.  Comparing the
        # ``supported_remote_images`` flag with ``[]`` could never be true.
        if self.app.builder.supported_image_types == []:
            return False
        elif self.app.builder.supported_data_uri_images is True:
            return False
        else:
            return node['uri'].startswith('data:')

    def handle(self, node: nodes.image) -> None:
        """Decode the data URI of *node* into a file below :attr:`imagedir`.

        The file is named after the SHA-1 digest of the decoded data.  A
        warning is emitted, and the node left untouched, when no file
        extension is known for the image's MIME type.
        """
        image = parse_data_uri(node['uri'])
        ext = get_image_extension(image.mimetype)
        if ext is None:
            logger.warning(__('Unknown image format: %s...'), node['uri'][:32],
                           location=node)
            return

        ensuredir(os.path.join(self.imagedir, 'embeded'))
        digest = sha1(image.data).hexdigest()
        path = os.path.join(self.imagedir, 'embeded', digest + ext)
        self.app.env.original_image_uri[path] = node['uri']

        with open(path, 'wb') as f:
            f.write(image.data)

        node['candidates'].pop('?')
        node['candidates'][image.mimetype] = path
        node['uri'] = path
        self.app.env.images.add_file(self.env.docname, path)


def get_filename_for(filename: str, mimetype: str) -> str:
    """Return the base name of *filename* with the extension for *mimetype*.

    Characters matched by ``CRITICAL_PATH_CHAR_RE`` are replaced by ``_`` and
    the original extension is swapped for the one of *mimetype*.
    """
    basename = os.path.basename(filename)
    basename = re.sub(CRITICAL_PATH_CHAR_RE, "_", basename)
    return os.path.splitext(basename)[0] + get_image_extension(mimetype)


class ImageConverter(BaseImageConverter):
    """A base class for image converters.

    An image converter is a kind of Docutils transform module.  It is used to
    convert image files which are not supported by a builder to an
    appropriate format for that builder.

    For example, :py:class:`LaTeX builder <.LaTeXBuilder>` supports PDF,
    PNG and JPEG as image formats.  However it does not support SVG images.
    In such a case, using image converters allows these unsupported images
    to be embedded into the document.  One of the image converters,
    :ref:`sphinx.ext.imgconverter <sphinx.ext.imgconverter>`, can convert
    an SVG image to PNG format using Imagemagick internally.

    There are three steps to make your custom image converter:

    1. Make a subclass of ``ImageConverter`` class
    2. Override ``conversion_rules``, ``is_available()`` and ``convert()``
    3. Register your image converter to Sphinx using
       :py:meth:`.Sphinx.add_post_transform`
    """
    default_priority = 200

    #: The converter is available or not.  Will be filled at the first call of
    #: the build.  The result is shared in the same process.
    #:
    #: .. todo:: This should be refactored not to store the state without class
    #:           variable.
    available = None  # type: Optional[bool]

    #: A conversion rules the image converter supports.
    #: It is represented as a list of pair of source image format (mimetype) and
    #: destination one::
    #:
    #:     conversion_rules = [
    #:         ('image/svg+xml', 'image/png'),
    #:         ('image/gif', 'image/png'),
    #:         ('application/pdf', 'image/png'),
    #:     ]
    conversion_rules = []  # type: List[Tuple[str, str]]

    def __init__(self, *args: Any, **kwargs: Any) -> None:
        super().__init__(*args, **kwargs)

    def match(self, node: nodes.image) -> bool:
        """Return whether *node* needs, and can get, a conversion.

        That is the case when the builder supports some image types but none
        of the node's candidates, the converter is available, and a
        conversion rule applies to the node.
        """
        supported = self.app.builder.supported_image_types
        if not supported:
            return False
        if set(node['candidates']) & set(supported):
            # builder supports the image; no need to convert
            return False

        if self.available is None:
            # store the value to the class variable to share it during the build
            self.__class__.available = self.is_available()
        if not self.available:
            return False

        return self.get_conversion_rule(node) is not None

    def get_conversion_rule(self, node: nodes.image) -> Optional[Tuple[str, str]]:
        """Return the first applicable ``(source, destination)`` rule.

        Candidate MIME types of *node* are paired with the builder's
        supported image types, in order; the first pair listed in
        ``conversion_rules`` is returned, or ``None`` when there is none.
        """
        for candidate in self.guess_mimetypes(node):
            for supported in self.app.builder.supported_image_types:
                rule = (candidate, supported)
                if rule in self.conversion_rules:
                    return rule

        return None

    def is_available(self) -> bool:
        """Return the image converter is available or not."""
        raise NotImplementedError()

    def guess_mimetypes(self, node: nodes.image) -> List[str]:
        """Return the MIME types *node* may be converted from.

        A node with a ``'?'`` candidate yields nothing, one with a ``'*'``
        candidate yields the type guessed from its URI, and any other node
        yields the keys of its ``candidates`` mapping.
        """
        if '?' in node['candidates']:
            return []
        elif '*' in node['candidates']:
            return [guess_mimetype(node['uri'])]
        else:
            # Materialise the key view so the result matches the annotation.
            return list(node['candidates'])

    def handle(self, node: nodes.image) -> None:
        """Convert the image of *node* and point the node at the result.

        The converted file is written into :attr:`imagedir`.  The node and
        the environment are only updated when :meth:`convert` returns a true
        value.
        """
        # match() has already established that a rule exists for this node.
        _from, _to = self.get_conversion_rule(node)

        if _from in node['candidates']:
            srcpath = node['candidates'][_from]
        else:
            srcpath = node['candidates']['*']

        filename = get_filename_for(srcpath, _to)
        ensuredir(self.imagedir)
        destpath = os.path.join(self.imagedir, filename)

        abs_srcpath = os.path.join(self.app.srcdir, srcpath)
        if self.convert(abs_srcpath, destpath):
            if '*' in node['candidates']:
                node['candidates']['*'] = destpath
            else:
                node['candidates'][_to] = destpath
            node['uri'] = destpath

            self.env.original_image_uri[destpath] = srcpath
            self.env.images.add_file(self.env.docname, destpath)

    def convert(self, _from: str, _to: str) -> bool:
        """Convert a image file to expected format.

        *_from* is a path for source image file, and *_to* is a path for
        destination file.
        """
        raise NotImplementedError()


def setup(app: Sphinx) -> Dict[str, Any]:
    """Register the downloader and data URI extractor as post-transforms."""
    app.add_post_transform(ImageDownloader)
    app.add_post_transform(DataURIExtractor)

    return {
        'version': 'builtin',
        'parallel_read_safe': True,
        'parallel_write_safe': True,
    }