1"""
2Provides a collection of utilities for comparing (image) results.
3
4"""
5from __future__ import absolute_import, division, print_function
6
7import six
8
9import atexit
10import functools
11import hashlib
12import itertools
13import os
14import re
15import shutil
16import sys
17from tempfile import TemporaryFile
18
19import numpy as np
20
21import matplotlib
22from matplotlib.compat import subprocess
23from matplotlib.testing.exceptions import ImageComparisonFailure
24from matplotlib import _png
25from matplotlib import _get_cachedir
26from matplotlib import cbook
27
# Names exported by a star-import of this module.  The other helpers defined
# below remain importable by explicit name.
__all__ = ['compare_float', 'compare_images', 'comparable_formats']
29
30
def make_test_filename(fname, purpose):
    """
    Return `fname` with ``-<purpose>`` inserted just before its extension.

    For example ``plot.png`` with purpose ``failed-diff`` becomes
    ``plot-failed-diff.png``.
    """
    stem, suffix = os.path.splitext(fname)
    pieces = (stem, purpose, suffix)
    return '%s-%s%s' % pieces
38
39
def compare_float(expected, actual, relTol=None, absTol=None):
    """
    Check whether two floating point values are close enough.

    You can specify a relative tolerance, absolute tolerance, or both.

    Parameters
    ----------
    expected : float
        The reference value.
    actual : float
        The value being checked.
    relTol : float, optional
        Maximum allowed ``abs(expected - actual) / abs(expected)``.  When
        `expected` is zero the plain absolute difference is used instead.
    absTol : float, optional
        Maximum allowed ``abs(expected - actual)``.

    Returns
    -------
    str or None
        None if every supplied tolerance is met; otherwise a multi-line
        message describing each tolerance that was exceeded.

    Raises
    ------
    ValueError
        If neither `relTol` nor `absTol` is given.
    """
    if relTol is None and absTol is None:
        raise ValueError("You haven't specified a 'relTol' relative "
                         "tolerance or a 'absTol' absolute tolerance "
                         "function argument. You must specify one.")
    msg = ""

    if absTol is not None:
        absDiff = abs(expected - actual)
        if absTol < absDiff:
            template = ['',
                        'Expected: {expected}',
                        'Actual:   {actual}',
                        'Abs diff: {absDiff}',
                        'Abs tol:  {absTol}']
            # Pass the values explicitly: `locals()` evaluated inside a
            # comprehension does not see the enclosing function's variables
            # on Python 3 before 3.12, which made formatting raise KeyError.
            values = dict(expected=expected, actual=actual,
                          absDiff=absDiff, absTol=absTol)
            msg += '\n  '.join([line.format(**values) for line in template])

    if relTol is not None:
        # The relative difference of the two values.  If the expected value is
        # zero, then use the absolute value of the difference.
        relDiff = abs(expected - actual)
        if expected:
            relDiff = relDiff / abs(expected)

        if relTol < relDiff:
            # The relative difference is a ratio, so it's always unit-less.
            template = ['',
                        'Expected: {expected}',
                        'Actual:   {actual}',
                        'Rel diff: {relDiff}',
                        'Rel tol:  {relTol}']
            values = dict(expected=expected, actual=actual,
                          relDiff=relDiff, relTol=relTol)
            msg += '\n  '.join([line.format(**values) for line in template])

    return msg or None
81
82
def get_cache_dir():
    """
    Return the path of the ``test_cache`` directory, creating it if needed.

    Returns None when the directory cannot be created (``IOError``) or is
    not writable.  Raises ``RuntimeError`` when `_get_cachedir` reports no
    cache location at all.
    """
    root = _get_cachedir()
    if root is None:
        raise RuntimeError('Could not find a suitable configuration directory')

    target = os.path.join(root, 'test_cache')
    if not os.path.exists(target):
        try:
            cbook.mkdirs(target)
        except IOError:
            return None

    # A directory we cannot write to is as good as no cache.
    return target if os.access(target, os.W_OK) else None
96
97
def get_file_hash(path, block_size=2 ** 20):
    """
    Return the hex MD5 digest of the file at `path`.

    The file is read in chunks of `block_size` bytes.  For ``.pdf`` and
    ``.svg`` paths the digest additionally covers a string obtained from
    ``checkdep_ghostscript()[1]`` or ``checkdep_inkscape()`` respectively
    (presumably the tool's version, so the hash changes when the external
    converter does -- confirm against those helpers).
    """
    digest = hashlib.md5()
    with open(path, 'rb') as stream:
        # `iter` with a sentinel stops as soon as `read` returns b''.
        for chunk in iter(functools.partial(stream.read, block_size), b''):
            digest.update(chunk)

    if path.endswith('.pdf'):
        from matplotlib import checkdep_ghostscript
        digest.update(checkdep_ghostscript()[1].encode('utf-8'))
    elif path.endswith('.svg'):
        from matplotlib import checkdep_inkscape
        digest.update(checkdep_inkscape().encode('utf-8'))

    return digest.hexdigest()
115
116
def make_external_conversion_command(cmd):
    """
    Turn a command-line builder into a callable that runs the conversion.

    `cmd` is called as ``cmd(old, new)`` and must return the argument list
    to execute.  The returned ``convert(old, new)`` runs that command and
    raises ``IOError`` (including any captured stdout/stderr) if the
    command exits with a non-zero status or `new` does not exist afterwards.
    """
    def convert(old, new):
        argv = cmd(old, new)
        proc = subprocess.Popen(argv, universal_newlines=True,
                                stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE)
        out, err = proc.communicate()
        status = proc.wait()
        if os.path.exists(new) and not status:
            return

        report = ["Conversion command failed:\n%s\n" % ' '.join(argv)]
        if out:
            report.append("Standard output:\n%s\n" % out)
        if err:
            report.append("Standard error:\n%s\n" % err)
        raise IOError(''.join(report))

    return convert
133
134
# Search function returning a match for the first byte that is NOT an ASCII
# letter, a digit, or one of ``_@%+=:,./-``; it returns None when every byte
# is in that set.
# Modified from https://bugs.python.org/issue25567.
_find_unsafe_bytes = re.compile(br'[^a-zA-Z0-9_@%+=:,./-]').search
137
138
def _shlex_quote_bytes(b):
    """
    Return a shell-quoted version of the bytes string `b`.

    Input for which `_find_unsafe_bytes` finds nothing is returned
    unchanged; anything else is wrapped in single quotes, with embedded
    single quotes rewritten as ``'"'"'``.
    """
    if _find_unsafe_bytes(b) is None:
        return b
    escaped = b.replace(b"'", b"'\"'\"'")
    return b"'" + escaped + b"'"
142
143
class _SVGConverter(object):
    """
    Callable that converts SVG files to PNG through one long-lived
    ``inkscape --shell`` subprocess.

    The process is started lazily on the first call (and restarted if it
    has exited); each conversion is sent as one command line on its stdin.
    """

    def __init__(self):
        # The Inkscape subprocess; None until the first conversion.
        self._proc = None
        # We cannot rely on the GC to trigger `__del__` at exit because
        # other modules (e.g. `subprocess`) may already have their globals
        # set to `None`, which make `proc.communicate` or `proc.terminate`
        # fail.  By relying on `atexit` we ensure the destructor runs before
        # `None`-setting occurs.
        atexit.register(self.__del__)

    def _read_to_prompt(self):
        """Did Inkscape reach the prompt without crashing?

        Reads the subprocess' stdout one byte at a time until the two-byte
        sequence newline + ``>`` is seen.  Returns True once it is seen, and
        False if stdout runs out of data or the process has exited.
        """
        # Iterator yielding single bytes until `read` returns b"" (EOF).
        stream = iter(functools.partial(self._proc.stdout.read, 1), b"")
        prompt = (b"\n", b">")
        n = len(prompt)
        # `n` copies of the stream, the i-th one shifted by `i` bytes, so
        # that taking one item from each yields a sliding window of `n` bytes.
        its = itertools.tee(stream, n)
        for i, it in enumerate(its):
            next(itertools.islice(it, i, i), None)  # Advance `it` by `i`.
        while True:
            window = tuple(map(next, its))
            if len(window) != n:
                # Ran out of data -- one of the `next(it)` raised
                # StopIteration, so the tuple is shorter.
                return False
            if self._proc.poll() is not None:
                # Inkscape exited.
                return False
            if window == prompt:
                # Successfully read until prompt.
                return True

    def __call__(self, orig, dest):
        """
        Convert the file `orig` to the PNG file `dest` with Inkscape.

        Raises ``OSError`` if a freshly started Inkscape never reaches its
        prompt, and ``ImageComparisonFailure`` (carrying Inkscape's stderr
        output) if the prompt is not reached again after the export command.
        Paths whose quoted form contains a newline are converted by a
        one-off ``inkscape -z`` invocation instead of the shell process.
        """
        if (not self._proc  # First run.
                or self._proc.poll() is not None):  # Inkscape terminated.
            env = os.environ.copy()
            # If one passes e.g. a png file to Inkscape, it will try to
            # query the user for conversion options via a GUI (even with
            # `--without-gui`).  Unsetting `DISPLAY` prevents this (and causes
            # GTK to crash and Inkscape to terminate, but that'll just be
            # reported as a regular exception below).
            env.pop("DISPLAY", None)  # May already be unset.
            # Do not load any user options.
            # `os.environ` needs native strings on Py2+Windows.
            env[str("INKSCAPE_PROFILE_DIR")] = os.devnull
            # Old versions of Inkscape (0.48.3.1, used on Travis as of now)
            # seem to sometimes deadlock when stderr is redirected to a pipe,
            # so we redirect it to a temporary file instead.  This is not
            # necessary anymore as of Inkscape 0.92.1.
            self._stderr = TemporaryFile()
            self._proc = subprocess.Popen(
                [str("inkscape"), "--without-gui", "--shell"],
                stdin=subprocess.PIPE, stdout=subprocess.PIPE,
                stderr=self._stderr, env=env)
            # Consume the startup output up to the first prompt.
            if not self._read_to_prompt():
                raise OSError("Failed to start Inkscape")

        try:
            fsencode = os.fsencode
        except AttributeError:  # Py2.
            def fsencode(s):
                return s.encode(sys.getfilesystemencoding())

        # Inkscape uses glib's `g_shell_parse_argv`, which has a consistent
        # behavior across platforms, so we can just use `shlex.quote`.
        orig_b, dest_b = map(_shlex_quote_bytes, map(fsencode, [orig, dest]))
        if b"\n" in orig_b or b"\n" in dest_b:
            # Who knows whether the current folder name has a newline, or if
            # our encoding is even ASCII compatible...  Just fall back on the
            # slow solution (Inkscape uses `fgets` so it will always stop at a
            # newline).
            return make_external_conversion_command(lambda old, new: [
                str('inkscape'), '-z', old, '--export-png', new])(orig, dest)
        # One shell command per conversion: "<input> --export-png=<output>".
        self._proc.stdin.write(orig_b + b" --export-png=" + dest_b + b"\n")
        self._proc.stdin.flush()
        if not self._read_to_prompt():
            # Inkscape's output is not localized but gtk's is, so the
            # output stream probably has a mixed encoding.  Using
            # `getfilesystemencoding` should at least get the filenames
            # right...
            self._stderr.seek(0)
            raise ImageComparisonFailure(
                self._stderr.read().decode(
                    sys.getfilesystemencoding(), "replace"))

    def __del__(self):
        """
        Ask a still-running Inkscape to quit, then close its pipes and the
        temporary stderr file.  Does nothing if no process was ever started.
        """
        if self._proc:
            if self._proc.poll() is None:  # Not exited yet.
                self._proc.communicate(b"quit\n")
                self._proc.wait()
            self._proc.stdin.close()
            self._proc.stdout.close()
            self._stderr.close()
237
238
def _update_converter():
    """
    Populate the module-level `converter` mapping.

    Registers a Ghostscript-based converter for ``pdf`` and ``eps`` when
    `matplotlib.checkdep_ghostscript` reports a version, and an
    `_SVGConverter` for ``svg`` when `matplotlib.checkdep_inkscape` does
    not return None.
    """
    gs_executable, gs_version = matplotlib.checkdep_ghostscript()
    if gs_version is not None:
        def ghostscript_command(old, new):
            return [str(gs_executable), '-q', '-sDEVICE=png16m', '-dNOPAUSE',
                    '-dBATCH', '-sOutputFile=' + new, old]

        # Both formats go through Ghostscript; each gets its own wrapper.
        for fmt in ('pdf', 'eps'):
            converter[fmt] = make_external_conversion_command(
                ghostscript_command)

    if matplotlib.checkdep_inkscape() is not None:
        converter['svg'] = _SVGConverter()
250
251
#: A dictionary that maps filename extensions to callables taking the
#: arguments `old` and `new` (filenames).  Calling one converts the file
#: `old`, which has that extension, into the png file `new`.
converter = {}
# Register whichever converters are available on this system at import time.
_update_converter()
258
259
def comparable_formats():
    """
    Return the list of file formats that compare_images can compare
    on this system: ``png`` followed by every extension registered in
    `converter`.
    """
    formats = ['png']
    formats.extend(converter)
    return formats
267
268
def convert(filename, cache):
    """
    Convert the named file into a png file.  Returns the name of the
    created file.

    The output is written next to the input as ``<base>_<ext>.png`` and is
    only regenerated when it is missing or older than the input.  Files
    with an extension that has no registered converter cause the current
    test to be skipped.

    If *cache* is True, the result of the conversion is cached in
    `matplotlib._get_cachedir() + '/test_cache/'`.  The caching is based
    on a hash of the exact contents of the input file.  There is no limit
    on the size of the cache, so it may need to be manually cleared
    periodically.

    """
    stem, extension = filename.rsplit('.', 1)
    if extension not in converter:
        reason = "Don't know how to convert %s files to png" % extension
        from . import is_called_from_pytest
        if not is_called_from_pytest():
            from nose import SkipTest
            raise SkipTest(reason)
        import pytest
        pytest.skip(reason)

    png_name = stem + '_' + extension + '.png'
    if not os.path.exists(filename):
        raise IOError("'%s' does not exist" % filename)

    # Nothing to do when an up-to-date conversion is already on disk.
    if (os.path.exists(png_name) and
            os.stat(png_name).st_mtime >= os.stat(filename).st_mtime):
        return png_name

    cache_dir = get_cache_dir() if cache else None
    cached_file = None
    if cache_dir is not None:
        digest = get_file_hash(filename)
        png_ext = os.path.splitext(png_name)[1]
        cached_file = os.path.join(cache_dir, digest + png_ext)
        if os.path.exists(cached_file):
            # Cache hit: reuse the earlier conversion of identical content.
            shutil.copyfile(cached_file, png_name)
            return png_name

    converter[extension](filename, png_name)

    if cached_file is not None:
        shutil.copyfile(png_name, cached_file)

    return png_name
318
#: Maps file extensions to a function which takes a filename as its
#: only argument and returns an argument list suitable for execution with
#: Popen.  The purpose of this is so that the result file (with the given
#: extension) can be verified with tools such as xmllint for svg.
verifiers = {}

# Turning this off, because it seems to cause multiprocessing issues.
# The leading `False and` short-circuits, so `checkdep_xmllint` is never
# called and no svg verifier is registered here.
if False and matplotlib.checkdep_xmllint():
    verifiers['svg'] = lambda filename: [
        'xmllint', '--valid', '--nowarning', '--noout', filename]
329
330
@cbook.deprecated("2.1")
def verify(filename):
    """
    Verify the file through some sort of verification tool.

    Looks up a command builder for the file's extension in `verifiers`
    and, if there is one, runs the command.  Raises ``IOError`` if the file
    does not exist or the command exits with a non-zero status.
    """
    if not os.path.exists(filename):
        raise IOError("'%s' does not exist" % filename)

    _, extension = filename.rsplit('.', 1)
    build_command = verifiers.get(extension)
    if build_command is None:
        return

    argv = build_command(filename)
    proc = subprocess.Popen(argv, universal_newlines=True,
                            stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    out, err = proc.communicate()
    status = proc.wait()
    if status == 0:
        return

    report = ["File verification command failed:\n%s\n" % ' '.join(argv)]
    if out:
        report.append("Standard output:\n%s\n" % out)
    if err:
        report.append("Standard error:\n%s\n" % err)
    raise IOError(''.join(report))
351
352
def crop_to_same(actual_path, actual_image, expected_path, expected_image):
    """
    Centre-crop `actual_image` to the size of `expected_image`.

    This only happens when comparing eps to pdf, i.e. when characters
    ``[-7:-4]`` of the two paths are ``eps`` and ``pdf`` respectively;
    otherwise both images are returned untouched.  Returns the pair
    ``(actual_image, expected_image)``.
    """
    eps_against_pdf = (actual_path[-7:-4] == 'eps'
                       and expected_path[-7:-4] == 'pdf')
    if not eps_against_pdf:
        return actual_image, expected_image

    a0, a1, _ = actual_image.shape
    e0, e1, _ = expected_image.shape
    # Window of the expected extent, centred within the actual image.
    first_axis = slice(int(a0 / 2 - e0 / 2), int(a0 / 2 + e0 / 2))
    second_axis = slice(int(a1 / 2 - e1 / 2), int(a1 / 2 + e1 / 2))
    return actual_image[first_axis, second_axis], expected_image
362
363
def calculate_rms(expectedImage, actualImage):
    """
    Calculate the per-pixel errors, then compute the root mean square error.

    Parameters
    ----------
    expectedImage, actualImage : numpy.ndarray
        Arrays of identical shape.

    Returns
    -------
    float
        The root mean square of the element-wise difference.

    Raises
    ------
    ImageComparisonFailure
        If the two arrays differ in shape.
    """
    if expectedImage.shape != actualImage.shape:
        raise ImageComparisonFailure(
            "Image sizes do not match expected size: {0} "
            "actual size {1}".format(expectedImage.shape, actualImage.shape))
    # Convert to float *before* subtracting: subtracting first would wrap
    # around for unsigned or narrow integer inputs (e.g. uint8 0 - 255 == 1).
    diff = expectedImage.astype(float) - actualImage.astype(float)
    return np.sqrt((diff ** 2).mean())
372
373
def compare_images(expected, actual, tol, in_decorator=False):
    """
    Compare two "image" files checking differences within a tolerance.

    The two given filenames may point to files which are convertible to
    PNG via the `.converter` dictionary. The underlying RMS is calculated
    with the `.calculate_rms` function.

    Parameters
    ----------
    expected : str
        The filename of the expected image.
    actual : str
        The filename of the actual image.
    tol : float
        The tolerance (a color value difference, where 255 is the
        maximal difference).  The test fails if the RMS pixel
        difference is greater than this value.
    in_decorator : bool
        If called from image_comparison decorator, this should be
        True. (default=False)

    Returns
    -------
    None, dict or str
        None if the images match within `tol`.  Otherwise a dict with the
        keys ``rms``, ``expected``, ``actual``, ``diff`` and ``tol`` when
        `in_decorator` is true, or a printable report string when it is
        false.  A difference image is written next to `actual` on failure.

    Examples
    --------
    img1 = "./baseline/plot.png"
    img2 = "./output/plot.png"
    compare_images(img1, img2, 0.001)

    """
    if not os.path.exists(actual):
        raise Exception("Output image %s does not exist." % actual)
    if os.stat(actual).st_size == 0:
        raise Exception("Output image file %s is empty." % actual)

    extension = expected.split('.')[-1]
    if not os.path.exists(expected):
        raise IOError('Baseline image %r does not exist.' % expected)

    # Anything that is not already a png is converted first; only the
    # baseline conversion goes through the cache.
    if extension != 'png':
        actual = convert(actual, False)
        expected = convert(expected, True)

    # Load both files, then drop the alpha channel (if it exists).
    loaded = [_png.read_png_int(path) for path in (expected, actual)]
    expected_rgb, actual_rgb = [pixels[:, :, :3] for pixels in loaded]

    actual_rgb, expected_rgb = crop_to_same(
        actual, actual_rgb, expected, expected_rgb)

    # With a non-positive tolerance, exact equality is the only way to pass
    # early; otherwise fall through to the RMS computation.
    if tol <= 0.0 and np.array_equal(expected_rgb, actual_rgb):
        return None

    # Signed integers, so that the images can be subtracted without overflow.
    rms = calculate_rms(expected_rgb.astype(np.int16),
                        actual_rgb.astype(np.int16))
    if rms <= tol:
        return None

    diff_image = make_test_filename(actual, 'failed-diff')
    save_diff_image(expected, actual, diff_image)

    summary = dict(rms=rms, expected=str(expected),
                   actual=str(actual), diff=str(diff_image), tol=tol)
    if in_decorator:
        return summary

    # Otherwise the result should be a string suitable for stdout.
    template = ['Error: Image files did not match.',
                'RMS Value: {rms}',
                'Expected:  \n    {expected}',
                'Actual:    \n    {actual}',
                'Difference:\n    {diff}',
                'Tolerance: \n    {tol}', ]
    return '\n  '.join([line.format(**summary) for line in template])
459
460
def save_diff_image(expected, actual, output):
    """
    Write an image visualising the difference between two png files.

    `expected` and `actual` are the filenames to compare and `output` is
    the filename of the png to write.  The absolute per-channel difference
    is amplified, clipped to 0-255 and saved fully opaque.  Raises
    ``ImageComparisonFailure`` if the (possibly cropped) images differ in
    shape.
    """
    expected_pixels = _png.read_png(expected)
    actual_pixels = _png.read_png(actual)
    actual_pixels, expected_pixels = crop_to_same(
        actual, actual_pixels, expected, expected_pixels)
    expected_pixels = np.array(expected_pixels).astype(float)
    actual_pixels = np.array(actual_pixels).astype(float)
    if expected_pixels.shape != actual_pixels.shape:
        raise ImageComparisonFailure(
            "Image sizes do not match expected size: {0} "
            "actual size {1}".format(expected_pixels.shape,
                                     actual_pixels.shape))

    # Expand differences in luminance domain so small errors become visible.
    amplified = np.abs(expected_pixels - actual_pixels) * (255 * 10)
    diff_pixels = np.clip(amplified, 0, 255).astype(np.uint8)
    height, width, depth = diff_pixels.shape

    # The PDF renderer doesn't produce an alpha channel, but the
    # matplotlib PNG writer requires one, so expand the array.
    if depth == 3:
        rgba = np.empty((height, width, 4), dtype=np.uint8)
        rgba[:, :, 0:3] = diff_pixels
        diff_pixels = rgba

    # Hard-code the alpha channel to fully solid.
    diff_pixels[:, :, 3] = 255

    _png.write_png(diff_pixels, output)
490