1# Licensed under a 3-clause BSD style license - see LICENSE.rst
2import pytest
3import numpy as np
4
5from astropy.io.fits.column import Column
6from astropy.io.fits.diff import (FITSDiff, HeaderDiff, ImageDataDiff,
7                                  TableDataDiff, HDUDiff)
8from astropy.io.fits.hdu import HDUList, PrimaryHDU, ImageHDU
9from astropy.io.fits.hdu.base import NonstandardExtHDU
10from astropy.io.fits.hdu.table import BinTableHDU
11from astropy.io.fits.header import Header
12
13from astropy.utils.exceptions import AstropyDeprecationWarning
14from astropy.utils.misc import _NOT_OVERWRITING_MSG_MATCH
15from astropy.io import fits
16
17from . import FitsTestCase
18
19
class DummyNonstandardExtHDU(NonstandardExtHDU):
    """Minimal non-standard extension HDU backed by an in-memory buffer.

    The HDU keeps the raw bytes of ``data`` in ``_buffer`` and reports the
    length of that buffer as its size.

    Parameters
    ----------
    data : array-like, optional
        Converted with ``np.asarray`` and serialized with ``tobytes``.
    *args, **kwargs
        Forwarded unchanged to the parent constructor.
    """

    def __init__(self, data=None, *args, **kwargs):
        # ``super().__init__`` is already bound, so ``self`` must not be
        # passed again: doing so forwarded the HDU instance itself in the
        # first positional slot instead of ``data``.
        super().__init__(data, *args, **kwargs)
        self._buffer = np.asarray(data).tobytes()
        self._data_offset = 0

    @property
    def size(self):
        """Length in bytes of the serialized data buffer."""
        return len(self._buffer)
30
31
32class TestDiff(FitsTestCase):
33    def test_identical_headers(self):
34        ha = Header([('A', 1), ('B', 2), ('C', 3)])
35        hb = ha.copy()
36        assert HeaderDiff(ha, hb).identical
37        assert HeaderDiff(ha.tostring(), hb.tostring()).identical
38
39        with pytest.raises(TypeError):
40            HeaderDiff(1, 2)
41
42    def test_slightly_different_headers(self):
43        ha = Header([('A', 1), ('B', 2), ('C', 3)])
44        hb = ha.copy()
45        hb['C'] = 4
46        assert not HeaderDiff(ha, hb).identical
47
48    def test_common_keywords(self):
49        ha = Header([('A', 1), ('B', 2), ('C', 3)])
50        hb = ha.copy()
51        hb['C'] = 4
52        hb['D'] = (5, 'Comment')
53        assert HeaderDiff(ha, hb).common_keywords == ['A', 'B', 'C']
54
55    def test_different_keyword_count(self):
56        ha = Header([('A', 1), ('B', 2), ('C', 3)])
57        hb = ha.copy()
58        del hb['B']
59        diff = HeaderDiff(ha, hb)
60        assert not diff.identical
61        assert diff.diff_keyword_count == (3, 2)
62
63        # But make sure the common keywords are at least correct
64        assert diff.common_keywords == ['A', 'C']
65
66    def test_different_keywords(self):
67        ha = Header([('A', 1), ('B', 2), ('C', 3)])
68        hb = ha.copy()
69        hb['C'] = 4
70        hb['D'] = (5, 'Comment')
71        ha['E'] = (6, 'Comment')
72        ha['F'] = (7, 'Comment')
73        diff = HeaderDiff(ha, hb)
74        assert not diff.identical
75        assert diff.diff_keywords == (['E', 'F'], ['D'])
76
77    def test_different_keyword_values(self):
78        ha = Header([('A', 1), ('B', 2), ('C', 3)])
79        hb = ha.copy()
80        hb['C'] = 4
81        diff = HeaderDiff(ha, hb)
82        assert not diff.identical
83        assert diff.diff_keyword_values == {'C': [(3, 4)]}
84
85    def test_different_keyword_comments(self):
86        ha = Header([('A', 1), ('B', 2), ('C', 3, 'comment 1')])
87        hb = ha.copy()
88        hb.comments['C'] = 'comment 2'
89        diff = HeaderDiff(ha, hb)
90        assert not diff.identical
91        assert (diff.diff_keyword_comments ==
92                {'C': [('comment 1', 'comment 2')]})
93
94    def test_different_keyword_values_with_duplicate(self):
95        ha = Header([('A', 1), ('B', 2), ('C', 3)])
96        hb = ha.copy()
97        ha.append(('C', 4))
98        hb.append(('C', 5))
99        diff = HeaderDiff(ha, hb)
100        assert not diff.identical
101        assert diff.diff_keyword_values == {'C': [None, (4, 5)]}
102
103    def test_asymmetric_duplicate_keywords(self):
104        ha = Header([('A', 1), ('B', 2), ('C', 3)])
105        hb = ha.copy()
106        ha.append(('A', 2, 'comment 1'))
107        ha.append(('A', 3, 'comment 2'))
108        hb.append(('B', 4, 'comment 3'))
109        hb.append(('C', 5, 'comment 4'))
110        diff = HeaderDiff(ha, hb)
111        assert not diff.identical
112        assert diff.diff_keyword_values == {}
113        assert (diff.diff_duplicate_keywords ==
114                {'A': (3, 1), 'B': (1, 2), 'C': (1, 2)})
115
116        report = diff.report()
117        assert ("Inconsistent duplicates of keyword 'A'     :\n"
118                "  Occurs 3 time(s) in a, 1 times in (b)") in report
119
120    def test_floating_point_rtol(self):
121        ha = Header([('A', 1), ('B', 2.00001), ('C', 3.000001)])
122        hb = ha.copy()
123        hb['B'] = 2.00002
124        hb['C'] = 3.000002
125        diff = HeaderDiff(ha, hb)
126        assert not diff.identical
127        assert (diff.diff_keyword_values ==
128                {'B': [(2.00001, 2.00002)], 'C': [(3.000001, 3.000002)]})
129        diff = HeaderDiff(ha, hb, rtol=1e-6)
130        assert not diff.identical
131        assert diff.diff_keyword_values == {'B': [(2.00001, 2.00002)]}
132        diff = HeaderDiff(ha, hb, rtol=1e-5)
133        assert diff.identical
134
135    def test_floating_point_atol(self):
136        ha = Header([('A', 1), ('B', 1.0), ('C', 0.0)])
137        hb = ha.copy()
138        hb['B'] = 1.00001
139        hb['C'] = 0.000001
140        diff = HeaderDiff(ha, hb, rtol=1e-6)
141        assert not diff.identical
142        assert (diff.diff_keyword_values ==
143                {'B': [(1.0, 1.00001)], 'C': [(0.0, 0.000001)]})
144        diff = HeaderDiff(ha, hb, rtol=1e-5)
145        assert not diff.identical
146        assert (diff.diff_keyword_values ==
147                {'C': [(0.0, 0.000001)]})
148        diff = HeaderDiff(ha, hb, atol=1e-6)
149        assert not diff.identical
150        assert (diff.diff_keyword_values ==
151                {'B': [(1.0, 1.00001)]})
152        diff = HeaderDiff(ha, hb, atol=1e-5)  # strict inequality
153        assert not diff.identical
154        assert (diff.diff_keyword_values ==
155                {'B': [(1.0, 1.00001)]})
156        diff = HeaderDiff(ha, hb, rtol=1e-5, atol=1e-5)
157        assert diff.identical
158        diff = HeaderDiff(ha, hb, atol=1.1e-5)
159        assert diff.identical
160        diff = HeaderDiff(ha, hb, rtol=1e-6, atol=1e-6)
161        assert not diff.identical
162
163    def test_ignore_blanks(self):
164        with fits.conf.set_temp('strip_header_whitespace', False):
165            ha = Header([('A', 1), ('B', 2), ('C', 'A       ')])
166            hb = ha.copy()
167            hb['C'] = 'A'
168            assert ha['C'] != hb['C']
169
170            diff = HeaderDiff(ha, hb)
171            # Trailing blanks are ignored by default
172            assert diff.identical
173            assert diff.diff_keyword_values == {}
174
175            # Don't ignore blanks
176            diff = HeaderDiff(ha, hb, ignore_blanks=False)
177            assert not diff.identical
178            assert diff.diff_keyword_values == {'C': [('A       ', 'A')]}
179
    @pytest.mark.parametrize("differ", [HeaderDiff, HDUDiff, FITSDiff])
    def test_ignore_blank_cards(self, differ):
        """Test for https://aeon.stsci.edu/ssb/trac/pyfits/ticket/152

        Ignore blank cards.

        The same checks are run for each ``differ`` class.  The three headers
        (no blanks, interleaved blanks, appended/inserted blanks) are wrapped
        in a ``PrimaryHDU`` for ``HDUDiff`` and additionally in an ``HDUList``
        for ``FITSDiff`` so that each differ receives a matching input type.
        """

        # ha: no blank cards; hb: two blank cards interleaved with the others.
        ha = Header([('A', 1), ('B', 2), ('C', 3)])
        hb = Header([('A', 1), ('', ''), ('B', 2), ('', ''), ('C', 3)])
        # hc: copy of ha that receives two blank cards below.
        hc = ha.copy()
        if differ is HeaderDiff:
            hc.append()
            hc.append()
        else:  # Ensure blanks are not at the end as they are stripped by HDUs
            hc.add_blank(after=-2)
            hc.add_blank(after=-2)

        # hc_header keeps a handle on hc's header once hc has been wrapped, so
        # that another blank card can be added to it further down.
        if differ in (HDUDiff, FITSDiff):  # wrap it in a PrimaryHDU
            ha, hb, hc = (PrimaryHDU(np.arange(10), h) for h in (ha, hb, hc))
            hc_header = hc.header
        if differ is FITSDiff:  # wrap it in a HDUList
            ha, hb, hc = (HDUList([h]) for h in (ha, hb, hc))
            hc_header = hc[0].header

        # We now have a header with interleaved blanks, and a header with end
        # blanks, both of which should ignore the blanks
        assert differ(ha, hb).identical
        assert differ(ha, hc).identical
        assert differ(hb, hc).identical

        # With blank cards taken into account, ha differs from both others.
        assert not differ(ha, hb, ignore_blank_cards=False).identical
        assert not differ(ha, hc, ignore_blank_cards=False).identical

        # Both hb and hc have the same number of blank cards; since order is
        # currently ignored, these should still be identical even if blank
        # cards are not ignored
        assert differ(hb, hc, ignore_blank_cards=False).identical

        # Add one more blank card to hc only.
        if differ is HeaderDiff:
            hc.append()
        else:  # Ensure blanks are not at the end as they are stripped by HDUs
            hc_header.add_blank(after=-2)
        # But now there are different numbers of blanks, so they should not be
        # ignored:
        assert not differ(hb, hc, ignore_blank_cards=False).identical
225
226    def test_ignore_hdus(self):
227        a = np.arange(100).reshape(10, 10)
228        b = a.copy()
229        ha = Header([('A', 1), ('B', 2), ('C', 3)])
230        xa = np.array([(1.0, 1), (3.0, 4)], dtype=[('x', float), ('y', int)])
231        xb = np.array([(1.0, 2), (3.0, 5)], dtype=[('x', float), ('y', int)])
232        phdu = PrimaryHDU(header=ha)
233        ihdua = ImageHDU(data=a, name='SCI')
234        ihdub = ImageHDU(data=b, name='SCI')
235        bhdu1 = BinTableHDU(data=xa, name='ASDF')
236        bhdu2 = BinTableHDU(data=xb, name='ASDF')
237        hdula = HDUList([phdu, ihdua, bhdu1])
238        hdulb = HDUList([phdu, ihdub, bhdu2])
239
240        # ASDF extension should be different
241        diff = FITSDiff(hdula, hdulb)
242        assert not diff.identical
243        assert diff.diff_hdus[0][0] == 2
244
245        # ASDF extension should be ignored
246        diff = FITSDiff(hdula, hdulb, ignore_hdus=['ASDF'])
247        assert diff.identical, diff.report()
248
249        diff = FITSDiff(hdula, hdulb, ignore_hdus=['ASD*'])
250        assert diff.identical, diff.report()
251
252        # SCI extension should be different
253        hdulb['SCI'].data += 1
254        diff = FITSDiff(hdula, hdulb, ignore_hdus=['ASDF'])
255        assert not diff.identical
256
257        # SCI and ASDF extensions should be ignored
258        diff = FITSDiff(hdula, hdulb, ignore_hdus=['SCI', 'ASDF'])
259        assert diff.identical, diff.report()
260
261        # All EXTVER of SCI should be ignored
262        ihduc = ImageHDU(data=a, name='SCI', ver=2)
263        hdulb.append(ihduc)
264        diff = FITSDiff(hdula, hdulb, ignore_hdus=['SCI', 'ASDF'])
265        assert not any(diff.diff_hdus), diff.report()
266        assert any(diff.diff_hdu_count), diff.report()
267
268    def test_ignore_keyword_values(self):
269        ha = Header([('A', 1), ('B', 2), ('C', 3)])
270        hb = ha.copy()
271        hb['B'] = 4
272        hb['C'] = 5
273        diff = HeaderDiff(ha, hb, ignore_keywords=['*'])
274        assert diff.identical
275        diff = HeaderDiff(ha, hb, ignore_keywords=['B'])
276        assert not diff.identical
277        assert diff.diff_keyword_values == {'C': [(3, 5)]}
278
279        report = diff.report()
280        assert 'Keyword B        has different values' not in report
281        assert 'Keyword C        has different values' in report
282
283        # Test case-insensitivity
284        diff = HeaderDiff(ha, hb, ignore_keywords=['b'])
285        assert not diff.identical
286        assert diff.diff_keyword_values == {'C': [(3, 5)]}
287
288    def test_ignore_keyword_comments(self):
289        ha = Header([('A', 1, 'A'), ('B', 2, 'B'), ('C', 3, 'C')])
290        hb = ha.copy()
291        hb.comments['B'] = 'D'
292        hb.comments['C'] = 'E'
293        diff = HeaderDiff(ha, hb, ignore_comments=['*'])
294        assert diff.identical
295        diff = HeaderDiff(ha, hb, ignore_comments=['B'])
296        assert not diff.identical
297        assert diff.diff_keyword_comments == {'C': [('C', 'E')]}
298
299        report = diff.report()
300        assert 'Keyword B        has different comments' not in report
301        assert 'Keyword C        has different comments' in report
302
303        # Test case-insensitivity
304        diff = HeaderDiff(ha, hb, ignore_comments=['b'])
305        assert not diff.identical
306        assert diff.diff_keyword_comments == {'C': [('C', 'E')]}
307
308    def test_trivial_identical_images(self):
309        ia = np.arange(100).reshape(10, 10)
310        ib = np.arange(100).reshape(10, 10)
311        diff = ImageDataDiff(ia, ib)
312        assert diff.identical
313        assert diff.diff_total == 0
314
315    def test_identical_within_relative_tolerance(self):
316        ia = np.ones((10, 10)) - 0.00001
317        ib = np.ones((10, 10)) - 0.00002
318        diff = ImageDataDiff(ia, ib, rtol=1.0e-4)
319        assert diff.identical
320        assert diff.diff_total == 0
321
322    def test_identical_within_absolute_tolerance(self):
323        ia = np.zeros((10, 10)) - 0.00001
324        ib = np.zeros((10, 10)) - 0.00002
325        diff = ImageDataDiff(ia, ib, rtol=1.0e-4)
326        assert not diff.identical
327        assert diff.diff_total == 100
328        diff = ImageDataDiff(ia, ib, atol=1.0e-4)
329        assert diff.identical
330        assert diff.diff_total == 0
331
332    def test_identical_within_rtol_and_atol(self):
333        ia = np.zeros((10, 10)) - 0.00001
334        ib = np.zeros((10, 10)) - 0.00002
335        diff = ImageDataDiff(ia, ib, rtol=1.0e-5, atol=1.0e-5)
336        assert diff.identical
337        assert diff.diff_total == 0
338
339    def test_not_identical_within_rtol_and_atol(self):
340        ia = np.zeros((10, 10)) - 0.00001
341        ib = np.zeros((10, 10)) - 0.00002
342        diff = ImageDataDiff(ia, ib, rtol=1.0e-5, atol=1.0e-6)
343        assert not diff.identical
344        assert diff.diff_total == 100
345
346    def test_identical_comp_image_hdus(self):
347        """Regression test for https://aeon.stsci.edu/ssb/trac/pyfits/ticket/189
348
349        For this test we mostly just care that comparing to compressed images
350        does not crash, and returns the correct results.  Two compressed images
351        will be considered identical if the decompressed data is the same.
352        Obviously we test whether or not the same compression was used by
353        looking for (or ignoring) header differences.
354        """
355
356        data = np.arange(100.0).reshape(10, 10)
357        hdu = fits.CompImageHDU(data=data)
358        hdu.writeto(self.temp('test.fits'))
359
360        with fits.open(self.temp('test.fits')) as hdula, \
361                fits.open(self.temp('test.fits')) as hdulb:
362            diff = FITSDiff(hdula, hdulb)
363            assert diff.identical
364
365    def test_different_dimensions(self):
366        ia = np.arange(100).reshape(10, 10)
367        ib = np.arange(100) - 1
368
369        # Although ib could be reshaped into the same dimensions, for now the
370        # data is not compared anyways
371        diff = ImageDataDiff(ia, ib)
372        assert not diff.identical
373        assert diff.diff_dimensions == ((10, 10), (100,))
374        assert diff.diff_total == 0
375
376        report = diff.report()
377        assert 'Data dimensions differ' in report
378        assert 'a: 10 x 10' in report
379        assert 'b: 100' in report
380        assert 'No further data comparison performed.'
381
382    def test_different_pixels(self):
383        ia = np.arange(100).reshape(10, 10)
384        ib = np.arange(100).reshape(10, 10)
385        ib[0, 0] = 10
386        ib[5, 5] = 20
387        diff = ImageDataDiff(ia, ib)
388        assert not diff.identical
389        assert diff.diff_dimensions == ()
390        assert diff.diff_total == 2
391        assert diff.diff_ratio == 0.02
392        assert diff.diff_pixels == [((0, 0), (0, 10)), ((5, 5), (55, 20))]
393
394    def test_identical_tables(self):
395        c1 = Column('A', format='L', array=[True, False])
396        c2 = Column('B', format='X', array=[[0], [1]])
397        c3 = Column('C', format='4I', dim='(2, 2)',
398                    array=[[0, 1, 2, 3], [4, 5, 6, 7]])
399        c4 = Column('D', format='J', bscale=2.0, array=[0, 1])
400        c5 = Column('E', format='A3', array=['abc', 'def'])
401        c6 = Column('F', format='E', unit='m', array=[0.0, 1.0])
402        c7 = Column('G', format='D', bzero=-0.1, array=[0.0, 1.0])
403        c8 = Column('H', format='C', array=[0.0+1.0j, 2.0+3.0j])
404        c9 = Column('I', format='M', array=[4.0+5.0j, 6.0+7.0j])
405        c10 = Column('J', format='PI(2)', array=[[0, 1], [2, 3]])
406
407        columns = [c1, c2, c3, c4, c5, c6, c7, c8, c9, c10]
408
409        ta = BinTableHDU.from_columns(columns)
410        tb = BinTableHDU.from_columns([c.copy() for c in columns])
411
412        diff = TableDataDiff(ta.data, tb.data)
413        assert diff.identical
414        assert len(diff.common_columns) == 10
415        assert diff.common_column_names == set('abcdefghij')
416        assert diff.diff_ratio == 0
417        assert diff.diff_total == 0
418
419    def test_diff_empty_tables(self):
420        """
421        Regression test for https://aeon.stsci.edu/ssb/trac/pyfits/ticket/178
422
423        Ensure that diffing tables containing empty data doesn't crash.
424        """
425
426        c1 = Column('D', format='J')
427        c2 = Column('E', format='J')
428        thdu = BinTableHDU.from_columns([c1, c2], nrows=0)
429
430        hdula = fits.HDUList([thdu])
431        hdulb = fits.HDUList([thdu])
432
433        diff = FITSDiff(hdula, hdulb)
434        assert diff.identical
435
436    def test_ignore_table_fields(self):
437        c1 = Column('A', format='L', array=[True, False])
438        c2 = Column('B', format='X', array=[[0], [1]])
439        c3 = Column('C', format='4I', dim='(2, 2)',
440                    array=[[0, 1, 2, 3], [4, 5, 6, 7]])
441
442        c4 = Column('B', format='X', array=[[1], [0]])
443        c5 = Column('C', format='4I', dim='(2, 2)',
444                    array=[[1, 2, 3, 4], [5, 6, 7, 8]])
445
446        ta = BinTableHDU.from_columns([c1, c2, c3])
447        tb = BinTableHDU.from_columns([c1, c4, c5])
448
449        diff = TableDataDiff(ta.data, tb.data, ignore_fields=['B', 'C'])
450        assert diff.identical
451
452        # The only common column should be c1
453        assert len(diff.common_columns) == 1
454        assert diff.common_column_names == {'a'}
455        assert diff.diff_ratio == 0
456        assert diff.diff_total == 0
457
458    def test_different_table_field_names(self):
459        ca = Column('A', format='L', array=[True, False])
460        cb = Column('B', format='L', array=[True, False])
461        cc = Column('C', format='L', array=[True, False])
462
463        ta = BinTableHDU.from_columns([ca, cb])
464        tb = BinTableHDU.from_columns([ca, cc])
465
466        diff = TableDataDiff(ta.data, tb.data)
467
468        assert not diff.identical
469        assert len(diff.common_columns) == 1
470        assert diff.common_column_names == {'a'}
471        assert diff.diff_column_names == (['B'], ['C'])
472        assert diff.diff_ratio == 0
473        assert diff.diff_total == 0
474
475        report = diff.report()
476        assert 'Extra column B of format L in a' in report
477        assert 'Extra column C of format L in b' in report
478
479    def test_different_table_field_counts(self):
480        """
481        Test tables with some common columns, but different number of columns
482        overall.
483        """
484
485        ca = Column('A', format='L', array=[True, False])
486        cb = Column('B', format='L', array=[True, False])
487        cc = Column('C', format='L', array=[True, False])
488
489        ta = BinTableHDU.from_columns([cb])
490        tb = BinTableHDU.from_columns([ca, cb, cc])
491
492        diff = TableDataDiff(ta.data, tb.data)
493
494        assert not diff.identical
495        assert diff.diff_column_count == (1, 3)
496        assert len(diff.common_columns) == 1
497        assert diff.common_column_names == {'b'}
498        assert diff.diff_column_names == ([], ['A', 'C'])
499        assert diff.diff_ratio == 0
500        assert diff.diff_total == 0
501
502        report = diff.report()
503        assert ' Tables have different number of columns:' in report
504        assert '  a: 1\n  b: 3' in report
505
506    def test_different_table_rows(self):
507        """
508        Test tables that are otherwise identical but one has more rows than the
509        other.
510        """
511
512        ca1 = Column('A', format='L', array=[True, False])
513        cb1 = Column('B', format='L', array=[True, False])
514        ca2 = Column('A', format='L', array=[True, False, True])
515        cb2 = Column('B', format='L', array=[True, False, True])
516
517        ta = BinTableHDU.from_columns([ca1, cb1])
518        tb = BinTableHDU.from_columns([ca2, cb2])
519
520        diff = TableDataDiff(ta.data, tb.data)
521
522        assert not diff.identical
523        assert diff.diff_column_count == ()
524        assert len(diff.common_columns) == 2
525        assert diff.diff_rows == (2, 3)
526        assert diff.diff_values == []
527
528        report = diff.report()
529
530        assert 'Table rows differ' in report
531        assert 'a: 2' in report
532        assert 'b: 3' in report
533        assert 'No further data comparison performed.'
534
    def test_different_table_data(self):
        """
        Test diffing table data on columns of several different data formats
        and dimensions.

        Both tables have the same ten column definitions (A-J) and two rows
        each; only the stored values differ.  The assertions check the
        individual entries of ``diff_values``, the totals, and the text of
        the report.
        """

        # Columns of the first table.
        ca1 = Column('A', format='L', array=[True, False])
        ca2 = Column('B', format='X', array=[[0], [1]])
        ca3 = Column('C', format='4I', dim='(2, 2)',
                     array=[[0, 1, 2, 3], [4, 5, 6, 7]])
        ca4 = Column('D', format='J', bscale=2.0, array=[0.0, 2.0])
        ca5 = Column('E', format='A3', array=['abc', 'def'])
        ca6 = Column('F', format='E', unit='m', array=[0.0, 1.0])
        ca7 = Column('G', format='D', bzero=-0.1, array=[0.0, 1.0])
        ca8 = Column('H', format='C', array=[0.0+1.0j, 2.0+3.0j])
        ca9 = Column('I', format='M', array=[4.0+5.0j, 6.0+7.0j])
        ca10 = Column('J', format='PI(2)', array=[[0, 1], [2, 3]])

        # Columns of the second table: same definitions, different values.
        cb1 = Column('A', format='L', array=[False, False])
        cb2 = Column('B', format='X', array=[[0], [0]])
        cb3 = Column('C', format='4I', dim='(2, 2)',
                     array=[[0, 1, 2, 3], [5, 6, 7, 8]])
        cb4 = Column('D', format='J', bscale=2.0, array=[2.0, 2.0])
        cb5 = Column('E', format='A3', array=['abc', 'ghi'])
        cb6 = Column('F', format='E', unit='m', array=[1.0, 2.0])
        cb7 = Column('G', format='D', bzero=-0.1, array=[2.0, 3.0])
        cb8 = Column('H', format='C', array=[1.0+1.0j, 2.0+3.0j])
        cb9 = Column('I', format='M', array=[5.0+5.0j, 6.0+7.0j])
        cb10 = Column('J', format='PI(2)', array=[[1, 2], [3, 4]])

        ta = BinTableHDU.from_columns([ca1, ca2, ca3, ca4, ca5, ca6, ca7,
                                       ca8, ca9, ca10])
        tb = BinTableHDU.from_columns([cb1, cb2, cb3, cb4, cb5, cb6, cb7,
                                       cb8, cb9, cb10])

        # NOTE(review): numdiffs=20 presumably raises the cap on recorded
        # differences so all 13 are kept -- confirm against TableDataDiff.
        diff = TableDataDiff(ta.data, tb.data, numdiffs=20)
        assert not diff.identical
        # The column definitions are the same, but not the column values
        assert diff.diff_columns == ()
        # Each entry is ((column name, row index), (value in a, value in b)).
        assert diff.diff_values[0] == (('A', 0), (True, False))
        assert diff.diff_values[1] == (('B', 1), ([1], [0]))
        # Array-valued cells are compared element-wise with ``.all()``.
        assert diff.diff_values[2][0] == ('C', 1)
        assert (diff.diff_values[2][1][0] == [[4, 5], [6, 7]]).all()
        assert (diff.diff_values[2][1][1] == [[5, 6], [7, 8]]).all()
        assert diff.diff_values[3] == (('D', 0), (0, 2.0))
        assert diff.diff_values[4] == (('E', 1), ('def', 'ghi'))
        assert diff.diff_values[5] == (('F', 0), (0.0, 1.0))
        assert diff.diff_values[6] == (('F', 1), (1.0, 2.0))
        assert diff.diff_values[7] == (('G', 0), (0.0, 2.0))
        assert diff.diff_values[8] == (('G', 1), (1.0, 3.0))
        assert diff.diff_values[9] == (('H', 0), (0.0+1.0j, 1.0+1.0j))
        assert diff.diff_values[10] == (('I', 0), (4.0+5.0j, 5.0+5.0j))
        assert diff.diff_values[11][0] == ('J', 0)
        assert (diff.diff_values[11][1][0] == [0, 1]).all()
        assert (diff.diff_values[11][1][1] == [1, 2]).all()
        assert diff.diff_values[12][0] == ('J', 1)
        assert (diff.diff_values[12][1][0] == [2, 3]).all()
        assert (diff.diff_values[12][1][1] == [3, 4]).all()

        # 13 differing cells; 0.65 is consistent with 13 out of
        # 10 columns x 2 rows = 20 cells.
        assert diff.diff_total == 13
        assert diff.diff_ratio == 0.65

        report = diff.report()
        assert ('Column A data differs in row 0:\n'
                '    a> True\n'
                '    b> False') in report
        assert ('...and at 1 more indices.\n'
                ' Column D data differs in row 0:') in report
        assert ('13 different table data element(s) found (65.00% different)'
                in report)
        # The "more indices" note must appear exactly once in the report.
        assert report.count('more indices') == 1
606
607    def test_identical_files_basic(self):
608        """Test identicality of two simple, extensionless files."""
609
610        a = np.arange(100).reshape(10, 10)
611        hdu = PrimaryHDU(data=a)
612        hdu.writeto(self.temp('testa.fits'))
613        hdu.writeto(self.temp('testb.fits'))
614        diff = FITSDiff(self.temp('testa.fits'), self.temp('testb.fits'))
615        assert diff.identical
616
617        report = diff.report()
618        # Primary HDUs should contain no differences
619        assert 'Primary HDU' not in report
620        assert 'Extension HDU' not in report
621        assert 'No differences found.' in report
622
623        a = np.arange(10)
624        ehdu = ImageHDU(data=a)
625        diff = HDUDiff(ehdu, ehdu)
626        assert diff.identical
627        report = diff.report()
628        assert 'No differences found.' in report
629
630    def test_partially_identical_files1(self):
631        """
632        Test files that have some identical HDUs but a different extension
633        count.
634        """
635
636        a = np.arange(100).reshape(10, 10)
637        phdu = PrimaryHDU(data=a)
638        ehdu = ImageHDU(data=a)
639        hdula = HDUList([phdu, ehdu])
640        hdulb = HDUList([phdu, ehdu, ehdu])
641        diff = FITSDiff(hdula, hdulb)
642        assert not diff.identical
643        assert diff.diff_hdu_count == (2, 3)
644
645        # diff_hdus should be empty, since the third extension in hdulb
646        # has nothing to compare against
647        assert diff.diff_hdus == []
648
649        report = diff.report()
650        assert 'Files contain different numbers of HDUs' in report
651        assert 'a: 2\n b: 3' in report
652        assert 'No differences found between common HDUs' in report
653
654    def test_partially_identical_files2(self):
655        """
656        Test files that have some identical HDUs but one different HDU.
657        """
658
659        a = np.arange(100).reshape(10, 10)
660        phdu = PrimaryHDU(data=a)
661        ehdu = ImageHDU(data=a)
662        ehdu2 = ImageHDU(data=(a + 1))
663        hdula = HDUList([phdu, ehdu, ehdu])
664        hdulb = HDUList([phdu, ehdu2, ehdu])
665        diff = FITSDiff(hdula, hdulb)
666
667        assert not diff.identical
668        assert diff.diff_hdu_count == ()
669        assert len(diff.diff_hdus) == 1
670        assert diff.diff_hdus[0][0] == 1
671
672        hdudiff = diff.diff_hdus[0][1]
673        assert not hdudiff.identical
674        assert hdudiff.diff_extnames == ()
675        assert hdudiff.diff_extvers == ()
676        assert hdudiff.diff_extension_types == ()
677        assert hdudiff.diff_headers.identical
678        assert hdudiff.diff_data is not None
679
680        datadiff = hdudiff.diff_data
681        assert isinstance(datadiff, ImageDataDiff)
682        assert not datadiff.identical
683        assert datadiff.diff_dimensions == ()
684        assert (datadiff.diff_pixels ==
685                [((0, y), (y, y + 1)) for y in range(10)])
686        assert datadiff.diff_ratio == 1.0
687        assert datadiff.diff_total == 100
688
689        report = diff.report()
690        # Primary HDU and 2nd extension HDU should have no differences
691        assert 'Primary HDU' not in report
692        assert 'Extension HDU 2' not in report
693        assert 'Extension HDU 1' in report
694
695        assert 'Headers contain differences' not in report
696        assert 'Data contains differences' in report
697        for y in range(10):
698            assert f'Data differs at [{y + 1}, 1]' in report
699        assert '100 different pixels found (100.00% different).' in report
700
701    def test_partially_identical_files3(self):
702        """
703        Test files that have some identical HDUs but a different extension
704        name.
705        """
706
707        phdu = PrimaryHDU()
708        ehdu = ImageHDU(name='FOO')
709        hdula = HDUList([phdu, ehdu])
710        ehdu = BinTableHDU(name='BAR')
711        ehdu.header['EXTVER'] = 2
712        ehdu.header['EXTLEVEL'] = 3
713        hdulb = HDUList([phdu, ehdu])
714        diff = FITSDiff(hdula, hdulb)
715        assert not diff.identical
716
717        assert diff.diff_hdus[0][0] == 1
718
719        hdu_diff = diff.diff_hdus[0][1]
720        assert hdu_diff.diff_extension_types == ('IMAGE', 'BINTABLE')
721        assert hdu_diff.diff_extnames == ('FOO', 'BAR')
722        assert hdu_diff.diff_extvers == (1, 2)
723        assert hdu_diff.diff_extlevels == (1, 3)
724
725        report = diff.report()
726        assert 'Extension types differ' in report
727        assert 'a: IMAGE\n    b: BINTABLE' in report
728        assert 'Extension names differ' in report
729        assert 'a: FOO\n    b: BAR' in report
730        assert 'Extension versions differ' in report
731        assert 'a: 1\n    b: 2' in report
732        assert 'Extension levels differ' in report
733        assert 'a: 1\n    b: 2' in report
734
735    def test_diff_nans(self):
736        """
737        Regression test for https://aeon.stsci.edu/ssb/trac/pyfits/ticket/204
738        """
739
740        # First test some arrays that should be equivalent....
741        arr = np.empty((10, 10), dtype=np.float64)
742        arr[:5] = 1.0
743        arr[5:] = np.nan
744        arr2 = arr.copy()
745
746        table = np.rec.array([(1.0, 2.0), (3.0, np.nan), (np.nan, np.nan)],
747                             names=['cola', 'colb']).view(fits.FITS_rec)
748        table2 = table.copy()
749
750        assert ImageDataDiff(arr, arr2).identical
751        assert TableDataDiff(table, table2).identical
752
753        # Now let's introduce some differences, where there are nans and where
754        # there are not nans
755        arr2[0][0] = 2.0
756        arr2[5][0] = 2.0
757        table2[0][0] = 2.0
758        table2[1][1] = 2.0
759
760        diff = ImageDataDiff(arr, arr2)
761        assert not diff.identical
762        assert diff.diff_pixels[0] == ((0, 0), (1.0, 2.0))
763        assert diff.diff_pixels[1][0] == (5, 0)
764        assert np.isnan(diff.diff_pixels[1][1][0])
765        assert diff.diff_pixels[1][1][1] == 2.0
766
767        diff = TableDataDiff(table, table2)
768        assert not diff.identical
769        assert diff.diff_values[0] == (('cola', 0), (1.0, 2.0))
770        assert diff.diff_values[1][0] == ('colb', 1)
771        assert np.isnan(diff.diff_values[1][1][0])
772        assert diff.diff_values[1][1][1] == 2.0
773
774    def test_file_output_from_path_string(self):
775        outpath = self.temp('diff_output.txt')
776        ha = Header([('A', 1), ('B', 2), ('C', 3)])
777        hb = ha.copy()
778        hb['C'] = 4
779        diffobj = HeaderDiff(ha, hb)
780        diffobj.report(fileobj=outpath)
781        report_as_string = diffobj.report()
782        with open(outpath) as fout:
783            assert fout.read() == report_as_string
784
785    def test_file_output_overwrite_safety(self):
786        outpath = self.temp('diff_output.txt')
787        ha = Header([('A', 1), ('B', 2), ('C', 3)])
788        hb = ha.copy()
789        hb['C'] = 4
790        diffobj = HeaderDiff(ha, hb)
791        diffobj.report(fileobj=outpath)
792
793        with pytest.raises(OSError, match=_NOT_OVERWRITING_MSG_MATCH):
794            diffobj.report(fileobj=outpath)
795
796    def test_file_output_overwrite_success(self):
797        outpath = self.temp('diff_output.txt')
798        ha = Header([('A', 1), ('B', 2), ('C', 3)])
799        hb = ha.copy()
800        hb['C'] = 4
801        diffobj = HeaderDiff(ha, hb)
802        diffobj.report(fileobj=outpath)
803        report_as_string = diffobj.report()
804        diffobj.report(fileobj=outpath, overwrite=True)
805        with open(outpath) as fout:
806            assert fout.read() == report_as_string, (
807                "overwritten output file is not identical to report string")
808
809    def test_file_output_overwrite_vs_clobber(self):
810        """Verify uses of clobber and overwrite."""
811
812        outpath = self.temp('diff_output.txt')
813        ha = Header([('A', 1), ('B', 2), ('C', 3)])
814        hb = ha.copy()
815        hb['C'] = 4
816        diffobj = HeaderDiff(ha, hb)
817        diffobj.report(fileobj=outpath)
818        with pytest.warns(AstropyDeprecationWarning, match=r'"clobber" was '
819                          r'deprecated in version 2\.0 and will be removed in '
820                          r'version 5\.1\. Use argument "overwrite" instead\.'):
821            diffobj.report(fileobj=outpath, clobber=True)
822
823    def test_rawdatadiff_nodiff(self):
824        a = np.arange(100, dtype='uint8').reshape(10, 10)
825        b = a.copy()
826        hdu_a = DummyNonstandardExtHDU(data=a)
827        hdu_b = DummyNonstandardExtHDU(data=b)
828        diff = HDUDiff(hdu_a, hdu_b)
829        assert diff.identical
830        report = diff.report()
831        assert 'No differences found.' in report
832
833    def test_rawdatadiff_dimsdiff(self):
834        a = np.arange(100, dtype='uint8') + 10
835        b = a[:80].copy()
836        hdu_a = DummyNonstandardExtHDU(data=a)
837        hdu_b = DummyNonstandardExtHDU(data=b)
838        diff = HDUDiff(hdu_a, hdu_b)
839        assert not diff.identical
840        report = diff.report()
841        assert 'Data sizes differ:' in report
842        assert 'a: 100 bytes' in report
843        assert 'b: 80 bytes' in report
844        assert 'No further data comparison performed.' in report
845
846    def test_rawdatadiff_bytesdiff(self):
847        a = np.arange(100, dtype='uint8') + 10
848        b = a.copy()
849        changes = [(30, 200), (89, 170)]
850        for i, v in changes:
851            b[i] = v
852
853        hdu_a = DummyNonstandardExtHDU(data=a)
854        hdu_b = DummyNonstandardExtHDU(data=b)
855        diff = HDUDiff(hdu_a, hdu_b)
856
857        assert not diff.identical
858
859        diff_bytes = diff.diff_data.diff_bytes
860        assert len(changes) == len(diff_bytes)
861        for j, (i, v) in enumerate(changes):
862            assert diff_bytes[j] == (i, (i+10, v))
863
864        report = diff.report()
865        assert 'Data contains differences:' in report
866        for i, _ in changes:
867            assert f'Data differs at byte {i}:' in report
868        assert '2 different bytes found (2.00% different).' in report
869
870
def test_fitsdiff_hdu_name(tmpdir):
    """The report labels an HDU with name and version when both files agree."""
    first_file, second_file = (str(tmpdir.join(fname))
                               for fname in ("test1.fits", "test2.fits"))

    image_hdu = ImageHDU(data=np.zeros(5), name="SCI")
    hdus = HDUList([PrimaryHDU(), image_hdu])
    hdus.writeto(first_file)
    # Change a single pixel so the second file differs in its data only.
    hdus[1].data[0] = 1
    hdus.writeto(second_file)

    report_text = FITSDiff(first_file, second_file).report()
    assert "Extension HDU 1 (SCI, 1):" in report_text
883
884
def test_fitsdiff_no_hdu_name(tmpdir):
    """The report omits the HDU name when the HDU was created without one."""
    first_file, second_file = (str(tmpdir.join(fname))
                               for fname in ("test1.fits", "test2.fits"))

    image_hdu = ImageHDU(data=np.zeros(5))
    hdus = HDUList([PrimaryHDU(), image_hdu])
    hdus.writeto(first_file)
    # Change a single pixel so the second file differs in its data only.
    hdus[1].data[0] = 1
    hdus.writeto(second_file)

    report_text = FITSDiff(first_file, second_file).report()
    assert "Extension HDU 1:" in report_text
897
898
def test_fitsdiff_with_names(tmpdir):
    """The report omits the HDU name when it differs between the files."""
    first_file, second_file = (str(tmpdir.join(fname))
                               for fname in ("test1.fits", "test2.fits"))

    image_hdu = ImageHDU(data=np.zeros(5), name="SCI", ver=1)
    hdus = HDUList([PrimaryHDU(), image_hdu])
    hdus.writeto(first_file)
    # Rename the extension so the two files disagree on the HDU name.
    hdus[1].name = "ERR"
    hdus.writeto(second_file)

    report_text = FITSDiff(first_file, second_file).report()
    assert "Extension HDU 1:" in report_text
911