1# Licensed under a 3-clause BSD style license - see LICENSE.rst
2"""
3Facilities for diffing two FITS files.  Includes objects for diffing entire
4FITS files, individual HDUs, FITS headers, or just FITS data.
5
6Used to implement the fitsdiff program.
7"""
8import fnmatch
9import glob
10import io
11import operator
12import os
13import os.path
14import textwrap
15
16from collections import defaultdict
17from inspect import signature
18from itertools import islice
19
20import numpy as np
21
22from astropy import __version__
23
24from .card import Card, BLANK_CARD
25from .header import Header
26from astropy.utils.decorators import deprecated_renamed_argument
27# HDUList is used in one of the doctests
28from .hdu.hdulist import fitsopen, HDUList  # pylint: disable=W0611
29from .hdu.table import _TableLikeHDU
30from astropy.utils.diff import (report_diff_values, fixed_width_indent,
31                                where_not_allclose, diff_values)
32from astropy.utils.misc import NOT_OVERWRITING_MSG
33
34__all__ = ['FITSDiff', 'HDUDiff', 'HeaderDiff', 'ImageDataDiff', 'RawDataDiff',
35           'TableDataDiff']
36
# Column attributes that are of interest when comparing table columns.
# Each entry pairs a column attribute name with its plural, human-readable
# form.
_COL_ATTRS = [
    ('unit', 'units'),
    ('null', 'null values'),
    ('bscale', 'bscales'),
    ('bzero', 'bzeros'),
    ('disp', 'display formats'),
    ('dim', 'dimensions'),
]
41
42
class _BaseDiff:
    """
    Base class for all FITS diff objects.

    When instantiating a FITS diff object, the first two arguments are always
    the two objects to diff (two FITS files, two FITS headers, etc.).
    Instantiating a ``_BaseDiff`` also causes the diff itself to be executed.
    The returned ``_BaseDiff`` instance has a number of attributes that
    describe the results of the diff operation.

    The most basic attribute, present on all ``_BaseDiff`` instances, is
    ``.identical`` which is `True` if the two objects being compared are
    identical according to the diff method for objects of that type.
    """

    def __init__(self, a, b):
        """
        The ``_BaseDiff`` class does not implement a ``_diff`` method and
        should not be instantiated directly. Instead instantiate the
        appropriate subclass of ``_BaseDiff`` for the objects being compared
        (for example, use `HeaderDiff` to compare two `Header` objects).

        Subclasses must initialize all of their ``diff_*`` attributes and
        options *before* calling this initializer, because it runs
        ``self._diff()`` immediately.
        """

        self.a = a
        self.b = b

        # For internal use in report output
        self._fileobj = None
        self._indent = 0

        self._diff()

    def __bool__(self):
        """
        A ``_BaseDiff`` object acts as `True` in a boolean context if the two
        objects compared are different.  Otherwise it acts as `False`.
        """

        return not self.identical

    @classmethod
    def fromdiff(cls, other, a, b):
        """
        Returns a new Diff object of a specific subclass from an existing diff
        object, passing on the values for any arguments they share in common
        (such as ignore_keywords).

        For example::

            >>> from astropy.io import fits
            >>> hdul1, hdul2 = fits.HDUList(), fits.HDUList()
            >>> headera, headerb = fits.Header(), fits.Header()
            >>> fd = fits.FITSDiff(hdul1, hdul2, ignore_keywords=['*'])
            >>> hd = fits.HeaderDiff.fromdiff(fd, headera, headerb)
            >>> list(hd.ignore_keywords)
            ['*']
        """

        sig = signature(cls.__init__)
        # The first 3 arguments of any Diff initializer are self, a, and b;
        # everything after that is an option that may be inherited from
        # ``other`` when it carries an attribute of the same name.
        option_names = list(sig.parameters)[3:]
        kwargs = {name: getattr(other, name)
                  for name in option_names if hasattr(other, name)}

        return cls(a, b, **kwargs)

    @property
    def identical(self):
        """
        `True` if all the ``.diff_*`` attributes on this diff instance are
        empty, implying that no differences were found.

        Any subclass of ``_BaseDiff`` must have at least one ``.diff_*``
        attribute, which contains a non-empty value if and only if some
        difference was found between the two objects being compared.
        """

        return not any(getattr(self, attr) for attr in self.__dict__
                       if attr.startswith('diff_'))

    @deprecated_renamed_argument('clobber', 'overwrite', '2.0',
                                 message='"clobber" was deprecated in version '
                                         '2.0 and will be removed in version '
                                         '5.1. Use argument "overwrite" '
                                         'instead.')
    def report(self, fileobj=None, indent=0, overwrite=False):
        """
        Generates a text report on the differences (if any) between two
        objects, and either returns it as a string or writes it to a file-like
        object.

        Parameters
        ----------
        fileobj : file-like, string, path-like, or None, optional
            If `None`, this method returns the report as a string. Otherwise it
            returns `None` and writes the report to the given file-like object
            (which must have a ``.write()`` method at a minimum), or to a new
            file at the path specified (given as a string or an
            `os.PathLike` object).

        indent : int
            The number of 4 space tabs to indent the report.

        overwrite : bool, optional
            If ``True``, overwrite the output file if it exists. Raises an
            ``OSError`` if ``False`` and the output file exists. Default is
            ``False``.

            .. versionchanged:: 1.3
               ``overwrite`` replaces the deprecated ``clobber`` argument.

        Returns
        -------
        report : str or None

        Raises
        ------
        OSError
            If ``fileobj`` is a path to an existing file and ``overwrite`` is
            false.
        """

        return_string = False
        filepath = None

        if isinstance(fileobj, (str, os.PathLike)):
            if os.path.exists(fileobj) and not overwrite:
                raise OSError(NOT_OVERWRITING_MSG.format(fileobj))
            filepath = fileobj
            fileobj = open(filepath, 'w')
        elif fileobj is None:
            fileobj = io.StringIO()
            return_string = True

        self._fileobj = fileobj
        self._indent = indent  # This is used internally by _writeln

        try:
            self._report()
        finally:
            # Only close streams this method opened itself; caller-supplied
            # file objects (and the StringIO we still need to read) stay open.
            if filepath is not None:
                fileobj.close()

        if return_string:
            return fileobj.getvalue()

    def _writeln(self, text):
        """Write ``text`` plus a newline to the report, at the current indent."""
        self._fileobj.write(fixed_width_indent(text, self._indent) + '\n')

    def _diff(self):
        """Perform the comparison; must be implemented by subclasses."""
        raise NotImplementedError

    def _report(self):
        """Write the report via ``_writeln``; must be implemented by subclasses."""
        raise NotImplementedError
192
193
class FITSDiff(_BaseDiff):
    """Diff two FITS files by filename, or two `HDUList` objects.

    `FITSDiff` objects have the following diff attributes:

    - ``diff_hdu_count``: If the FITS files being compared have different
      numbers of HDUs, this contains a 2-tuple of the number of HDUs in each
      file.  The counts are taken before any HDUs named in ``ignore_hdus``
      are filtered out.

    - ``diff_hdus``: If any HDUs with the same index are different, this
      contains a list of 4-tuples ``(index, hdu_diff, extname, extver)``:
      the HDU index, the `HDUDiff` object representing the differences
      between the two HDUs, the extension name, and the extension version
      of the HDU from ``a``.  The extension name is the empty string unless
      both HDUs share the same name and version.
    """

    def __init__(self, a, b, ignore_hdus=[], ignore_keywords=[],
                 ignore_comments=[], ignore_fields=[],
                 numdiffs=10, rtol=0.0, atol=0.0,
                 ignore_blanks=True, ignore_blank_cards=True):
        """
        Parameters
        ----------
        a : str, path-like, or `HDUList`
            The filename of a FITS file on disk, or an `HDUList` object.

        b : str, path-like, or `HDUList`
            The filename of a FITS file on disk, or an `HDUList` object to
            compare to the first file.

        ignore_hdus : sequence, optional
            HDU names to ignore when comparing two FITS files or HDU lists; the
            presence of these HDUs and their contents are ignored.  Wildcard
            strings may also be included in the list.

        ignore_keywords : sequence, optional
            Header keywords to ignore when comparing two headers; the presence
            of these keywords and their values are ignored.  Wildcard strings
            may also be included in the list.

        ignore_comments : sequence, optional
            A list of header keywords whose comments should be ignored in the
            comparison.  May contain wildcard strings as with ignore_keywords.

        ignore_fields : sequence, optional
            The (case-insensitive) names of any table columns to ignore if any
            table data is to be compared.

        numdiffs : int, optional
            The number of pixel/table values to output when reporting HDU data
            differences.  Though the count of differences is the same either
            way, this allows controlling the number of different values that
            are kept in memory or output.  If a negative value is given, then
            numdiffs is treated as unlimited (default: 10).

        rtol : float, optional
            The relative difference to allow when comparing two float values
            either in header values, image arrays, or table columns
            (default: 0.0). Values which satisfy the expression

            .. math::

                \\left| a - b \\right| > \\text{atol} + \\text{rtol} \\cdot \\left| b \\right|

            are considered to be different.
            The underlying function used for comparison is `numpy.allclose`.

            .. versionadded:: 2.0

        atol : float, optional
            The allowed absolute difference. See also ``rtol`` parameter.

            .. versionadded:: 2.0

        ignore_blanks : bool, optional
            Ignore extra whitespace at the end of string values either in
            headers or data. Extra leading whitespace is not ignored
            (default: True).

        ignore_blank_cards : bool, optional
            Ignore all cards that are blank, i.e. they only contain
            whitespace (default: True).

        Raises
        ------
        OSError
            If ``a`` or ``b`` is a path and the file cannot be opened.
        """

        # Track which inputs were opened here so that only those are closed;
        # HDULists supplied by the caller remain the caller's responsibility.
        close_a = close_b = False

        try:
            if isinstance(a, (str, os.PathLike)):
                a = self._open_hdulist(a, 'a')
                close_a = True

            if isinstance(b, (str, os.PathLike)):
                b = self._open_hdulist(b, 'b')
                close_b = True

            # Normalize keywords/fields to ignore to upper case
            self.ignore_hdus = {k.upper() for k in ignore_hdus}
            self.ignore_keywords = {k.upper() for k in ignore_keywords}
            self.ignore_comments = {k.upper() for k in ignore_comments}
            self.ignore_fields = {k.upper() for k in ignore_fields}

            self.numdiffs = numdiffs
            self.rtol = rtol
            self.atol = atol

            self.ignore_blanks = ignore_blanks
            self.ignore_blank_cards = ignore_blank_cards

            # Some hdu names may be pattern wildcards.  Find them.  A bare
            # '*' is deliberately left in ignore_hdus as a literal name.
            self.ignore_hdu_patterns = {
                name for name in self.ignore_hdus
                if name != '*' and glob.has_magic(name)}
            self.ignore_hdus -= self.ignore_hdu_patterns

            self.diff_hdu_count = ()
            self.diff_hdus = []

            super().__init__(a, b)
        finally:
            # Runs on every exit path, so a file opened for ``a`` is released
            # even when opening ``b`` (or the diff itself) fails.
            if close_a:
                a.close()
            if close_b:
                b.close()

    @staticmethod
    def _open_hdulist(path, label):
        """Open ``path`` with ``fitsopen``; wrap any failure in an `OSError`.

        ``label`` ('a' or 'b') identifies the input in the error message.
        """
        try:
            return fitsopen(path)
        except Exception as exc:
            # Not every exception carries arguments; avoid masking the real
            # failure with an IndexError from ``exc.args[0]``.
            detail = exc.args[0] if exc.args else ''
            raise OSError("error opening file {} ({}): {}: {}".format(
                label, path, exc.__class__.__name__, detail)) from exc

    def _diff(self):
        # The HDU counts are compared before any ignored HDUs are removed.
        if len(self.a) != len(self.b):
            self.diff_hdu_count = (len(self.a), len(self.b))

        # Record filenames for use later in _report
        self.filenamea = self.a.filename()
        if not self.filenamea:
            self.filenamea = f'<{self.a.__class__.__name__} object at {id(self.a):#x}>'

        self.filenameb = self.b.filename()
        if not self.filenameb:
            self.filenameb = f'<{self.b.__class__.__name__} object at {id(self.b):#x}>'

        if self.ignore_hdus:
            self.a = HDUList([h for h in self.a if h.name not in self.ignore_hdus])
            self.b = HDUList([h for h in self.b if h.name not in self.ignore_hdus])
        if self.ignore_hdu_patterns:
            a_names = [hdu.name for hdu in self.a]
            b_names = [hdu.name for hdu in self.b]
            for pattern in self.ignore_hdu_patterns:
                # Evaluate each pattern once per file rather than once per HDU.
                a_matches = set(fnmatch.filter(a_names, pattern))
                b_matches = set(fnmatch.filter(b_names, pattern))
                self.a = HDUList([h for h in self.a if h.name not in a_matches])
                self.b = HDUList([h for h in self.b if h.name not in b_matches])

        # For now, just compare the extensions one by one in order.
        # Might allow some more sophisticated types of diffing later.

        # TODO: Somehow or another simplify the passing around of diff
        # options--this will become important as the number of options grows
        for idx, (hdu_a, hdu_b) in enumerate(zip(self.a, self.b)):
            hdu_diff = HDUDiff.fromdiff(self, hdu_a, hdu_b)

            if hdu_diff.identical:
                continue

            # Only label the entry with an extension name when both HDUs
            # agree on name and version; otherwise the name is ambiguous.
            same_extension = hdu_a.name == hdu_b.name and hdu_a.ver == hdu_b.ver
            extname = hdu_a.name if same_extension else ""
            self.diff_hdus.append((idx, hdu_diff, extname, hdu_a.ver))

    def _report(self):
        wrapper = textwrap.TextWrapper(initial_indent='  ',
                                       subsequent_indent='  ')

        self._fileobj.write('\n')
        self._writeln(f' fitsdiff: {__version__}')
        self._writeln(f' a: {self.filenamea}\n b: {self.filenameb}')

        # Echo every non-empty "ignore" option so the report is
        # self-describing.  Exact HDU names and HDU wildcard patterns are
        # listed under the same heading, in two separate sections.
        ignored_sections = (
            (' HDU(s) not to be compared:', self.ignore_hdus),
            (' HDU(s) not to be compared:', self.ignore_hdu_patterns),
            (' Keyword(s) not to be compared:', self.ignore_keywords),
            (' Keyword(s) whose comments are not to be compared:',
             self.ignore_comments),
            (' Table column(s) not to be compared:', self.ignore_fields),
        )
        for heading, names in ignored_sections:
            if names:
                listing = wrapper.fill(' '.join(sorted(names)))
                self._writeln(f'{heading}\n{listing}')

        self._writeln(' Maximum number of different data values to be '
                      'reported: {}'.format(self.numdiffs))
        self._writeln(' Relative tolerance: {}, Absolute tolerance: {}'
                      .format(self.rtol, self.atol))

        if self.diff_hdu_count:
            self._fileobj.write('\n')
            self._writeln('Files contain different numbers of HDUs:')
            self._writeln(f' a: {self.diff_hdu_count[0]}')
            self._writeln(f' b: {self.diff_hdu_count[1]}')

            if not self.diff_hdus:
                self._writeln('No differences found between common HDUs.')
                return
        elif not self.diff_hdus:
            self._fileobj.write('\n')
            self._writeln('No differences found.')
            return

        for idx, hdu_diff, extname, extver in self.diff_hdus:
            # print out the extension heading
            self._fileobj.write('\n')
            if idx == 0:
                self._writeln('Primary HDU:')
            elif extname:
                self._writeln(f'Extension HDU {idx} ({extname}, {extver}):')
            else:
                self._writeln(f'Extension HDU {idx}:')
            hdu_diff.report(self._fileobj, indent=self._indent + 1)
429
430
class HDUDiff(_BaseDiff):
    """
    Diff two HDU objects, including their headers and their data (but only if
    both HDUs contain the same type of data (image, table, or unknown)).

    `HDUDiff` objects have the following diff attributes:

    - ``diff_extnames``: If the two HDUs have different EXTNAME values, this
      contains a 2-tuple of the different extension names.

    - ``diff_extvers``: If the two HDUS have different EXTVER values, this
      contains a 2-tuple of the different extension versions.

    - ``diff_extlevels``: If the two HDUs have different EXTLEVEL values, this
      contains a 2-tuple of the different extension levels.

    - ``diff_extension_types``: If the two HDUs have different XTENSION values,
      this contains a 2-tuple of the different extension types.

    - ``diff_headers``: Contains a `HeaderDiff` object for the headers of the
      two HDUs. This will always contain an object--it may be determined
      whether the headers are different through ``diff_headers.identical``.

    - ``diff_data``: Contains either a `ImageDataDiff`, `TableDataDiff`, or
      `RawDataDiff` as appropriate for the data in the HDUs, and only if the
      two HDUs have non-empty data of the same type (`RawDataDiff` is used for
      HDUs containing non-empty data of an indeterminate type).
    """

    def __init__(self, a, b, ignore_keywords=[], ignore_comments=[],
                 ignore_fields=[], numdiffs=10, rtol=0.0, atol=0.0,
                 ignore_blanks=True, ignore_blank_cards=True):
        """
        Parameters
        ----------
        a : BaseHDU
            An HDU object.

        b : BaseHDU
            An HDU object to compare to the first HDU object.

        ignore_keywords : sequence, optional
            Header keywords to ignore when comparing two headers; the presence
            of these keywords and their values are ignored.  Wildcard strings
            may also be included in the list.

        ignore_comments : sequence, optional
            A list of header keywords whose comments should be ignored in the
            comparison.  May contain wildcard strings as with ignore_keywords.

        ignore_fields : sequence, optional
            The (case-insensitive) names of any table columns to ignore if any
            table data is to be compared.

        numdiffs : int, optional
            The number of pixel/table values to output when reporting HDU data
            differences.  Though the count of differences is the same either
            way, this allows controlling the number of different values that
            are kept in memory or output.  If a negative value is given, then
            numdiffs is treated as unlimited (default: 10).

        rtol : float, optional
            The relative difference to allow when comparing two float values
            either in header values, image arrays, or table columns
            (default: 0.0). Values which satisfy the expression

            .. math::

                \\left| a - b \\right| > \\text{atol} + \\text{rtol} \\cdot \\left| b \\right|

            are considered to be different.
            The underlying function used for comparison is `numpy.allclose`.

            .. versionadded:: 2.0

        atol : float, optional
            The allowed absolute difference. See also ``rtol`` parameter.

            .. versionadded:: 2.0

        ignore_blanks : bool, optional
            Ignore extra whitespace at the end of string values either in
            headers or data. Extra leading whitespace is not ignored
            (default: True).

        ignore_blank_cards : bool, optional
            Ignore all cards that are blank, i.e. they only contain
            whitespace (default: True).
        """

        def _uppercased(names):
            # Names are matched case-insensitively, so store them upper-cased.
            return {name.upper() for name in names}

        self.ignore_keywords = _uppercased(ignore_keywords)
        self.ignore_comments = _uppercased(ignore_comments)
        self.ignore_fields = _uppercased(ignore_fields)

        self.rtol, self.atol = rtol, atol

        self.numdiffs = numdiffs
        self.ignore_blanks = ignore_blanks
        self.ignore_blank_cards = ignore_blank_cards

        # Results; all start out "empty" and are filled in by _diff(), which
        # the base-class initializer invokes.
        self.diff_extnames = ()
        self.diff_extvers = ()
        self.diff_extlevels = ()
        self.diff_extension_types = ()
        self.diff_headers = None
        self.diff_data = None

        super().__init__(a, b)

    def _diff(self):
        first, second = self.a, self.b

        if first.name != second.name:
            self.diff_extnames = (first.name, second.name)

        if first.ver != second.ver:
            self.diff_extvers = (first.ver, second.ver)

        if first.level != second.level:
            self.diff_extlevels = (first.level, second.level)

        xtension_a = first.header.get('XTENSION')
        xtension_b = second.header.get('XTENSION')
        if xtension_a != xtension_b:
            self.diff_extension_types = (xtension_a, xtension_b)

        # The headers are diffed on copies of the originals.
        self.diff_headers = HeaderDiff.fromdiff(self, first.header.copy(),
                                                second.header.copy())

        if first.data is None or second.data is None:
            # TODO: Perhaps have some means of marking this case
            return

        # Choose the data differ that matches what both HDUs hold.
        if first.is_image and second.is_image:
            data_differ = ImageDataDiff
        elif (isinstance(first, _TableLikeHDU) and
              isinstance(second, _TableLikeHDU)):
            # TODO: Replace this if/when _BaseHDU grows a .is_table property
            data_differ = TableDataDiff
        elif not self.diff_extension_types:
            data_differ = RawDataDiff
        else:
            # Don't diff the data for unequal extension types that are not
            # recognized image or table types
            return

        self.diff_data = data_differ.fromdiff(self, first.data, second.data)
        # Clean up references to (possibly) memmapped arrays so they can
        # be closed by .close()
        self.diff_data.a = None
        self.diff_data.b = None

    def _report(self):
        if self.identical:
            self._writeln(" No differences found.")

        # (recorded pair, message template) for each extension-level check,
        # in the order in which they are reported.
        extension_sections = (
            (self.diff_extension_types,
             " Extension types differ:\n  a: {}\n  b: {}"),
            (self.diff_extnames,
             " Extension names differ:\n  a: {}\n  b: {}"),
            (self.diff_extvers,
             " Extension versions differ:\n  a: {}\n  b: {}"),
            (self.diff_extlevels,
             " Extension levels differ:\n  a: {}\n  b: {}"),
        )
        for pair, template in extension_sections:
            if pair:
                self._writeln(template.format(*pair))

        headers_differ = not self.diff_headers.identical
        if headers_differ:
            self._fileobj.write('\n')
            self._writeln(" Headers contain differences:")
            self.diff_headers.report(self._fileobj, indent=self._indent + 1)

        data_diff = self.diff_data
        if data_diff is not None and not data_diff.identical:
            self._fileobj.write('\n')
            self._writeln(" Data contains differences:")
            data_diff.report(self._fileobj, indent=self._indent + 1)
613
614
615class HeaderDiff(_BaseDiff):
616    """
617    Diff two `Header` objects.
618
619    `HeaderDiff` objects have the following diff attributes:
620
621    - ``diff_keyword_count``: If the two headers contain a different number of
622      keywords, this contains a 2-tuple of the keyword count for each header.
623
624    - ``diff_keywords``: If either header contains one or more keywords that
625      don't appear at all in the other header, this contains a 2-tuple
626      consisting of a list of the keywords only appearing in header a, and a
627      list of the keywords only appearing in header b.
628
629    - ``diff_duplicate_keywords``: If a keyword appears in both headers at
630      least once, but contains a different number of duplicates (for example, a
631      different number of HISTORY cards in each header), an item is added to
632      this dict with the keyword as the key, and a 2-tuple of the different
633      counts of that keyword as the value.  For example::
634
635          {'HISTORY': (20, 19)}
636
637      means that header a contains 20 HISTORY cards, while header b contains
638      only 19 HISTORY cards.
639
640    - ``diff_keyword_values``: If any of the common keyword between the two
641      headers have different values, they appear in this dict.  It has a
642      structure similar to ``diff_duplicate_keywords``, with the keyword as the
643      key, and a 2-tuple of the different values as the value.  For example::
644
645          {'NAXIS': (2, 3)}
646
647      means that the NAXIS keyword has a value of 2 in header a, and a value of
648      3 in header b.  This excludes any keywords matched by the
649      ``ignore_keywords`` list.
650
651    - ``diff_keyword_comments``: Like ``diff_keyword_values``, but contains
652      differences between keyword comments.
653
654    `HeaderDiff` objects also have a ``common_keywords`` attribute that lists
655    all keywords that appear in both headers.
656    """
657
658    def __init__(self, a, b, ignore_keywords=[], ignore_comments=[],
659                 rtol=0.0, atol=0.0, ignore_blanks=True, ignore_blank_cards=True):
660        """
661        Parameters
662        ----------
663        a : `~astropy.io.fits.Header` or string or bytes
664            A header.
665
666        b : `~astropy.io.fits.Header` or string or bytes
667            A header to compare to the first header.
668
669        ignore_keywords : sequence, optional
670            Header keywords to ignore when comparing two headers; the presence
671            of these keywords and their values are ignored.  Wildcard strings
672            may also be included in the list.
673
674        ignore_comments : sequence, optional
675            A list of header keywords whose comments should be ignored in the
676            comparison.  May contain wildcard strings as with ignore_keywords.
677
678        numdiffs : int, optional
679            The number of pixel/table values to output when reporting HDU data
680            differences.  Though the count of differences is the same either
681            way, this allows controlling the number of different values that
682            are kept in memory or output.  If a negative value is given, then
683            numdiffs is treated as unlimited (default: 10).
684
685        rtol : float, optional
686            The relative difference to allow when comparing two float values
687            either in header values, image arrays, or table columns
688            (default: 0.0). Values which satisfy the expression
689
690            .. math::
691
692                \\left| a - b \\right| > \\text{atol} + \\text{rtol} \\cdot \\left| b \\right|
693
694            are considered to be different.
695            The underlying function used for comparison is `numpy.allclose`.
696
697            .. versionadded:: 2.0
698
699        atol : float, optional
700            The allowed absolute difference. See also ``rtol`` parameter.
701
702            .. versionadded:: 2.0
703
704        ignore_blanks : bool, optional
705            Ignore extra whitespace at the end of string values either in
706            headers or data. Extra leading whitespace is not ignored
707            (default: True).
708
709        ignore_blank_cards : bool, optional
710            Ignore all cards that are blank, i.e. they only contain
711            whitespace (default: True).
712        """
713
714        self.ignore_keywords = {k.upper() for k in ignore_keywords}
715        self.ignore_comments = {k.upper() for k in ignore_comments}
716
717        self.rtol = rtol
718        self.atol = atol
719
720        self.ignore_blanks = ignore_blanks
721        self.ignore_blank_cards = ignore_blank_cards
722
723        self.ignore_keyword_patterns = set()
724        self.ignore_comment_patterns = set()
725        for keyword in list(self.ignore_keywords):
726            keyword = keyword.upper()
727            if keyword != '*' and glob.has_magic(keyword):
728                self.ignore_keywords.remove(keyword)
729                self.ignore_keyword_patterns.add(keyword)
730        for keyword in list(self.ignore_comments):
731            keyword = keyword.upper()
732            if keyword != '*' and glob.has_magic(keyword):
733                self.ignore_comments.remove(keyword)
734                self.ignore_comment_patterns.add(keyword)
735
736        # Keywords appearing in each header
737        self.common_keywords = []
738
739        # Set to the number of keywords in each header if the counts differ
740        self.diff_keyword_count = ()
741
742        # Set if the keywords common to each header (excluding ignore_keywords)
743        # appear in different positions within the header
744        # TODO: Implement this
745        self.diff_keyword_positions = ()
746
747        # Keywords unique to each header (excluding keywords in
748        # ignore_keywords)
749        self.diff_keywords = ()
750
751        # Keywords that have different numbers of duplicates in each header
752        # (excluding keywords in ignore_keywords)
753        self.diff_duplicate_keywords = {}
754
755        # Keywords common to each header but having different values (excluding
756        # keywords in ignore_keywords)
757        self.diff_keyword_values = defaultdict(list)
758
759        # Keywords common to each header but having different comments
760        # (excluding keywords in ignore_keywords or in ignore_comments)
761        self.diff_keyword_comments = defaultdict(list)
762
763        if isinstance(a, str):
764            a = Header.fromstring(a)
765        if isinstance(b, str):
766            b = Header.fromstring(b)
767
768        if not (isinstance(a, Header) and isinstance(b, Header)):
769            raise TypeError('HeaderDiff can only diff astropy.io.fits.Header '
770                            'objects or strings containing FITS headers.')
771
772        super().__init__(a, b)
773
774    # TODO: This doesn't pay much attention to the *order* of the keywords,
775    # except in the case of duplicate keywords.  The order should be checked
776    # too, or at least it should be an option.
777    def _diff(self):
778        if self.ignore_blank_cards:
779            cardsa = [c for c in self.a.cards if str(c) != BLANK_CARD]
780            cardsb = [c for c in self.b.cards if str(c) != BLANK_CARD]
781        else:
782            cardsa = list(self.a.cards)
783            cardsb = list(self.b.cards)
784
785        # build dictionaries of keyword values and comments
786        def get_header_values_comments(cards):
787            values = {}
788            comments = {}
789            for card in cards:
790                value = card.value
791                if self.ignore_blanks and isinstance(value, str):
792                    value = value.rstrip()
793                values.setdefault(card.keyword, []).append(value)
794                comments.setdefault(card.keyword, []).append(card.comment)
795            return values, comments
796
797        valuesa, commentsa = get_header_values_comments(cardsa)
798        valuesb, commentsb = get_header_values_comments(cardsb)
799
800        # Normalize all keyword to upper-case for comparison's sake;
801        # TODO: HIERARCH keywords should be handled case-sensitively I think
802        keywordsa = {k.upper() for k in valuesa}
803        keywordsb = {k.upper() for k in valuesb}
804
805        self.common_keywords = sorted(keywordsa.intersection(keywordsb))
806        if len(cardsa) != len(cardsb):
807            self.diff_keyword_count = (len(cardsa), len(cardsb))
808
809        # Any other diff attributes should exclude ignored keywords
810        keywordsa = keywordsa.difference(self.ignore_keywords)
811        keywordsb = keywordsb.difference(self.ignore_keywords)
812        if self.ignore_keyword_patterns:
813            for pattern in self.ignore_keyword_patterns:
814                keywordsa = keywordsa.difference(fnmatch.filter(keywordsa,
815                                                                pattern))
816                keywordsb = keywordsb.difference(fnmatch.filter(keywordsb,
817                                                                pattern))
818
819        if '*' in self.ignore_keywords:
820            # Any other differences between keywords are to be ignored
821            return
822
823        left_only_keywords = sorted(keywordsa.difference(keywordsb))
824        right_only_keywords = sorted(keywordsb.difference(keywordsa))
825
826        if left_only_keywords or right_only_keywords:
827            self.diff_keywords = (left_only_keywords, right_only_keywords)
828
829        # Compare count of each common keyword
830        for keyword in self.common_keywords:
831            if keyword in self.ignore_keywords:
832                continue
833            if self.ignore_keyword_patterns:
834                skip = False
835                for pattern in self.ignore_keyword_patterns:
836                    if fnmatch.fnmatch(keyword, pattern):
837                        skip = True
838                        break
839                if skip:
840                    continue
841
842            counta = len(valuesa[keyword])
843            countb = len(valuesb[keyword])
844            if counta != countb:
845                self.diff_duplicate_keywords[keyword] = (counta, countb)
846
847            # Compare keywords' values and comments
848            for a, b in zip(valuesa[keyword], valuesb[keyword]):
849                if diff_values(a, b, rtol=self.rtol, atol=self.atol):
850                    self.diff_keyword_values[keyword].append((a, b))
851                else:
852                    # If there are duplicate keywords we need to be able to
853                    # index each duplicate; if the values of a duplicate
854                    # are identical use None here
855                    self.diff_keyword_values[keyword].append(None)
856
857            if not any(self.diff_keyword_values[keyword]):
858                # No differences found; delete the array of Nones
859                del self.diff_keyword_values[keyword]
860
861            if '*' in self.ignore_comments or keyword in self.ignore_comments:
862                continue
863            if self.ignore_comment_patterns:
864                skip = False
865                for pattern in self.ignore_comment_patterns:
866                    if fnmatch.fnmatch(keyword, pattern):
867                        skip = True
868                        break
869                if skip:
870                    continue
871
872            for a, b in zip(commentsa[keyword], commentsb[keyword]):
873                if diff_values(a, b):
874                    self.diff_keyword_comments[keyword].append((a, b))
875                else:
876                    self.diff_keyword_comments[keyword].append(None)
877
878            if not any(self.diff_keyword_comments[keyword]):
879                del self.diff_keyword_comments[keyword]
880
881    def _report(self):
882        if self.diff_keyword_count:
883            self._writeln(' Headers have different number of cards:')
884            self._writeln(f'  a: {self.diff_keyword_count[0]}')
885            self._writeln(f'  b: {self.diff_keyword_count[1]}')
886        if self.diff_keywords:
887            for keyword in self.diff_keywords[0]:
888                if keyword in Card._commentary_keywords:
889                    val = self.a[keyword][0]
890                else:
891                    val = self.a[keyword]
892                self._writeln(f' Extra keyword {keyword!r:8} in a: {val!r}')
893            for keyword in self.diff_keywords[1]:
894                if keyword in Card._commentary_keywords:
895                    val = self.b[keyword][0]
896                else:
897                    val = self.b[keyword]
898                self._writeln(f' Extra keyword {keyword!r:8} in b: {val!r}')
899
900        if self.diff_duplicate_keywords:
901            for keyword, count in sorted(self.diff_duplicate_keywords.items()):
902                self._writeln(f' Inconsistent duplicates of keyword {keyword!r:8}:')
903                self._writeln('  Occurs {} time(s) in a, {} times in (b)'
904                              .format(*count))
905
906        if self.diff_keyword_values or self.diff_keyword_comments:
907            for keyword in self.common_keywords:
908                report_diff_keyword_attr(self._fileobj, 'values',
909                                         self.diff_keyword_values, keyword,
910                                         ind=self._indent)
911                report_diff_keyword_attr(self._fileobj, 'comments',
912                                         self.diff_keyword_comments, keyword,
913                                         ind=self._indent)
914
915# TODO: It might be good if there was also a threshold option for percentage of
916# different pixels: For example ignore if only 1% of the pixels are different
917# within some threshold.  There are lots of possibilities here, but hold off
918# for now until specific cases come up.
919
920
class ImageDataDiff(_BaseDiff):
    """
    Diff two image data arrays (really any array from a PRIMARY HDU or an IMAGE
    extension HDU, though the data unit is assumed to be "pixels").

    `ImageDataDiff` objects have the following diff attributes:

    - ``diff_dimensions``: If the two arrays contain either a different number
      of dimensions or different sizes in any dimension, this contains a
      2-tuple of the shapes of each array.  Currently no further comparison is
      performed on images that don't have the exact same dimensions.

    - ``diff_pixels``: If the two images contain any different pixels, this
      contains a list of 2-tuples of the array index where the difference was
      found, and another 2-tuple containing the different values.  For example,
      if the pixel at (0, 0) contains different values this would look like::

          [((0, 0), (1.1, 2.2))]

      where 1.1 and 2.2 are the values of that pixel in each array.  This
      array only contains up to ``self.numdiffs`` differences, for storage
      efficiency.

    - ``diff_total``: The total number of different pixels found between the
      arrays.  Although ``diff_pixels`` does not necessarily contain all the
      different pixel values, this can be used to get a count of the total
      number of differences found.

    - ``diff_ratio``: Contains the ratio of ``diff_total`` to the total number
      of pixels in the arrays.
    """

    def __init__(self, a, b, numdiffs=10, rtol=0.0, atol=0.0):
        """
        Parameters
        ----------
        a : array
            The first image data array.  It is used through its ``shape``,
            ``dtype`` and ``flat`` attributes and through indexing.

        b : array
            The image data array to compare to the first one.

        numdiffs : int, optional
            The number of pixel/table values to output when reporting HDU data
            differences.  Though the count of differences is the same either
            way, this allows controlling the number of different values that
            are kept in memory or output.  If a negative value is given, then
            numdiffs is treated as unlimited (default: 10).

        rtol : float, optional
            The relative difference to allow when comparing two float values
            either in header values, image arrays, or table columns
            (default: 0.0). Values which satisfy the expression

            .. math::

                \\left| a - b \\right| > \\text{atol} + \\text{rtol} \\cdot \\left| b \\right|

            are considered to be different.
            The underlying function used for comparison is `numpy.allclose`.

            .. versionadded:: 2.0

        atol : float, optional
            The allowed absolute difference. See also ``rtol`` parameter.

            .. versionadded:: 2.0
        """

        self.numdiffs = numdiffs
        self.rtol = rtol
        self.atol = atol

        self.diff_dimensions = ()
        self.diff_pixels = []
        self.diff_ratio = 0

        # self.diff_pixels only holds up to numdiffs differing pixels, but this
        # self.diff_total stores the total count of differences between
        # the images, but not the different values
        self.diff_total = 0

        super().__init__(a, b)

    def _diff(self):
        """
        Compare the two arrays and fill in ``diff_dimensions`` or, when the
        shapes agree, ``diff_total``, ``diff_pixels`` and ``diff_ratio``.
        """
        if self.a.shape != self.b.shape:
            self.diff_dimensions = (self.a.shape, self.b.shape)
            # Don't do any further comparison if the dimensions differ
            # TODO: Perhaps we could, however, diff just the intersection
            # between the two images
            return

        # Find the indices where the values are not equal
        # If neither a nor b are floating point (or complex), ignore rtol and
        # atol
        if (np.issubdtype(self.a.dtype, np.inexact) or
                np.issubdtype(self.b.dtype, np.inexact)):
            rtol = self.rtol
            atol = self.atol
        else:
            rtol = 0
            atol = 0

        # One index array per dimension; zipping them yields index tuples
        diffs = where_not_allclose(self.a, self.b, atol=atol, rtol=rtol)

        self.diff_total = len(diffs[0])

        if self.diff_total == 0:
            # Then we're done
            return

        # A negative numdiffs means "keep every difference"
        limit = None if self.numdiffs < 0 else self.numdiffs

        self.diff_pixels = [(idx, (self.a[idx], self.b[idx]))
                            for idx in islice(zip(*diffs), limit)]
        self.diff_ratio = float(self.diff_total) / float(len(self.a.flat))

    def _report(self):
        """
        Write the shape mismatch, or the stored differing pixels followed by
        a summary line, using ``self._writeln``.
        """
        if self.diff_dimensions:
            dimsa = ' x '.join(str(d) for d in
                               reversed(self.diff_dimensions[0]))
            dimsb = ' x '.join(str(d) for d in
                               reversed(self.diff_dimensions[1]))
            self._writeln(' Data dimensions differ:')
            self._writeln(f'  a: {dimsa}')
            self._writeln(f'  b: {dimsb}')
            # For now we don't do any further comparison if the dimensions
            # differ; though in the future it might be nice to be able to
            # compare at least where the images intersect
            self._writeln(' No further data comparison performed.')
            return

        # Key off the total rather than the stored pixels so that the summary
        # is still written when numdiffs is 0 and nothing was stored.
        if not self.diff_total:
            return

        for index, values in self.diff_pixels:
            # Reported indices are 1-based with the axis order reversed
            index = [x + 1 for x in reversed(index)]
            self._writeln(f' Data differs at {index}:')
            report_diff_values(values[0], values[1], fileobj=self._fileobj,
                               indent_width=self._indent + 1)

        # Only show the ellipsis when some differences were left out; a
        # negative (unlimited) numdiffs lists all of them.
        if len(self.diff_pixels) < self.diff_total:
            self._writeln(' ...')
        self._writeln(' {} different pixels found ({:.2%} different).'
                      .format(self.diff_total, self.diff_ratio))
1069
1070
class RawDataDiff(ImageDataDiff):
    """
    `RawDataDiff` is just a special case of `ImageDataDiff` where the images
    are one-dimensional, and the data is treated as a 1-dimensional array of
    bytes instead of pixel values.  This is used to compare the data of two
    non-standard extension HDUs that were not recognized as containing image or
    table data.

    `RawDataDiff` objects have the following diff attributes:

    - ``diff_dimensions``: Same as the ``diff_dimensions`` attribute of
      `ImageDataDiff` objects. Though the "dimension" of each array is just an
      integer representing the number of bytes in the data.

    - ``diff_bytes``: Like the ``diff_pixels`` attribute of `ImageDataDiff`
      objects, but renamed to reflect the minor semantic difference that these
      are raw bytes and not pixel values.  Also the indices are integers
      instead of tuples.

    - ``diff_total`` and ``diff_ratio``: Same as `ImageDataDiff`.
    """

    def __init__(self, a, b, numdiffs=10):
        """
        Parameters
        ----------
        a : array
            The first one-dimensional array of raw bytes.

        b : array
            The one-dimensional array of raw bytes to compare to the first.

        numdiffs : int, optional
            The number of pixel/table values to output when reporting HDU data
            differences.  Though the count of differences is the same either
            way, this allows controlling the number of different values that
            are kept in memory or output.  If a negative value is given, then
            numdiffs is treated as unlimited (default: 10).
        """

        self.diff_dimensions = ()
        self.diff_bytes = []

        super().__init__(a, b, numdiffs=numdiffs)

    def _diff(self):
        """
        Run the `ImageDataDiff` comparison, then convert its results to the
        byte-oriented attributes of this class.
        """
        super()._diff()
        if self.diff_dimensions:
            # Each shape has a single entry; keep just that number
            self.diff_dimensions = (self.diff_dimensions[0][0],
                                    self.diff_dimensions[1][0])

        # Unwrap the 1-tuple indices into plain integers
        self.diff_bytes = [(x[0], y) for x, y in self.diff_pixels]
        # Remove the pixel attribute so only diff_bytes is exposed
        del self.diff_pixels

    def _report(self):
        """
        Write the size mismatch, or the stored differing bytes followed by a
        summary line, using ``self._writeln``.
        """
        if self.diff_dimensions:
            self._writeln(' Data sizes differ:')
            self._writeln(f'  a: {self.diff_dimensions[0]} bytes')
            self._writeln(f'  b: {self.diff_dimensions[1]} bytes')
            # For now we don't do any further comparison if the dimensions
            # differ; though in the future it might be nice to be able to
            # compare at least where the images intersect
            self._writeln(' No further data comparison performed.')
            return

        # Key off the total rather than the stored bytes so that the summary
        # is still written when numdiffs is 0 and nothing was stored.
        if not self.diff_total:
            return

        for index, values in self.diff_bytes:
            self._writeln(f' Data differs at byte {index}:')
            report_diff_values(values[0], values[1], fileobj=self._fileobj,
                               indent_width=self._indent + 1)

        # Only show the ellipsis when some differences were left out
        if len(self.diff_bytes) < self.diff_total:
            self._writeln(' ...')
        self._writeln(' {} different bytes found ({:.2%} different).'
                      .format(self.diff_total, self.diff_ratio))
1147
1148
class TableDataDiff(_BaseDiff):
    """
    Diff two table data arrays. It doesn't matter whether the data originally
    came from a binary or ASCII table--the data should be passed in as a
    recarray.

    `TableDataDiff` objects have the following diff attributes:

    - ``diff_column_count``: If the tables being compared have different
      numbers of columns, this contains a 2-tuple of the column count in each
      table.  Even if the tables have different column counts, an attempt is
      still made to compare any columns they have in common.

    - ``diff_columns``: If either table contains columns unique to that table,
      either in name or format, this contains a 2-tuple of dicts mapping the
      lower-cased column name to the full `Column` object.  The first element
      holds the columns that appear only in table a.  The second element holds
      the columns that appear only in table b.  This only lists columns with
      different column definitions, and has nothing to do with the data in
      those columns.

    - ``diff_column_names``: This is like ``diff_columns``, but lists only the
      names of columns unique to either table, rather than the full `Column`
      objects, in the order they appear in each table.

    - ``diff_column_attributes``: Lists columns that are in both tables but
      have different secondary attributes, such as TUNIT or TDISP.  The format
      is a list of 2-tuples: The first a tuple of the column name and the
      attribute, the second a tuple of the different values.

    - ``diff_rows``: If the tables have a different number of rows, this
      contains a 2-tuple of the row count in each table.  In that case the
      column data is not compared.

    - ``diff_values``: `TableDataDiff` compares the data in each table on a
      column-by-column basis.  If any different data is found, it is added to
      this list.  The format of this list is similar to the ``diff_pixels``
      attribute on `ImageDataDiff` objects, though the "index" consists of a
      (column_name, row) tuple.  For example::

          [(('TARGET', 0), ('NGC1001', 'NGC1002'))]

      shows that the tables contain different values in the 0-th row of the
      'TARGET' column.

    - ``diff_total`` and ``diff_ratio``: Same as `ImageDataDiff`.

    `TableDataDiff` objects also have a ``common_columns`` attribute that lists
    the `Column` objects for columns that are identical in both tables, and a
    ``common_column_names`` attribute which contains a set of the names of
    those columns.
    """

    # NOTE: the ``[]`` default of ``ignore_fields`` is only ever read (it is
    # copied into a set), never mutated, so it is safe to share between calls.
    def __init__(self, a, b, ignore_fields=[], numdiffs=10, rtol=0.0, atol=0.0):
        """
        Parameters
        ----------
        a : table data
            The first table.  It is used through its ``columns`` and ``dtype``
            attributes, ``len()`` and indexing by column name.

        b : table data
            The table to compare to the first one.

        ignore_fields : sequence, optional
            The (case-insensitive) names of any table columns to ignore if any
            table data is to be compared.

        numdiffs : int, optional
            The number of pixel/table values to output when reporting HDU data
            differences.  Though the count of differences is the same either
            way, this allows controlling the number of different values that
            are kept in memory or output.  If a negative value is given, then
            numdiffs is treated as unlimited (default: 10).

        rtol : float, optional
            The relative difference to allow when comparing two float values
            either in header values, image arrays, or table columns
            (default: 0.0). Values which satisfy the expression

            .. math::

                \\left| a - b \\right| > \\text{atol} + \\text{rtol} \\cdot \\left| b \\right|

            are considered to be different.
            The underlying function used for comparison is `numpy.allclose`.

            .. versionadded:: 2.0

        atol : float, optional
            The allowed absolute difference. See also ``rtol`` parameter.

            .. versionadded:: 2.0
        """

        self.ignore_fields = set(ignore_fields)
        self.numdiffs = numdiffs
        self.rtol = rtol
        self.atol = atol

        self.common_columns = []
        self.common_column_names = set()

        # Row counts of each table, set only when they differ
        self.diff_rows = ()
        self.diff_column_count = ()

        # self.diff_columns contains columns with different column definitions,
        # but not different column data. Column data is only compared in
        # columns that have the same definitions
        self.diff_columns = ()

        # If two columns have the same name+format, but other attributes are
        # different (such as TUNIT or such) they are listed here
        self.diff_column_attributes = []

        # Like self.diff_columns, but just contains a list of the column names
        # unique to each table, and in the order they appear in the tables
        self.diff_column_names = ()
        self.diff_values = []

        self.diff_ratio = 0
        self.diff_total = 0

        super().__init__(a, b)

    def _vla_rows_differ(self, rowa, rowb):
        """
        Return `True` if two rows of a variable-length array column differ.

        Rows of different shape are always different.  The shape is checked
        first because `numpy.allclose` broadcasts its arguments, and would
        either raise or compare a short row against every element of a longer
        one.
        """
        if np.shape(rowa) != np.shape(rowb):
            return True
        return not np.allclose(rowa, rowb, rtol=self.rtol, atol=self.atol)

    def _diff(self):
        """
        Compare the column definitions of the two tables and then, when the
        row counts agree, the data of every common column, filling in the
        ``diff_*`` attributes.
        """
        # Much of the code for comparing columns is similar to the code for
        # comparing headers--consider refactoring
        colsa = self.a.columns
        colsb = self.b.columns

        if len(colsa) != len(colsb):
            self.diff_column_count = (len(colsa), len(colsb))

        # Even if the number of columns are unequal, we still do comparison of
        # any common columns
        colsa = {c.name.lower(): c for c in colsa}
        colsb = {c.name.lower(): c for c in colsb}

        if '*' in self.ignore_fields:
            # If all columns are to be ignored, ignore any further differences
            # between the columns
            return

        # Keep the user's original ignore_fields list for reporting purposes,
        # but internally use a case-insensitive version
        ignore_fields = {f.lower() for f in self.ignore_fields}

        for fieldname in ignore_fields:
            colsa.pop(fieldname, None)
            colsb.pop(fieldname, None)

        colsa_set = set(colsa.values())
        colsb_set = set(colsb.values())
        self.common_columns = sorted(colsa_set.intersection(colsb_set),
                                     key=operator.attrgetter('name'))

        self.common_column_names = {col.name.lower()
                                    for col in self.common_columns}

        left_only_columns = {col.name.lower(): col
                             for col in colsa_set.difference(colsb_set)}
        right_only_columns = {col.name.lower(): col
                              for col in colsb_set.difference(colsa_set)}

        if left_only_columns or right_only_columns:
            self.diff_columns = (left_only_columns, right_only_columns)
            # Walk the original column lists so the names keep table order
            self.diff_column_names = (
                [col.name for col in self.a.columns
                 if col.name.lower() in left_only_columns],
                [col.name for col in self.b.columns
                 if col.name.lower() in right_only_columns])

        # If the tables have a different number of rows, we don't compare the
        # columns right now.
        # TODO: It might be nice to optionally compare the first n rows where n
        # is the minimum of the row counts between the two tables.
        if len(self.a) != len(self.b):
            self.diff_rows = (len(self.a), len(self.b))
            return

        # If the tables contain no rows there's no data to compare, so we're
        # done at this point. (See ticket #178)
        if len(self.a) == len(self.b) == 0:
            return

        # Like in the old fitsdiff, compare tables on a column by column basis
        # The difficulty here is that, while FITS column names are meant to be
        # case-insensitive, Astropy still allows, for the sake of flexibility,
        # two columns with the same name but different case.  When columns are
        # accessed in FITS tables, a case-sensitive is tried first, and failing
        # that a case-insensitive match is made.
        # It's conceivable that the same column could appear in both tables
        # being compared, but with different case.
        # Though it *may* lead to inconsistencies in these rare cases, this
        # just assumes that there are no duplicated column names in either
        # table, and that the column names can be treated case-insensitively.
        for col in self.common_columns:
            name_lower = col.name.lower()
            if name_lower in ignore_fields:
                continue

            cola = colsa[name_lower]
            colb = colsb[name_lower]

            for attr, _ in _COL_ATTRS:
                vala = getattr(cola, attr, None)
                valb = getattr(colb, attr, None)
                if diff_values(vala, valb):
                    self.diff_column_attributes.append(
                        ((col.name.upper(), attr), (vala, valb)))

            arra = self.a[col.name]
            arrb = self.b[col.name]

            if (np.issubdtype(arra.dtype, np.floating) and
                    np.issubdtype(arrb.dtype, np.floating)):
                diffs = where_not_allclose(arra, arrb,
                                           rtol=self.rtol,
                                           atol=self.atol)
            elif 'P' in col.format or 'Q' in col.format:
                # Variable-length array columns ('P' and 'Q' array
                # descriptors) have to be compared one row at a time
                diffs = ([idx for idx in range(len(arra))
                          if self._vla_rows_differ(arra[idx], arrb[idx])],)
            else:
                diffs = np.where(arra != arrb)

            # A row index is repeated once per differing element when the
            # column holds multi-dimensional values; we're only interested in
            # row-by-row differences, so keep each row once, in order.
            diff_rows = list(dict.fromkeys(diffs[0]))
            self.diff_total += len(diff_rows)

            if self.numdiffs >= 0:
                # Add no more diff'd values than this
                remaining = self.numdiffs - len(self.diff_values)
                if remaining <= 0:
                    # Don't save any more diff values
                    continue
                diff_rows = diff_rows[:remaining]

            for idx in diff_rows:
                self.diff_values.append(((col.name, idx),
                                         (arra[idx], arrb[idx])))

        total_values = len(self.a) * len(self.a.dtype.fields)
        self.diff_ratio = float(self.diff_total) / float(total_values)

    def _report(self):
        """
        Write the column definition differences and then the stored data
        differences followed by a summary line, using ``self._writeln``.
        """
        if self.diff_column_count:
            self._writeln(' Tables have different number of columns:')
            self._writeln(f'  a: {self.diff_column_count[0]}')
            self._writeln(f'  b: {self.diff_column_count[1]}')

        if self.diff_column_names:
            # Show columns with names unique to either table
            for name in self.diff_column_names[0]:
                col_format = self.diff_columns[0][name.lower()].format
                self._writeln(
                    f' Extra column {name} of format {col_format} in a')
            for name in self.diff_column_names[1]:
                col_format = self.diff_columns[1][name.lower()].format
                self._writeln(
                    f' Extra column {name} of format {col_format} in b')

        col_attrs = dict(_COL_ATTRS)
        # Now go through each table again and show columns with common
        # names but other property differences...
        for col_attr, vals in self.diff_column_attributes:
            name, attr = col_attr
            self._writeln(f' Column {name} has different {col_attrs[attr]}:')
            report_diff_values(vals[0], vals[1], fileobj=self._fileobj,
                               indent_width=self._indent + 1)

        if self.diff_rows:
            self._writeln(' Table rows differ:')
            self._writeln(f'  a: {self.diff_rows[0]}')
            self._writeln(f'  b: {self.diff_rows[1]}')
            self._writeln(' No further data comparison performed.')
            return

        # Key off the total rather than the stored values so that the summary
        # is still written when numdiffs is 0 and nothing was stored.
        if not self.diff_total:
            return

        # Finally, let's go through and report column data differences:
        for indx, values in self.diff_values:
            self._writeln(' Column {} data differs in row {}:'.format(*indx))
            report_diff_values(values[0], values[1], fileobj=self._fileobj,
                               indent_width=self._indent + 1)

        # Count what was actually left out instead of assuming exactly
        # numdiffs values were listed; that assumption is wrong for a negative
        # (unlimited) numdiffs.
        unlisted = self.diff_total - len(self.diff_values)
        if unlisted > 0:
            self._writeln(' ...{} additional difference(s) found.'.format(
                                unlisted))
            self._writeln(' ...')

        self._writeln(' {} different table data element(s) found '
                      '({:.2%} different).'
                      .format(self.diff_total, self.diff_ratio))
1457
1458
def report_diff_keyword_attr(fileobj, attr, diffs, keyword, ind=0):
    """
    Write to ``fileobj`` every recorded difference in ``attr`` (for example
    ``'values'`` or ``'comments'``) for one header keyword.

    ``diffs`` maps a keyword to a list with one entry per occurrence of that
    keyword: `None` for an occurrence without a difference, otherwise a pair
    of the two differing items.  Nothing is written if ``keyword`` has no
    entry in ``diffs``.
    """

    # Test membership instead of indexing so that a defaultdict passed as
    # ``diffs`` does not gain an entry for this keyword.
    if keyword not in diffs:
        return

    for occurrence, pair in enumerate(diffs[keyword], start=1):
        if pair is None:
            continue
        # Only the second and later occurrences get a "[n]" suffix
        dup = '' if occurrence == 1 else f'[{occurrence}]'
        heading = (' Keyword {:8}{} has different {}:\n'
                   .format(keyword, dup, attr))
        fileobj.write(fixed_width_indent(heading, ind))
        report_diff_values(pair[0], pair[1], fileobj=fileobj,
                           indent_width=ind + 1)
1479