1# Licensed under a 3-clause BSD style license - see LICENSE.rst 2""" 3Facilities for diffing two FITS files. Includes objects for diffing entire 4FITS files, individual HDUs, FITS headers, or just FITS data. 5 6Used to implement the fitsdiff program. 7""" 8import fnmatch 9import glob 10import io 11import operator 12import os 13import os.path 14import textwrap 15 16from collections import defaultdict 17from inspect import signature 18from itertools import islice 19 20import numpy as np 21 22from astropy import __version__ 23 24from .card import Card, BLANK_CARD 25from .header import Header 26from astropy.utils.decorators import deprecated_renamed_argument 27# HDUList is used in one of the doctests 28from .hdu.hdulist import fitsopen, HDUList # pylint: disable=W0611 29from .hdu.table import _TableLikeHDU 30from astropy.utils.diff import (report_diff_values, fixed_width_indent, 31 where_not_allclose, diff_values) 32from astropy.utils.misc import NOT_OVERWRITING_MSG 33 34__all__ = ['FITSDiff', 'HDUDiff', 'HeaderDiff', 'ImageDataDiff', 'RawDataDiff', 35 'TableDataDiff'] 36 37# Column attributes of interest for comparison 38_COL_ATTRS = [('unit', 'units'), ('null', 'null values'), 39 ('bscale', 'bscales'), ('bzero', 'bzeros'), 40 ('disp', 'display formats'), ('dim', 'dimensions')] 41 42 43class _BaseDiff: 44 """ 45 Base class for all FITS diff objects. 46 47 When instantiating a FITS diff object, the first two arguments are always 48 the two objects to diff (two FITS files, two FITS headers, etc.). 49 Instantiating a ``_BaseDiff`` also causes the diff itself to be executed. 50 The returned ``_BaseDiff`` instance has a number of attribute that describe 51 the results of the diff operation. 52 53 The most basic attribute, present on all ``_BaseDiff`` instances, is 54 ``.identical`` which is `True` if the two objects being compared are 55 identical according to the diff method for objects of that type. 56 """ 57 58 def __init__(self, a, b): 59 """ 60 The ``_BaseDiff`` class does not implement a ``_diff`` method and 61 should not be instantiated directly. Instead instantiate the 62 appropriate subclass of ``_BaseDiff`` for the objects being compared 63 (for example, use `HeaderDiff` to compare two `Header` objects. 64 """ 65 66 self.a = a 67 self.b = b 68 69 # For internal use in report output 70 self._fileobj = None 71 self._indent = 0 72 73 self._diff() 74 75 def __bool__(self): 76 """ 77 A ``_BaseDiff`` object acts as `True` in a boolean context if the two 78 objects compared are identical. Otherwise it acts as `False`. 79 """ 80 81 return not self.identical 82 83 @classmethod 84 def fromdiff(cls, other, a, b): 85 """ 86 Returns a new Diff object of a specific subclass from an existing diff 87 object, passing on the values for any arguments they share in common 88 (such as ignore_keywords). 89 90 For example:: 91 92 >>> from astropy.io import fits 93 >>> hdul1, hdul2 = fits.HDUList(), fits.HDUList() 94 >>> headera, headerb = fits.Header(), fits.Header() 95 >>> fd = fits.FITSDiff(hdul1, hdul2, ignore_keywords=['*']) 96 >>> hd = fits.HeaderDiff.fromdiff(fd, headera, headerb) 97 >>> list(hd.ignore_keywords) 98 ['*'] 99 """ 100 101 sig = signature(cls.__init__) 102 # The first 3 arguments of any Diff initializer are self, a, and b. 103 kwargs = {} 104 for arg in list(sig.parameters.keys())[3:]: 105 if hasattr(other, arg): 106 kwargs[arg] = getattr(other, arg) 107 108 return cls(a, b, **kwargs) 109 110 @property 111 def identical(self): 112 """ 113 `True` if all the ``.diff_*`` attributes on this diff instance are 114 empty, implying that no differences were found. 115 116 Any subclass of ``_BaseDiff`` must have at least one ``.diff_*`` 117 attribute, which contains a non-empty value if and only if some 118 difference was found between the two objects being compared. 119 """ 120 121 return not any(getattr(self, attr) for attr in self.__dict__ 122 if attr.startswith('diff_')) 123 124 @deprecated_renamed_argument('clobber', 'overwrite', '2.0', 125 message='"clobber" was deprecated in version ' 126 '2.0 and will be removed in version ' 127 '5.1. Use argument "overwrite" ' 128 'instead.') 129 def report(self, fileobj=None, indent=0, overwrite=False): 130 """ 131 Generates a text report on the differences (if any) between two 132 objects, and either returns it as a string or writes it to a file-like 133 object. 134 135 Parameters 136 ---------- 137 fileobj : file-like, string, or None, optional 138 If `None`, this method returns the report as a string. Otherwise it 139 returns `None` and writes the report to the given file-like object 140 (which must have a ``.write()`` method at a minimum), or to a new 141 file at the path specified. 142 143 indent : int 144 The number of 4 space tabs to indent the report. 145 146 overwrite : bool, optional 147 If ``True``, overwrite the output file if it exists. Raises an 148 ``OSError`` if ``False`` and the output file exists. Default is 149 ``False``. 150 151 .. versionchanged:: 1.3 152 ``overwrite`` replaces the deprecated ``clobber`` argument. 153 154 Returns 155 ------- 156 report : str or None 157 """ 158 159 return_string = False 160 filepath = None 161 162 if isinstance(fileobj, str): 163 if os.path.exists(fileobj) and not overwrite: 164 raise OSError(NOT_OVERWRITING_MSG.format(fileobj)) 165 else: 166 filepath = fileobj 167 fileobj = open(filepath, 'w') 168 elif fileobj is None: 169 fileobj = io.StringIO() 170 return_string = True 171 172 self._fileobj = fileobj 173 self._indent = indent # This is used internally by _writeln 174 175 try: 176 self._report() 177 finally: 178 if filepath: 179 fileobj.close() 180 181 if return_string: 182 return fileobj.getvalue() 183 184 def _writeln(self, text): 185 self._fileobj.write(fixed_width_indent(text, self._indent) + '\n') 186 187 def _diff(self): 188 raise NotImplementedError 189 190 def _report(self): 191 raise NotImplementedError 192 193 194class FITSDiff(_BaseDiff): 195 """Diff two FITS files by filename, or two `HDUList` objects. 196 197 `FITSDiff` objects have the following diff attributes: 198 199 - ``diff_hdu_count``: If the FITS files being compared have different 200 numbers of HDUs, this contains a 2-tuple of the number of HDUs in each 201 file. 202 203 - ``diff_hdus``: If any HDUs with the same index are different, this 204 contains a list of 2-tuples of the HDU index and the `HDUDiff` object 205 representing the differences between the two HDUs. 206 """ 207 208 def __init__(self, a, b, ignore_hdus=[], ignore_keywords=[], 209 ignore_comments=[], ignore_fields=[], 210 numdiffs=10, rtol=0.0, atol=0.0, 211 ignore_blanks=True, ignore_blank_cards=True): 212 """ 213 Parameters 214 ---------- 215 a : str or `HDUList` 216 The filename of a FITS file on disk, or an `HDUList` object. 217 218 b : str or `HDUList` 219 The filename of a FITS file on disk, or an `HDUList` object to 220 compare to the first file. 221 222 ignore_hdus : sequence, optional 223 HDU names to ignore when comparing two FITS files or HDU lists; the 224 presence of these HDUs and their contents are ignored. Wildcard 225 strings may also be included in the list. 226 227 ignore_keywords : sequence, optional 228 Header keywords to ignore when comparing two headers; the presence 229 of these keywords and their values are ignored. Wildcard strings 230 may also be included in the list. 231 232 ignore_comments : sequence, optional 233 A list of header keywords whose comments should be ignored in the 234 comparison. May contain wildcard strings as with ignore_keywords. 235 236 ignore_fields : sequence, optional 237 The (case-insensitive) names of any table columns to ignore if any 238 table data is to be compared. 239 240 numdiffs : int, optional 241 The number of pixel/table values to output when reporting HDU data 242 differences. Though the count of differences is the same either 243 way, this allows controlling the number of different values that 244 are kept in memory or output. If a negative value is given, then 245 numdiffs is treated as unlimited (default: 10). 246 247 rtol : float, optional 248 The relative difference to allow when comparing two float values 249 either in header values, image arrays, or table columns 250 (default: 0.0). Values which satisfy the expression 251 252 .. math:: 253 254 \\left| a - b \\right| > \\text{atol} + \\text{rtol} \\cdot \\left| b \\right| 255 256 are considered to be different. 257 The underlying function used for comparison is `numpy.allclose`. 258 259 .. versionadded:: 2.0 260 261 atol : float, optional 262 The allowed absolute difference. See also ``rtol`` parameter. 263 264 .. versionadded:: 2.0 265 266 ignore_blanks : bool, optional 267 Ignore extra whitespace at the end of string values either in 268 headers or data. Extra leading whitespace is not ignored 269 (default: True). 270 271 ignore_blank_cards : bool, optional 272 Ignore all cards that are blank, i.e. they only contain 273 whitespace (default: True). 274 """ 275 276 if isinstance(a, (str, os.PathLike)): 277 try: 278 a = fitsopen(a) 279 except Exception as exc: 280 raise OSError("error opening file a ({}): {}: {}".format( 281 a, exc.__class__.__name__, exc.args[0])) 282 close_a = True 283 else: 284 close_a = False 285 286 if isinstance(b, (str, os.PathLike)): 287 try: 288 b = fitsopen(b) 289 except Exception as exc: 290 raise OSError("error opening file b ({}): {}: {}".format( 291 b, exc.__class__.__name__, exc.args[0])) 292 close_b = True 293 else: 294 close_b = False 295 296 # Normalize keywords/fields to ignore to upper case 297 self.ignore_hdus = set(k.upper() for k in ignore_hdus) 298 self.ignore_keywords = set(k.upper() for k in ignore_keywords) 299 self.ignore_comments = set(k.upper() for k in ignore_comments) 300 self.ignore_fields = set(k.upper() for k in ignore_fields) 301 302 self.numdiffs = numdiffs 303 self.rtol = rtol 304 self.atol = atol 305 306 self.ignore_blanks = ignore_blanks 307 self.ignore_blank_cards = ignore_blank_cards 308 309 # Some hdu names may be pattern wildcards. Find them. 310 self.ignore_hdu_patterns = set() 311 for name in list(self.ignore_hdus): 312 if name != '*' and glob.has_magic(name): 313 self.ignore_hdus.remove(name) 314 self.ignore_hdu_patterns.add(name) 315 316 self.diff_hdu_count = () 317 self.diff_hdus = [] 318 319 try: 320 super().__init__(a, b) 321 finally: 322 if close_a: 323 a.close() 324 if close_b: 325 b.close() 326 327 def _diff(self): 328 if len(self.a) != len(self.b): 329 self.diff_hdu_count = (len(self.a), len(self.b)) 330 331 # Record filenames for use later in _report 332 self.filenamea = self.a.filename() 333 if not self.filenamea: 334 self.filenamea = f'<{self.a.__class__.__name__} object at {id(self.a):#x}>' 335 336 self.filenameb = self.b.filename() 337 if not self.filenameb: 338 self.filenameb = f'<{self.b.__class__.__name__} object at {id(self.b):#x}>' 339 340 if self.ignore_hdus: 341 self.a = HDUList([h for h in self.a if h.name not in self.ignore_hdus]) 342 self.b = HDUList([h for h in self.b if h.name not in self.ignore_hdus]) 343 if self.ignore_hdu_patterns: 344 a_names = [hdu.name for hdu in self.a] 345 b_names = [hdu.name for hdu in self.b] 346 for pattern in self.ignore_hdu_patterns: 347 self.a = HDUList([h for h in self.a if h.name not in fnmatch.filter( 348 a_names, pattern)]) 349 self.b = HDUList([h for h in self.b if h.name not in fnmatch.filter( 350 b_names, pattern)]) 351 352 # For now, just compare the extensions one by one in order. 353 # Might allow some more sophisticated types of diffing later. 354 355 # TODO: Somehow or another simplify the passing around of diff 356 # options--this will become important as the number of options grows 357 for idx in range(min(len(self.a), len(self.b))): 358 hdu_diff = HDUDiff.fromdiff(self, self.a[idx], self.b[idx]) 359 360 if not hdu_diff.identical: 361 if self.a[idx].name == self.b[idx].name and self.a[idx].ver == self.b[idx].ver: 362 self.diff_hdus.append((idx, hdu_diff, self.a[idx].name, self.a[idx].ver)) 363 else: 364 self.diff_hdus.append((idx, hdu_diff, "", self.a[idx].ver)) 365 366 def _report(self): 367 wrapper = textwrap.TextWrapper(initial_indent=' ', 368 subsequent_indent=' ') 369 370 self._fileobj.write('\n') 371 self._writeln(f' fitsdiff: {__version__}') 372 self._writeln(f' a: {self.filenamea}\n b: {self.filenameb}') 373 374 if self.ignore_hdus: 375 ignore_hdus = ' '.join(sorted(self.ignore_hdus)) 376 self._writeln(f' HDU(s) not to be compared:\n{wrapper.fill(ignore_hdus)}') 377 378 if self.ignore_hdu_patterns: 379 ignore_hdu_patterns = ' '.join(sorted(self.ignore_hdu_patterns)) 380 self._writeln(' HDU(s) not to be compared:\n{}' 381 .format(wrapper.fill(ignore_hdu_patterns))) 382 383 if self.ignore_keywords: 384 ignore_keywords = ' '.join(sorted(self.ignore_keywords)) 385 self._writeln(' Keyword(s) not to be compared:\n{}' 386 .format(wrapper.fill(ignore_keywords))) 387 388 if self.ignore_comments: 389 ignore_comments = ' '.join(sorted(self.ignore_comments)) 390 self._writeln(' Keyword(s) whose comments are not to be compared' 391 ':\n{}'.format(wrapper.fill(ignore_comments))) 392 393 if self.ignore_fields: 394 ignore_fields = ' '.join(sorted(self.ignore_fields)) 395 self._writeln(' Table column(s) not to be compared:\n{}' 396 .format(wrapper.fill(ignore_fields))) 397 398 self._writeln(' Maximum number of different data values to be ' 399 'reported: {}'.format(self.numdiffs)) 400 self._writeln(' Relative tolerance: {}, Absolute tolerance: {}' 401 .format(self.rtol, self.atol)) 402 403 if self.diff_hdu_count: 404 self._fileobj.write('\n') 405 self._writeln('Files contain different numbers of HDUs:') 406 self._writeln(f' a: {self.diff_hdu_count[0]}') 407 self._writeln(f' b: {self.diff_hdu_count[1]}') 408 409 if not self.diff_hdus: 410 self._writeln('No differences found between common HDUs.') 411 return 412 elif not self.diff_hdus: 413 self._fileobj.write('\n') 414 self._writeln('No differences found.') 415 return 416 417 for idx, hdu_diff, extname, extver in self.diff_hdus: 418 # print out the extension heading 419 if idx == 0: 420 self._fileobj.write('\n') 421 self._writeln('Primary HDU:') 422 else: 423 self._fileobj.write('\n') 424 if extname: 425 self._writeln(f'Extension HDU {idx} ({extname}, {extver}):') 426 else: 427 self._writeln(f'Extension HDU {idx}:') 428 hdu_diff.report(self._fileobj, indent=self._indent + 1) 429 430 431class HDUDiff(_BaseDiff): 432 """ 433 Diff two HDU objects, including their headers and their data (but only if 434 both HDUs contain the same type of data (image, table, or unknown). 435 436 `HDUDiff` objects have the following diff attributes: 437 438 - ``diff_extnames``: If the two HDUs have different EXTNAME values, this 439 contains a 2-tuple of the different extension names. 440 441 - ``diff_extvers``: If the two HDUS have different EXTVER values, this 442 contains a 2-tuple of the different extension versions. 443 444 - ``diff_extlevels``: If the two HDUs have different EXTLEVEL values, this 445 contains a 2-tuple of the different extension levels. 446 447 - ``diff_extension_types``: If the two HDUs have different XTENSION values, 448 this contains a 2-tuple of the different extension types. 449 450 - ``diff_headers``: Contains a `HeaderDiff` object for the headers of the 451 two HDUs. This will always contain an object--it may be determined 452 whether the headers are different through ``diff_headers.identical``. 453 454 - ``diff_data``: Contains either a `ImageDataDiff`, `TableDataDiff`, or 455 `RawDataDiff` as appropriate for the data in the HDUs, and only if the 456 two HDUs have non-empty data of the same type (`RawDataDiff` is used for 457 HDUs containing non-empty data of an indeterminate type). 458 """ 459 460 def __init__(self, a, b, ignore_keywords=[], ignore_comments=[], 461 ignore_fields=[], numdiffs=10, rtol=0.0, atol=0.0, 462 ignore_blanks=True, ignore_blank_cards=True): 463 """ 464 Parameters 465 ---------- 466 a : BaseHDU 467 An HDU object. 468 469 b : BaseHDU 470 An HDU object to compare to the first HDU object. 471 472 ignore_keywords : sequence, optional 473 Header keywords to ignore when comparing two headers; the presence 474 of these keywords and their values are ignored. Wildcard strings 475 may also be included in the list. 476 477 ignore_comments : sequence, optional 478 A list of header keywords whose comments should be ignored in the 479 comparison. May contain wildcard strings as with ignore_keywords. 480 481 ignore_fields : sequence, optional 482 The (case-insensitive) names of any table columns to ignore if any 483 table data is to be compared. 484 485 numdiffs : int, optional 486 The number of pixel/table values to output when reporting HDU data 487 differences. Though the count of differences is the same either 488 way, this allows controlling the number of different values that 489 are kept in memory or output. If a negative value is given, then 490 numdiffs is treated as unlimited (default: 10). 491 492 rtol : float, optional 493 The relative difference to allow when comparing two float values 494 either in header values, image arrays, or table columns 495 (default: 0.0). Values which satisfy the expression 496 497 .. math:: 498 499 \\left| a - b \\right| > \\text{atol} + \\text{rtol} \\cdot \\left| b \\right| 500 501 are considered to be different. 502 The underlying function used for comparison is `numpy.allclose`. 503 504 .. versionadded:: 2.0 505 506 atol : float, optional 507 The allowed absolute difference. See also ``rtol`` parameter. 508 509 .. versionadded:: 2.0 510 511 ignore_blanks : bool, optional 512 Ignore extra whitespace at the end of string values either in 513 headers or data. Extra leading whitespace is not ignored 514 (default: True). 515 516 ignore_blank_cards : bool, optional 517 Ignore all cards that are blank, i.e. they only contain 518 whitespace (default: True). 519 """ 520 521 self.ignore_keywords = {k.upper() for k in ignore_keywords} 522 self.ignore_comments = {k.upper() for k in ignore_comments} 523 self.ignore_fields = {k.upper() for k in ignore_fields} 524 525 self.rtol = rtol 526 self.atol = atol 527 528 self.numdiffs = numdiffs 529 self.ignore_blanks = ignore_blanks 530 self.ignore_blank_cards = ignore_blank_cards 531 532 self.diff_extnames = () 533 self.diff_extvers = () 534 self.diff_extlevels = () 535 self.diff_extension_types = () 536 self.diff_headers = None 537 self.diff_data = None 538 539 super().__init__(a, b) 540 541 def _diff(self): 542 if self.a.name != self.b.name: 543 self.diff_extnames = (self.a.name, self.b.name) 544 545 if self.a.ver != self.b.ver: 546 self.diff_extvers = (self.a.ver, self.b.ver) 547 548 if self.a.level != self.b.level: 549 self.diff_extlevels = (self.a.level, self.b.level) 550 551 if self.a.header.get('XTENSION') != self.b.header.get('XTENSION'): 552 self.diff_extension_types = (self.a.header.get('XTENSION'), 553 self.b.header.get('XTENSION')) 554 555 self.diff_headers = HeaderDiff.fromdiff(self, self.a.header.copy(), 556 self.b.header.copy()) 557 558 if self.a.data is None or self.b.data is None: 559 # TODO: Perhaps have some means of marking this case 560 pass 561 elif self.a.is_image and self.b.is_image: 562 self.diff_data = ImageDataDiff.fromdiff(self, self.a.data, 563 self.b.data) 564 # Clean up references to (possibly) memmapped arrays so they can 565 # be closed by .close() 566 self.diff_data.a = None 567 self.diff_data.b = None 568 elif (isinstance(self.a, _TableLikeHDU) and 569 isinstance(self.b, _TableLikeHDU)): 570 # TODO: Replace this if/when _BaseHDU grows a .is_table property 571 self.diff_data = TableDataDiff.fromdiff(self, self.a.data, 572 self.b.data) 573 # Clean up references to (possibly) memmapped arrays so they can 574 # be closed by .close() 575 self.diff_data.a = None 576 self.diff_data.b = None 577 elif not self.diff_extension_types: 578 # Don't diff the data for unequal extension types that are not 579 # recognized image or table types 580 self.diff_data = RawDataDiff.fromdiff(self, self.a.data, 581 self.b.data) 582 # Clean up references to (possibly) memmapped arrays so they can 583 # be closed by .close() 584 self.diff_data.a = None 585 self.diff_data.b = None 586 587 def _report(self): 588 if self.identical: 589 self._writeln(" No differences found.") 590 if self.diff_extension_types: 591 self._writeln(" Extension types differ:\n a: {}\n " 592 "b: {}".format(*self.diff_extension_types)) 593 if self.diff_extnames: 594 self._writeln(" Extension names differ:\n a: {}\n " 595 "b: {}".format(*self.diff_extnames)) 596 if self.diff_extvers: 597 self._writeln(" Extension versions differ:\n a: {}\n " 598 "b: {}".format(*self.diff_extvers)) 599 600 if self.diff_extlevels: 601 self._writeln(" Extension levels differ:\n a: {}\n " 602 "b: {}".format(*self.diff_extlevels)) 603 604 if not self.diff_headers.identical: 605 self._fileobj.write('\n') 606 self._writeln(" Headers contain differences:") 607 self.diff_headers.report(self._fileobj, indent=self._indent + 1) 608 609 if self.diff_data is not None and not self.diff_data.identical: 610 self._fileobj.write('\n') 611 self._writeln(" Data contains differences:") 612 self.diff_data.report(self._fileobj, indent=self._indent + 1) 613 614 615class HeaderDiff(_BaseDiff): 616 """ 617 Diff two `Header` objects. 618 619 `HeaderDiff` objects have the following diff attributes: 620 621 - ``diff_keyword_count``: If the two headers contain a different number of 622 keywords, this contains a 2-tuple of the keyword count for each header. 623 624 - ``diff_keywords``: If either header contains one or more keywords that 625 don't appear at all in the other header, this contains a 2-tuple 626 consisting of a list of the keywords only appearing in header a, and a 627 list of the keywords only appearing in header b. 628 629 - ``diff_duplicate_keywords``: If a keyword appears in both headers at 630 least once, but contains a different number of duplicates (for example, a 631 different number of HISTORY cards in each header), an item is added to 632 this dict with the keyword as the key, and a 2-tuple of the different 633 counts of that keyword as the value. For example:: 634 635 {'HISTORY': (20, 19)} 636 637 means that header a contains 20 HISTORY cards, while header b contains 638 only 19 HISTORY cards. 639 640 - ``diff_keyword_values``: If any of the common keyword between the two 641 headers have different values, they appear in this dict. It has a 642 structure similar to ``diff_duplicate_keywords``, with the keyword as the 643 key, and a 2-tuple of the different values as the value. For example:: 644 645 {'NAXIS': (2, 3)} 646 647 means that the NAXIS keyword has a value of 2 in header a, and a value of 648 3 in header b. This excludes any keywords matched by the 649 ``ignore_keywords`` list. 650 651 - ``diff_keyword_comments``: Like ``diff_keyword_values``, but contains 652 differences between keyword comments. 653 654 `HeaderDiff` objects also have a ``common_keywords`` attribute that lists 655 all keywords that appear in both headers. 656 """ 657 658 def __init__(self, a, b, ignore_keywords=[], ignore_comments=[], 659 rtol=0.0, atol=0.0, ignore_blanks=True, ignore_blank_cards=True): 660 """ 661 Parameters 662 ---------- 663 a : `~astropy.io.fits.Header` or string or bytes 664 A header. 665 666 b : `~astropy.io.fits.Header` or string or bytes 667 A header to compare to the first header. 668 669 ignore_keywords : sequence, optional 670 Header keywords to ignore when comparing two headers; the presence 671 of these keywords and their values are ignored. Wildcard strings 672 may also be included in the list. 673 674 ignore_comments : sequence, optional 675 A list of header keywords whose comments should be ignored in the 676 comparison. May contain wildcard strings as with ignore_keywords. 677 678 numdiffs : int, optional 679 The number of pixel/table values to output when reporting HDU data 680 differences. Though the count of differences is the same either 681 way, this allows controlling the number of different values that 682 are kept in memory or output. If a negative value is given, then 683 numdiffs is treated as unlimited (default: 10). 684 685 rtol : float, optional 686 The relative difference to allow when comparing two float values 687 either in header values, image arrays, or table columns 688 (default: 0.0). Values which satisfy the expression 689 690 .. math:: 691 692 \\left| a - b \\right| > \\text{atol} + \\text{rtol} \\cdot \\left| b \\right| 693 694 are considered to be different. 695 The underlying function used for comparison is `numpy.allclose`. 696 697 .. versionadded:: 2.0 698 699 atol : float, optional 700 The allowed absolute difference. See also ``rtol`` parameter. 701 702 .. versionadded:: 2.0 703 704 ignore_blanks : bool, optional 705 Ignore extra whitespace at the end of string values either in 706 headers or data. Extra leading whitespace is not ignored 707 (default: True). 708 709 ignore_blank_cards : bool, optional 710 Ignore all cards that are blank, i.e. they only contain 711 whitespace (default: True). 712 """ 713 714 self.ignore_keywords = {k.upper() for k in ignore_keywords} 715 self.ignore_comments = {k.upper() for k in ignore_comments} 716 717 self.rtol = rtol 718 self.atol = atol 719 720 self.ignore_blanks = ignore_blanks 721 self.ignore_blank_cards = ignore_blank_cards 722 723 self.ignore_keyword_patterns = set() 724 self.ignore_comment_patterns = set() 725 for keyword in list(self.ignore_keywords): 726 keyword = keyword.upper() 727 if keyword != '*' and glob.has_magic(keyword): 728 self.ignore_keywords.remove(keyword) 729 self.ignore_keyword_patterns.add(keyword) 730 for keyword in list(self.ignore_comments): 731 keyword = keyword.upper() 732 if keyword != '*' and glob.has_magic(keyword): 733 self.ignore_comments.remove(keyword) 734 self.ignore_comment_patterns.add(keyword) 735 736 # Keywords appearing in each header 737 self.common_keywords = [] 738 739 # Set to the number of keywords in each header if the counts differ 740 self.diff_keyword_count = () 741 742 # Set if the keywords common to each header (excluding ignore_keywords) 743 # appear in different positions within the header 744 # TODO: Implement this 745 self.diff_keyword_positions = () 746 747 # Keywords unique to each header (excluding keywords in 748 # ignore_keywords) 749 self.diff_keywords = () 750 751 # Keywords that have different numbers of duplicates in each header 752 # (excluding keywords in ignore_keywords) 753 self.diff_duplicate_keywords = {} 754 755 # Keywords common to each header but having different values (excluding 756 # keywords in ignore_keywords) 757 self.diff_keyword_values = defaultdict(list) 758 759 # Keywords common to each header but having different comments 760 # (excluding keywords in ignore_keywords or in ignore_comments) 761 self.diff_keyword_comments = defaultdict(list) 762 763 if isinstance(a, str): 764 a = Header.fromstring(a) 765 if isinstance(b, str): 766 b = Header.fromstring(b) 767 768 if not (isinstance(a, Header) and isinstance(b, Header)): 769 raise TypeError('HeaderDiff can only diff astropy.io.fits.Header ' 770 'objects or strings containing FITS headers.') 771 772 super().__init__(a, b) 773 774 # TODO: This doesn't pay much attention to the *order* of the keywords, 775 # except in the case of duplicate keywords. The order should be checked 776 # too, or at least it should be an option. 777 def _diff(self): 778 if self.ignore_blank_cards: 779 cardsa = [c for c in self.a.cards if str(c) != BLANK_CARD] 780 cardsb = [c for c in self.b.cards if str(c) != BLANK_CARD] 781 else: 782 cardsa = list(self.a.cards) 783 cardsb = list(self.b.cards) 784 785 # build dictionaries of keyword values and comments 786 def get_header_values_comments(cards): 787 values = {} 788 comments = {} 789 for card in cards: 790 value = card.value 791 if self.ignore_blanks and isinstance(value, str): 792 value = value.rstrip() 793 values.setdefault(card.keyword, []).append(value) 794 comments.setdefault(card.keyword, []).append(card.comment) 795 return values, comments 796 797 valuesa, commentsa = get_header_values_comments(cardsa) 798 valuesb, commentsb = get_header_values_comments(cardsb) 799 800 # Normalize all keyword to upper-case for comparison's sake; 801 # TODO: HIERARCH keywords should be handled case-sensitively I think 802 keywordsa = {k.upper() for k in valuesa} 803 keywordsb = {k.upper() for k in valuesb} 804 805 self.common_keywords = sorted(keywordsa.intersection(keywordsb)) 806 if len(cardsa) != len(cardsb): 807 self.diff_keyword_count = (len(cardsa), len(cardsb)) 808 809 # Any other diff attributes should exclude ignored keywords 810 keywordsa = keywordsa.difference(self.ignore_keywords) 811 keywordsb = keywordsb.difference(self.ignore_keywords) 812 if self.ignore_keyword_patterns: 813 for pattern in self.ignore_keyword_patterns: 814 keywordsa = keywordsa.difference(fnmatch.filter(keywordsa, 815 pattern)) 816 keywordsb = keywordsb.difference(fnmatch.filter(keywordsb, 817 pattern)) 818 819 if '*' in self.ignore_keywords: 820 # Any other differences between keywords are to be ignored 821 return 822 823 left_only_keywords = sorted(keywordsa.difference(keywordsb)) 824 right_only_keywords = sorted(keywordsb.difference(keywordsa)) 825 826 if left_only_keywords or right_only_keywords: 827 self.diff_keywords = (left_only_keywords, right_only_keywords) 828 829 # Compare count of each common keyword 830 for keyword in self.common_keywords: 831 if keyword in self.ignore_keywords: 832 continue 833 if self.ignore_keyword_patterns: 834 skip = False 835 for pattern in self.ignore_keyword_patterns: 836 if fnmatch.fnmatch(keyword, pattern): 837 skip = True 838 break 839 if skip: 840 continue 841 842 counta = len(valuesa[keyword]) 843 countb = len(valuesb[keyword]) 844 if counta != countb: 845 self.diff_duplicate_keywords[keyword] = (counta, countb) 846 847 # Compare keywords' values and comments 848 for a, b in zip(valuesa[keyword], valuesb[keyword]): 849 if diff_values(a, b, rtol=self.rtol, atol=self.atol): 850 self.diff_keyword_values[keyword].append((a, b)) 851 else: 852 # If there are duplicate keywords we need to be able to 853 # index each duplicate; if the values of a duplicate 854 # are identical use None here 855 self.diff_keyword_values[keyword].append(None) 856 857 if not any(self.diff_keyword_values[keyword]): 858 # No differences found; delete the array of Nones 859 del self.diff_keyword_values[keyword] 860 861 if '*' in self.ignore_comments or keyword in self.ignore_comments: 862 continue 863 if self.ignore_comment_patterns: 864 skip = False 865 for pattern in self.ignore_comment_patterns: 866 if fnmatch.fnmatch(keyword, pattern): 867 skip = True 868 break 869 if skip: 870 continue 871 872 for a, b in zip(commentsa[keyword], commentsb[keyword]): 873 if diff_values(a, b): 874 self.diff_keyword_comments[keyword].append((a, b)) 875 else: 876 self.diff_keyword_comments[keyword].append(None) 877 878 if not any(self.diff_keyword_comments[keyword]): 879 del self.diff_keyword_comments[keyword] 880 881 def _report(self): 882 if self.diff_keyword_count: 883 self._writeln(' Headers have different number of cards:') 884 self._writeln(f' a: {self.diff_keyword_count[0]}') 885 self._writeln(f' b: {self.diff_keyword_count[1]}') 886 if self.diff_keywords: 887 for keyword in self.diff_keywords[0]: 888 if keyword in Card._commentary_keywords: 889 val = self.a[keyword][0] 890 else: 891 val = self.a[keyword] 892 self._writeln(f' Extra keyword {keyword!r:8} in a: {val!r}') 893 for keyword in self.diff_keywords[1]: 894 if keyword in Card._commentary_keywords: 895 val = self.b[keyword][0] 896 else: 897 val = self.b[keyword] 898 self._writeln(f' Extra keyword {keyword!r:8} in b: {val!r}') 899 900 if self.diff_duplicate_keywords: 901 for keyword, count in sorted(self.diff_duplicate_keywords.items()): 902 self._writeln(f' Inconsistent duplicates of keyword {keyword!r:8}:') 903 self._writeln(' Occurs {} time(s) in a, {} times in (b)' 904 .format(*count)) 905 906 if self.diff_keyword_values or self.diff_keyword_comments: 907 for keyword in self.common_keywords: 908 report_diff_keyword_attr(self._fileobj, 'values', 909 self.diff_keyword_values, keyword, 910 ind=self._indent) 911 report_diff_keyword_attr(self._fileobj, 'comments', 912 self.diff_keyword_comments, keyword, 913 ind=self._indent) 914 915# TODO: It might be good if there was also a threshold option for percentage of 916# different pixels: For example ignore if only 1% of the pixels are different 917# within some threshold. There are lots of possibilities here, but hold off 918# for now until specific cases come up. 919 920 921class ImageDataDiff(_BaseDiff): 922 """ 923 Diff two image data arrays (really any array from a PRIMARY HDU or an IMAGE 924 extension HDU, though the data unit is assumed to be "pixels"). 925 926 `ImageDataDiff` objects have the following diff attributes: 927 928 - ``diff_dimensions``: If the two arrays contain either a different number 929 of dimensions or different sizes in any dimension, this contains a 930 2-tuple of the shapes of each array. Currently no further comparison is 931 performed on images that don't have the exact same dimensions. 932 933 - ``diff_pixels``: If the two images contain any different pixels, this 934 contains a list of 2-tuples of the array index where the difference was 935 found, and another 2-tuple containing the different values. For example, 936 if the pixel at (0, 0) contains different values this would look like:: 937 938 [(0, 0), (1.1, 2.2)] 939 940 where 1.1 and 2.2 are the values of that pixel in each array. This 941 array only contains up to ``self.numdiffs`` differences, for storage 942 efficiency. 943 944 - ``diff_total``: The total number of different pixels found between the 945 arrays. Although ``diff_pixels`` does not necessarily contain all the 946 different pixel values, this can be used to get a count of the total 947 number of differences found. 948 949 - ``diff_ratio``: Contains the ratio of ``diff_total`` to the total number 950 of pixels in the arrays. 951 """ 952 953 def __init__(self, a, b, numdiffs=10, rtol=0.0, atol=0.0): 954 """ 955 Parameters 956 ---------- 957 a : BaseHDU 958 An HDU object. 959 960 b : BaseHDU 961 An HDU object to compare to the first HDU object. 962 963 numdiffs : int, optional 964 The number of pixel/table values to output when reporting HDU data 965 differences. Though the count of differences is the same either 966 way, this allows controlling the number of different values that 967 are kept in memory or output. If a negative value is given, then 968 numdiffs is treated as unlimited (default: 10). 969 970 rtol : float, optional 971 The relative difference to allow when comparing two float values 972 either in header values, image arrays, or table columns 973 (default: 0.0). Values which satisfy the expression 974 975 .. math:: 976 977 \\left| a - b \\right| > \\text{atol} + \\text{rtol} \\cdot \\left| b \\right| 978 979 are considered to be different. 980 The underlying function used for comparison is `numpy.allclose`. 981 982 .. versionadded:: 2.0 983 984 atol : float, optional 985 The allowed absolute difference. See also ``rtol`` parameter. 986 987 .. versionadded:: 2.0 988 """ 989 990 self.numdiffs = numdiffs 991 self.rtol = rtol 992 self.atol = atol 993 994 self.diff_dimensions = () 995 self.diff_pixels = [] 996 self.diff_ratio = 0 997 998 # self.diff_pixels only holds up to numdiffs differing pixels, but this 999 # self.diff_total stores the total count of differences between 1000 # the images, but not the different values 1001 self.diff_total = 0 1002 1003 super().__init__(a, b) 1004 1005 def _diff(self): 1006 if self.a.shape != self.b.shape: 1007 self.diff_dimensions = (self.a.shape, self.b.shape) 1008 # Don't do any further comparison if the dimensions differ 1009 # TODO: Perhaps we could, however, diff just the intersection 1010 # between the two images 1011 return 1012 1013 # Find the indices where the values are not equal 1014 # If neither a nor b are floating point (or complex), ignore rtol and 1015 # atol 1016 if not (np.issubdtype(self.a.dtype, np.inexact) or 1017 np.issubdtype(self.b.dtype, np.inexact)): 1018 rtol = 0 1019 atol = 0 1020 else: 1021 rtol = self.rtol 1022 atol = self.atol 1023 1024 diffs = where_not_allclose(self.a, self.b, atol=atol, rtol=rtol) 1025 1026 self.diff_total = len(diffs[0]) 1027 1028 if self.diff_total == 0: 1029 # Then we're done 1030 return 1031 1032 if self.numdiffs < 0: 1033 numdiffs = self.diff_total 1034 else: 1035 numdiffs = self.numdiffs 1036 1037 self.diff_pixels = [(idx, (self.a[idx], self.b[idx])) 1038 for idx in islice(zip(*diffs), 0, numdiffs)] 1039 self.diff_ratio = float(self.diff_total) / float(len(self.a.flat)) 1040 1041 def _report(self): 1042 if self.diff_dimensions: 1043 dimsa = ' x '.join(str(d) for d in 1044 reversed(self.diff_dimensions[0])) 1045 dimsb = ' x '.join(str(d) for d in 1046 reversed(self.diff_dimensions[1])) 1047 self._writeln(' Data dimensions differ:') 1048 self._writeln(f' a: {dimsa}') 1049 self._writeln(f' b: {dimsb}') 1050 # For now we don't do any further comparison if the dimensions 1051 # differ; though in the future it might be nice to be able to 1052 # compare at least where the images intersect 1053 self._writeln(' No further data comparison performed.') 1054 return 1055 1056 if not self.diff_pixels: 1057 return 1058 1059 for index, values in self.diff_pixels: 1060 index = [x + 1 for x in reversed(index)] 1061 self._writeln(f' Data differs at {index}:') 1062 report_diff_values(values[0], values[1], fileobj=self._fileobj, 1063 indent_width=self._indent + 1) 1064 1065 if self.diff_total > self.numdiffs: 1066 self._writeln(' ...') 1067 self._writeln(' {} different pixels found ({:.2%} different).' 1068 .format(self.diff_total, self.diff_ratio)) 1069 1070 1071class RawDataDiff(ImageDataDiff): 1072 """ 1073 `RawDataDiff` is just a special case of `ImageDataDiff` where the images 1074 are one-dimensional, and the data is treated as a 1-dimensional array of 1075 bytes instead of pixel values. This is used to compare the data of two 1076 non-standard extension HDUs that were not recognized as containing image or 1077 table data. 1078 1079 `ImageDataDiff` objects have the following diff attributes: 1080 1081 - ``diff_dimensions``: Same as the ``diff_dimensions`` attribute of 1082 `ImageDataDiff` objects. Though the "dimension" of each array is just an 1083 integer representing the number of bytes in the data. 1084 1085 - ``diff_bytes``: Like the ``diff_pixels`` attribute of `ImageDataDiff` 1086 objects, but renamed to reflect the minor semantic difference that these 1087 are raw bytes and not pixel values. Also the indices are integers 1088 instead of tuples. 1089 1090 - ``diff_total`` and ``diff_ratio``: Same as `ImageDataDiff`. 1091 """ 1092 1093 def __init__(self, a, b, numdiffs=10): 1094 """ 1095 Parameters 1096 ---------- 1097 a : BaseHDU 1098 An HDU object. 1099 1100 b : BaseHDU 1101 An HDU object to compare to the first HDU object. 1102 1103 numdiffs : int, optional 1104 The number of pixel/table values to output when reporting HDU data 1105 differences. Though the count of differences is the same either 1106 way, this allows controlling the number of different values that 1107 are kept in memory or output. If a negative value is given, then 1108 numdiffs is treated as unlimited (default: 10). 1109 """ 1110 1111 self.diff_dimensions = () 1112 self.diff_bytes = [] 1113 1114 super().__init__(a, b, numdiffs=numdiffs) 1115 1116 def _diff(self): 1117 super()._diff() 1118 if self.diff_dimensions: 1119 self.diff_dimensions = (self.diff_dimensions[0][0], 1120 self.diff_dimensions[1][0]) 1121 1122 self.diff_bytes = [(x[0], y) for x, y in self.diff_pixels] 1123 del self.diff_pixels 1124 1125 def _report(self): 1126 if self.diff_dimensions: 1127 self._writeln(' Data sizes differ:') 1128 self._writeln(f' a: {self.diff_dimensions[0]} bytes') 1129 self._writeln(f' b: {self.diff_dimensions[1]} bytes') 1130 # For now we don't do any further comparison if the dimensions 1131 # differ; though in the future it might be nice to be able to 1132 # compare at least where the images intersect 1133 self._writeln(' No further data comparison performed.') 1134 return 1135 1136 if not self.diff_bytes: 1137 return 1138 1139 for index, values in self.diff_bytes: 1140 self._writeln(f' Data differs at byte {index}:') 1141 report_diff_values(values[0], values[1], fileobj=self._fileobj, 1142 indent_width=self._indent + 1) 1143 1144 self._writeln(' ...') 1145 self._writeln(' {} different bytes found ({:.2%} different).' 1146 .format(self.diff_total, self.diff_ratio)) 1147 1148 1149class TableDataDiff(_BaseDiff): 1150 """ 1151 Diff two table data arrays. It doesn't matter whether the data originally 1152 came from a binary or ASCII table--the data should be passed in as a 1153 recarray. 1154 1155 `TableDataDiff` objects have the following diff attributes: 1156 1157 - ``diff_column_count``: If the tables being compared have different 1158 numbers of columns, this contains a 2-tuple of the column count in each 1159 table. Even if the tables have different column counts, an attempt is 1160 still made to compare any columns they have in common. 1161 1162 - ``diff_columns``: If either table contains columns unique to that table, 1163 either in name or format, this contains a 2-tuple of lists. The first 1164 element is a list of columns (these are full `Column` objects) that 1165 appear only in table a. The second element is a list of tables that 1166 appear only in table b. This only lists columns with different column 1167 definitions, and has nothing to do with the data in those columns. 1168 1169 - ``diff_column_names``: This is like ``diff_columns``, but lists only the 1170 names of columns unique to either table, rather than the full `Column` 1171 objects. 1172 1173 - ``diff_column_attributes``: Lists columns that are in both tables but 1174 have different secondary attributes, such as TUNIT or TDISP. The format 1175 is a list of 2-tuples: The first a tuple of the column name and the 1176 attribute, the second a tuple of the different values. 1177 1178 - ``diff_values``: `TableDataDiff` compares the data in each table on a 1179 column-by-column basis. If any different data is found, it is added to 1180 this list. The format of this list is similar to the ``diff_pixels`` 1181 attribute on `ImageDataDiff` objects, though the "index" consists of a 1182 (column_name, row) tuple. For example:: 1183 1184 [('TARGET', 0), ('NGC1001', 'NGC1002')] 1185 1186 shows that the tables contain different values in the 0-th row of the 1187 'TARGET' column. 1188 1189 - ``diff_total`` and ``diff_ratio``: Same as `ImageDataDiff`. 1190 1191 `TableDataDiff` objects also have a ``common_columns`` attribute that lists 1192 the `Column` objects for columns that are identical in both tables, and a 1193 ``common_column_names`` attribute which contains a set of the names of 1194 those columns. 1195 """ 1196 1197 def __init__(self, a, b, ignore_fields=[], numdiffs=10, rtol=0.0, atol=0.0): 1198 """ 1199 Parameters 1200 ---------- 1201 a : BaseHDU 1202 An HDU object. 1203 1204 b : BaseHDU 1205 An HDU object to compare to the first HDU object. 1206 1207 ignore_fields : sequence, optional 1208 The (case-insensitive) names of any table columns to ignore if any 1209 table data is to be compared. 1210 1211 numdiffs : int, optional 1212 The number of pixel/table values to output when reporting HDU data 1213 differences. Though the count of differences is the same either 1214 way, this allows controlling the number of different values that 1215 are kept in memory or output. If a negative value is given, then 1216 numdiffs is treated as unlimited (default: 10). 1217 1218 rtol : float, optional 1219 The relative difference to allow when comparing two float values 1220 either in header values, image arrays, or table columns 1221 (default: 0.0). Values which satisfy the expression 1222 1223 .. math:: 1224 1225 \\left| a - b \\right| > \\text{atol} + \\text{rtol} \\cdot \\left| b \\right| 1226 1227 are considered to be different. 1228 The underlying function used for comparison is `numpy.allclose`. 1229 1230 .. versionadded:: 2.0 1231 1232 atol : float, optional 1233 The allowed absolute difference. See also ``rtol`` parameter. 1234 1235 .. versionadded:: 2.0 1236 """ 1237 1238 self.ignore_fields = set(ignore_fields) 1239 self.numdiffs = numdiffs 1240 self.rtol = rtol 1241 self.atol = atol 1242 1243 self.common_columns = [] 1244 self.common_column_names = set() 1245 1246 # self.diff_columns contains columns with different column definitions, 1247 # but not different column data. Column data is only compared in 1248 # columns that have the same definitions 1249 self.diff_rows = () 1250 self.diff_column_count = () 1251 self.diff_columns = () 1252 1253 # If two columns have the same name+format, but other attributes are 1254 # different (such as TUNIT or such) they are listed here 1255 self.diff_column_attributes = [] 1256 1257 # Like self.diff_columns, but just contains a list of the column names 1258 # unique to each table, and in the order they appear in the tables 1259 self.diff_column_names = () 1260 self.diff_values = [] 1261 1262 self.diff_ratio = 0 1263 self.diff_total = 0 1264 1265 super().__init__(a, b) 1266 1267 def _diff(self): 1268 # Much of the code for comparing columns is similar to the code for 1269 # comparing headers--consider refactoring 1270 colsa = self.a.columns 1271 colsb = self.b.columns 1272 1273 if len(colsa) != len(colsb): 1274 self.diff_column_count = (len(colsa), len(colsb)) 1275 1276 # Even if the number of columns are unequal, we still do comparison of 1277 # any common columns 1278 colsa = {c.name.lower(): c for c in colsa} 1279 colsb = {c.name.lower(): c for c in colsb} 1280 1281 if '*' in self.ignore_fields: 1282 # If all columns are to be ignored, ignore any further differences 1283 # between the columns 1284 return 1285 1286 # Keep the user's original ignore_fields list for reporting purposes, 1287 # but internally use a case-insensitive version 1288 ignore_fields = {f.lower() for f in self.ignore_fields} 1289 1290 # It might be nice if there were a cleaner way to do this, but for now 1291 # it'll do 1292 for fieldname in ignore_fields: 1293 fieldname = fieldname.lower() 1294 if fieldname in colsa: 1295 del colsa[fieldname] 1296 if fieldname in colsb: 1297 del colsb[fieldname] 1298 1299 colsa_set = set(colsa.values()) 1300 colsb_set = set(colsb.values()) 1301 self.common_columns = sorted(colsa_set.intersection(colsb_set), 1302 key=operator.attrgetter('name')) 1303 1304 self.common_column_names = {col.name.lower() 1305 for col in self.common_columns} 1306 1307 left_only_columns = {col.name.lower(): col 1308 for col in colsa_set.difference(colsb_set)} 1309 right_only_columns = {col.name.lower(): col 1310 for col in colsb_set.difference(colsa_set)} 1311 1312 if left_only_columns or right_only_columns: 1313 self.diff_columns = (left_only_columns, right_only_columns) 1314 self.diff_column_names = ([], []) 1315 1316 if left_only_columns: 1317 for col in self.a.columns: 1318 if col.name.lower() in left_only_columns: 1319 self.diff_column_names[0].append(col.name) 1320 1321 if right_only_columns: 1322 for col in self.b.columns: 1323 if col.name.lower() in right_only_columns: 1324 self.diff_column_names[1].append(col.name) 1325 1326 # If the tables have a different number of rows, we don't compare the 1327 # columns right now. 1328 # TODO: It might be nice to optionally compare the first n rows where n 1329 # is the minimum of the row counts between the two tables. 1330 if len(self.a) != len(self.b): 1331 self.diff_rows = (len(self.a), len(self.b)) 1332 return 1333 1334 # If the tables contain no rows there's no data to compare, so we're 1335 # done at this point. (See ticket #178) 1336 if len(self.a) == len(self.b) == 0: 1337 return 1338 1339 # Like in the old fitsdiff, compare tables on a column by column basis 1340 # The difficulty here is that, while FITS column names are meant to be 1341 # case-insensitive, Astropy still allows, for the sake of flexibility, 1342 # two columns with the same name but different case. When columns are 1343 # accessed in FITS tables, a case-sensitive is tried first, and failing 1344 # that a case-insensitive match is made. 1345 # It's conceivable that the same column could appear in both tables 1346 # being compared, but with different case. 1347 # Though it *may* lead to inconsistencies in these rare cases, this 1348 # just assumes that there are no duplicated column names in either 1349 # table, and that the column names can be treated case-insensitively. 1350 for col in self.common_columns: 1351 name_lower = col.name.lower() 1352 if name_lower in ignore_fields: 1353 continue 1354 1355 cola = colsa[name_lower] 1356 colb = colsb[name_lower] 1357 1358 for attr, _ in _COL_ATTRS: 1359 vala = getattr(cola, attr, None) 1360 valb = getattr(colb, attr, None) 1361 if diff_values(vala, valb): 1362 self.diff_column_attributes.append( 1363 ((col.name.upper(), attr), (vala, valb))) 1364 1365 arra = self.a[col.name] 1366 arrb = self.b[col.name] 1367 1368 if (np.issubdtype(arra.dtype, np.floating) and 1369 np.issubdtype(arrb.dtype, np.floating)): 1370 diffs = where_not_allclose(arra, arrb, 1371 rtol=self.rtol, 1372 atol=self.atol) 1373 elif 'P' in col.format: 1374 diffs = ([idx for idx in range(len(arra)) 1375 if not np.allclose(arra[idx], arrb[idx], 1376 rtol=self.rtol, 1377 atol=self.atol)],) 1378 else: 1379 diffs = np.where(arra != arrb) 1380 1381 self.diff_total += len(set(diffs[0])) 1382 1383 if self.numdiffs >= 0: 1384 if len(self.diff_values) >= self.numdiffs: 1385 # Don't save any more diff values 1386 continue 1387 1388 # Add no more diff'd values than this 1389 max_diffs = self.numdiffs - len(self.diff_values) 1390 else: 1391 max_diffs = len(diffs[0]) 1392 1393 last_seen_idx = None 1394 for idx in islice(diffs[0], 0, max_diffs): 1395 if idx == last_seen_idx: 1396 # Skip duplicate indices, which my occur when the column 1397 # data contains multi-dimensional values; we're only 1398 # interested in storing row-by-row differences 1399 continue 1400 last_seen_idx = idx 1401 self.diff_values.append(((col.name, idx), 1402 (arra[idx], arrb[idx]))) 1403 1404 total_values = len(self.a) * len(self.a.dtype.fields) 1405 self.diff_ratio = float(self.diff_total) / float(total_values) 1406 1407 def _report(self): 1408 if self.diff_column_count: 1409 self._writeln(' Tables have different number of columns:') 1410 self._writeln(f' a: {self.diff_column_count[0]}') 1411 self._writeln(f' b: {self.diff_column_count[1]}') 1412 1413 if self.diff_column_names: 1414 # Show columns with names unique to either table 1415 for name in self.diff_column_names[0]: 1416 format = self.diff_columns[0][name.lower()].format 1417 self._writeln(f' Extra column {name} of format {format} in a') 1418 for name in self.diff_column_names[1]: 1419 format = self.diff_columns[1][name.lower()].format 1420 self._writeln(f' Extra column {name} of format {format} in b') 1421 1422 col_attrs = dict(_COL_ATTRS) 1423 # Now go through each table again and show columns with common 1424 # names but other property differences... 1425 for col_attr, vals in self.diff_column_attributes: 1426 name, attr = col_attr 1427 self._writeln(f' Column {name} has different {col_attrs[attr]}:') 1428 report_diff_values(vals[0], vals[1], fileobj=self._fileobj, 1429 indent_width=self._indent + 1) 1430 1431 if self.diff_rows: 1432 self._writeln(' Table rows differ:') 1433 self._writeln(f' a: {self.diff_rows[0]}') 1434 self._writeln(f' b: {self.diff_rows[1]}') 1435 self._writeln(' No further data comparison performed.') 1436 return 1437 1438 if not self.diff_values: 1439 return 1440 1441 # Finally, let's go through and report column data differences: 1442 for indx, values in self.diff_values: 1443 self._writeln(' Column {} data differs in row {}:'.format(*indx)) 1444 report_diff_values(values[0], values[1], fileobj=self._fileobj, 1445 indent_width=self._indent + 1) 1446 1447 if self.diff_values and self.numdiffs < self.diff_total: 1448 self._writeln(' ...{} additional difference(s) found.'.format( 1449 self.diff_total - self.numdiffs)) 1450 1451 if self.diff_total > self.numdiffs: 1452 self._writeln(' ...') 1453 1454 self._writeln(' {} different table data element(s) found ' 1455 '({:.2%} different).' 1456 .format(self.diff_total, self.diff_ratio)) 1457 1458 1459def report_diff_keyword_attr(fileobj, attr, diffs, keyword, ind=0): 1460 """ 1461 Write a diff between two header keyword values or comments to the specified 1462 file-like object. 1463 """ 1464 1465 if keyword in diffs: 1466 vals = diffs[keyword] 1467 for idx, val in enumerate(vals): 1468 if val is None: 1469 continue 1470 if idx == 0: 1471 dup = '' 1472 else: 1473 dup = f'[{idx + 1}]' 1474 fileobj.write( 1475 fixed_width_indent(' Keyword {:8}{} has different {}:\n' 1476 .format(keyword, dup, attr), ind)) 1477 report_diff_values(val[0], val[1], fileobj=fileobj, 1478 indent_width=ind + 1) 1479