1# Licensed under a 3-clause BSD style license - see PYFITS.rst
2
3
4import datetime
5import os
6import sys
7import warnings
8from contextlib import suppress
9from inspect import signature, Parameter
10
11import numpy as np
12
13from astropy.io.fits import conf
14from astropy.io.fits.file import _File
15from astropy.io.fits.header import (Header, _BasicHeader, _pad_length,
16                                    _DelayedHeader)
17from astropy.io.fits.util import (_is_int, _is_pseudo_integer, _pseudo_zero,
18                    itersubclasses, decode_ascii, _get_array_mmap, first,
19                    _free_space_check, _extract_number)
20from astropy.io.fits.verify import _Verify, _ErrList
21
22from astropy.utils import lazyproperty
23from astropy.utils.exceptions import AstropyUserWarning
24from astropy.utils.decorators import deprecated_renamed_argument
25
26
# Names exported as the public API of this module; the private HDU
# machinery (``_BaseHDU`` and friends) is deliberately not listed.
__all__ = [
    "DELAYED",
    # classes
    "InvalidHDUException",
    "ExtensionHDU",
    "NonstandardExtHDU",
]
34
35
36class _Delayed:
37    pass
38
39
40DELAYED = _Delayed()
41
42
BITPIX2DTYPE = {
    8: 'uint8',
    16: 'int16',
    32: 'int32',
    64: 'int64',
    -32: 'float32',
    -64: 'float64',
}
"""Maps FITS BITPIX values to Numpy dtype names."""

DTYPE2BITPIX = {
    'int8': 8,
    'uint8': 8,
    'int16': 16,
    'uint16': 16,
    'int32': 32,
    'uint32': 32,
    'int64': 64,
    'uint64': 64,
    'float32': -32,
    'float64': -64,
}
"""
Maps Numpy dtype names to FITS BITPIX values (this includes unsigned
integers, with the assumption that the pseudo-unsigned integer convention
will be used in this case).
"""
55
56
class InvalidHDUException(Exception):
    """
    Custom exception raised (mainly toward _BaseHDU.__new__) to signal that
    an HDU cannot possibly be valid and must be treated as corrupted.
    """
63
64
def _hdu_class_from_header(cls, header):
    """
    Pick the most appropriate subclass of ``cls`` for the given header.

    Iterates through the subclasses of _BaseHDU and uses each class's
    match_header() method to decide which subclass to instantiate.

    Be aware that the class hierarchy is traversed in a depth-last order,
    so each match_header() should identify an HDU type as uniquely as
    possible.  Abstract types may simply return False or raise
    NotImplementedError to be skipped.

    If any unexpected exception is raised while evaluating match_header(),
    the type is taken to be _CorruptedHDU.

    Used primarily by _BaseHDU._readfrom_internal and _BaseHDU._from_data to
    find an appropriate HDU class to use based on values in the header.
    """

    if not header:
        # Without a header to inspect, fall back to the class we were given.
        return cls

    for candidate in reversed(list(itersubclasses(cls))):
        try:
            # HDU classes built into astropy.io.fits are always considered,
            # but extension HDUs must be explicitly registered.
            eligible = (candidate.__module__.startswith('astropy.io.fits.')
                        or candidate in cls._hdu_registry)
            if eligible and candidate.match_header(header):
                return candidate
        except NotImplementedError:
            continue
        except Exception as exc:
            warnings.warn(
                'An exception occurred matching an HDU header to the '
                'appropriate HDU type: {}'.format(exc),
                AstropyUserWarning)
            warnings.warn('The HDU will be treated as corrupted.',
                          AstropyUserWarning)
            del exc
            return _CorruptedHDU

    # No subclass matched; use the class we started from.
    return cls
108
109
110# TODO: Come up with a better __repr__ for HDUs (and for HDULists, for that
111# matter)
class _BaseHDU:
    """Base class for all HDU (header data unit) classes."""

    # Extension HDU classes explicitly added via register_hdu(); consulted
    # by _hdu_class_from_header for classes defined outside astropy.io.fits.
    _hdu_registry = set()

    # This HDU type is part of the FITS standard
    _standard = True

    # Byte to use for padding out blocks
    _padding_byte = '\x00'

    # Value returned by .name when the header has no EXTNAME card
    _default_name = ''

    # _header uses a descriptor to delay the loading of the fits.Header object
    # until it is necessary.
    _header = _DelayedHeader()

    def __init__(self, data=None, header=None, *args, **kwargs):
        """
        Initialize the lazy-loading state common to all HDU types.

        Parameters
        ----------
        data : optional
            The HDU data; interpreted by subclasses (may also be the
            ``DELAYED`` sentinel when the HDU is read from a file).
        header : Header, optional
            The HDU header; an empty `Header` is created if not given.
        """
        if header is None:
            header = Header()
        self._header = header
        self._header_str = None
        # Backing file/buffer and byte offsets; populated by
        # _readfrom_internal when the HDU comes from a file or buffer.
        self._file = None
        self._buffer = None
        self._header_offset = None
        self._data_offset = None
        self._data_size = None

        # This internal variable is used to track whether the data attribute
        # still points to the same data array as when the HDU was originally
        # created (this does not track whether the data is actually the same
        # content-wise)
        self._data_replaced = False
        self._data_needs_rescale = False
        self._new = True
        self._output_checksum = False

        # Preserve the checksum behavior implied by the input header:
        # DATASUM without CHECKSUM means only a datasum is maintained.
        if 'DATASUM' in self._header and 'CHECKSUM' not in self._header:
            self._output_checksum = 'datasum'
        elif 'CHECKSUM' in self._header:
            self._output_checksum = True

    def __init_subclass__(cls, **kwargs):
        # Add the same data.deleter to all HDUs with a data property.
        # It's unfortunate, but there's otherwise no straightforward way
        # that a property can inherit setters/deleters of the property of the
        # same name on base classes.
        data_prop = cls.__dict__.get('data', None)
        if (isinstance(data_prop, (lazyproperty, property))
                and data_prop.fdel is None):
            # Don't do anything if the class has already explicitly
            # set the deleter for its data property
            def data(self):
                # The deleter
                if self._file is not None and self._data_loaded:
                    data_refcount = sys.getrefcount(self.data)
                    # Manually delete *now* so that FITS_rec.__del__
                    # cleanup can happen if applicable
                    del self.__dict__['data']
                    # Don't even do this unless the *only* reference to the
                    # .data array was the one we're deleting by deleting
                    # this attribute; if any other references to the array
                    # are hanging around (perhaps the user ran ``data =
                    # hdu.data``) don't even consider this:
                    if data_refcount == 2:
                        self._file._maybe_close_mmap()

            setattr(cls, 'data', data_prop.deleter(data))

        return super().__init_subclass__(**kwargs)

    @property
    def header(self):
        """The `Header` associated with this HDU."""
        return self._header

    @header.setter
    def header(self, value):
        self._header = value

    @property
    def name(self):
        """Name of the HDU, from the ``EXTNAME`` keyword (default ``''``)."""
        # Convert the value to a string to be flexible in some pathological
        # cases (see ticket #96)
        return str(self._header.get('EXTNAME', self._default_name))

    @name.setter
    def name(self, value):
        if not isinstance(value, str):
            raise TypeError("'name' attribute must be a string")
        if not conf.extension_name_case_sensitive:
            value = value.upper()
        if 'EXTNAME' in self._header:
            self._header['EXTNAME'] = value
        else:
            self._header['EXTNAME'] = (value, 'extension name')

    @property
    def ver(self):
        """Version of the HDU, from the ``EXTVER`` keyword (default 1)."""
        return self._header.get('EXTVER', 1)

    @ver.setter
    def ver(self, value):
        if not _is_int(value):
            raise TypeError("'ver' attribute must be an integer")
        if 'EXTVER' in self._header:
            self._header['EXTVER'] = value
        else:
            self._header['EXTVER'] = (value, 'extension value')

    @property
    def level(self):
        """Level of the HDU, from the ``EXTLEVEL`` keyword (default 1)."""
        return self._header.get('EXTLEVEL', 1)

    @level.setter
    def level(self, value):
        if not _is_int(value):
            raise TypeError("'level' attribute must be an integer")
        if 'EXTLEVEL' in self._header:
            self._header['EXTLEVEL'] = value
        else:
            self._header['EXTLEVEL'] = (value, 'extension level')

    @property
    def is_image(self):
        # True for the primary HDU, IMAGE extensions, and tile-compressed
        # images (BINTABLE extensions carrying ZIMAGE = True).
        return (
            self.name == 'PRIMARY' or
            ('XTENSION' in self._header and
             (self._header['XTENSION'] == 'IMAGE' or
              (self._header['XTENSION'] == 'BINTABLE' and
               'ZIMAGE' in self._header and self._header['ZIMAGE'] is True))))

    @property
    def _data_loaded(self):
        # True once the lazy 'data' attribute has been computed and is not
        # the DELAYED sentinel.
        return ('data' in self.__dict__ and self.data is not DELAYED)

    @property
    def _has_data(self):
        # Loaded *and* non-None; use this before touching self.data.dtype etc.
        return self._data_loaded and self.data is not None

    @classmethod
    def register_hdu(cls, hducls):
        """Register an HDU class so _hdu_class_from_header considers it."""
        cls._hdu_registry.add(hducls)

    @classmethod
    def unregister_hdu(cls, hducls):
        """Remove a previously registered HDU class (no-op if absent)."""
        if hducls in cls._hdu_registry:
            cls._hdu_registry.remove(hducls)

    @classmethod
    def match_header(cls, header):
        # Subclasses must override; raising NotImplementedError causes this
        # (abstract) class to be skipped by _hdu_class_from_header.
        raise NotImplementedError

    @classmethod
    def fromstring(cls, data, checksum=False, ignore_missing_end=False,
                   **kwargs):
        """
        Creates a new HDU object of the appropriate type from a string
        containing the HDU's entire header and, optionally, its data.

        Note: When creating a new HDU from a string without a backing file
        object, the data of that HDU may be read-only.  It depends on whether
        the underlying string was an immutable Python str/bytes object, or some
        kind of read-write memory buffer such as a `memoryview`.

        Parameters
        ----------
        data : str, bytearray, memoryview, ndarray
           A byte string containing the HDU's header and data.

        checksum : bool, optional
           Check the HDU's checksum and/or datasum.

        ignore_missing_end : bool, optional
           Ignore a missing end card in the header data.  Note that without the
           end card the end of the header may be ambiguous and result in a
           corrupt HDU.  In this case the assumption is that the first 2880
           block that does not begin with valid FITS header data is the
           beginning of the data.

        kwargs : optional
           May consist of additional keyword arguments specific to an HDU
           type--these correspond to keywords recognized by the constructors of
           different HDU classes such as `PrimaryHDU`, `ImageHDU`, or
           `BinTableHDU`.  Any unrecognized keyword arguments are simply
           ignored.
        """

        return cls._readfrom_internal(data, checksum=checksum,
                                      ignore_missing_end=ignore_missing_end,
                                      **kwargs)

    @classmethod
    def readfrom(cls, fileobj, checksum=False, ignore_missing_end=False,
                 **kwargs):
        """
        Read the HDU from a file.  Normally an HDU should be opened with
        :func:`open` which reads the entire HDU list in a FITS file.  But this
        method is still provided for symmetry with :func:`writeto`.

        Parameters
        ----------
        fileobj : file-like
            Input FITS file.  The file's seek pointer is assumed to be at the
            beginning of the HDU.

        checksum : bool
            If `True`, verifies that both ``DATASUM`` and ``CHECKSUM`` card
            values (when present in the HDU header) match the header and data
            of all HDU's in the file.

        ignore_missing_end : bool
            Do not issue an exception when opening a file that is missing an
            ``END`` card in the last header.
        """

        # TODO: Figure out a way to make it possible for the _File
        # constructor to be a noop if the argument is already a _File
        if not isinstance(fileobj, _File):
            fileobj = _File(fileobj)

        hdu = cls._readfrom_internal(fileobj, checksum=checksum,
                                     ignore_missing_end=ignore_missing_end,
                                     **kwargs)

        # If the checksum had to be checked the data may have already been read
        # from the file, in which case we don't want to seek relative
        fileobj.seek(hdu._data_offset + hdu._data_size, os.SEEK_SET)
        return hdu

    @deprecated_renamed_argument('clobber', 'overwrite', '2.0',
                                 message='"clobber" was deprecated in version '
                                         '2.0 and will be removed in version '
                                         '5.1. Use argument "overwrite" '
                                         'instead.')
    def writeto(self, name, output_verify='exception', overwrite=False,
                checksum=False):
        """
        Write the HDU to a new file. This is a convenience method to
        provide a user easier output interface if only one HDU needs
        to be written to a file.

        Parameters
        ----------
        name : path-like or file-like
            Output FITS file.  If the file object is already opened, it must
            be opened in a writeable mode.

        output_verify : str
            Output verification option.  Must be one of ``"fix"``,
            ``"silentfix"``, ``"ignore"``, ``"warn"``, or
            ``"exception"``.  May also be any combination of ``"fix"`` or
            ``"silentfix"`` with ``"+ignore"``, ``"+warn"``, or ``"+exception"``
            (e.g. ``"fix+warn"``).  See :ref:`astropy:verify` for more info.

        overwrite : bool, optional
            If ``True``, overwrite the output file if it exists. Raises an
            ``OSError`` if ``False`` and the output file exists. Default is
            ``False``.

            .. versionchanged:: 1.3
               ``overwrite`` replaces the deprecated ``clobber`` argument.

        checksum : bool
            When `True` adds both ``DATASUM`` and ``CHECKSUM`` cards
            to the header of the HDU when written to the file.
        """

        from .hdulist import HDUList

        # Delegate to HDUList.writeto so single-HDU output shares all the
        # verification/overwrite logic with multi-HDU output.
        hdulist = HDUList([self])
        hdulist.writeto(name, output_verify, overwrite=overwrite,
                        checksum=checksum)

    @classmethod
    def _from_data(cls, data, header, **kwargs):
        """
        Instantiate the HDU object after guessing the HDU class from the
        FITS Header.
        """
        klass = _hdu_class_from_header(cls, header)
        return klass(data=data, header=header, **kwargs)

    @classmethod
    def _readfrom_internal(cls, data, header=None, checksum=False,
                           ignore_missing_end=False, **kwargs):
        """
        Provides the bulk of the internal implementation for readfrom and
        fromstring.

        For some special cases, supports using a header that was already
        created, and just using the input data for the actual array data.
        """

        hdu_buffer = None
        hdu_fileobj = None
        header_offset = 0

        if isinstance(data, _File):
            if header is None:
                header_offset = data.tell()
                try:
                    # First we try to read the header with the fast parser
                    # from _BasicHeader, which will read only the standard
                    # 8 character keywords to get the structural keywords
                    # that are needed to build the HDU object.
                    header_str, header = _BasicHeader.fromfile(data)
                except Exception:
                    # If the fast header parsing failed, then fallback to
                    # the classic Header parser, which has better support
                    # and reporting for the various issues that can be found
                    # in the wild.
                    data.seek(header_offset)
                    header = Header.fromfile(data,
                                             endcard=not ignore_missing_end)
            hdu_fileobj = data
            data_offset = data.tell()  # *after* reading the header
        else:
            try:
                # Test that the given object supports the buffer interface by
                # ensuring an ndarray can be created from it
                np.ndarray((), dtype='ubyte', buffer=data)
            except TypeError:
                raise TypeError(
                    'The provided object {!r} does not contain an underlying '
                    'memory buffer.  fromstring() requires an object that '
                    'supports the buffer interface such as bytes, buffer, '
                    'memoryview, ndarray, etc.  This restriction is to ensure '
                    'that efficient access to the array/table data is possible.'
                    .format(data))

            if header is None:
                def block_iter(nbytes):
                    # Yield successive nbytes-sized chunks of the raw buffer.
                    idx = 0
                    while idx < len(data):
                        yield data[idx:idx + nbytes]
                        idx += nbytes

                header_str, header = Header._from_blocks(
                    block_iter, True, '', not ignore_missing_end, True)

                # Keep the buffer around only if it extends past the header,
                # i.e. it may actually contain data.
                if len(data) > len(header_str):
                    hdu_buffer = data
            elif data:
                hdu_buffer = data

            header_offset = 0
            data_offset = len(header_str)

        # Determine the appropriate arguments to pass to the constructor from
        # self._kwargs.  self._kwargs contains any number of optional arguments
        # that may or may not be valid depending on the HDU type
        cls = _hdu_class_from_header(cls, header)
        sig = signature(cls.__init__)
        new_kwargs = kwargs.copy()
        # If __init__ accepts arbitrary keyword arguments (**kwargs), we can
        # pass all keyword arguments through; otherwise we need to delete any
        # that are invalid for this constructor.
        if Parameter.VAR_KEYWORD not in (x.kind for x in sig.parameters.values()):
            for key in kwargs:
                if key not in sig.parameters:
                    del new_kwargs[key]

        try:
            hdu = cls(data=DELAYED, header=header, **new_kwargs)
        except TypeError:
            # This may happen because some HDU class (e.g. GroupsHDU) wants
            # to set a keyword on the header, which is not possible with the
            # _BasicHeader. While HDU classes should not need to modify the
            # header in general, sometimes this is needed to fix it. So in
            # this case we build a full Header and try again to create the
            # HDU object.
            if isinstance(header, _BasicHeader):
                header = Header.fromstring(header_str)
                hdu = cls(data=DELAYED, header=header, **new_kwargs)
            else:
                raise

        # One of these may be None, depending on whether the data came from a
        # file or a string buffer--later this will be further abstracted
        hdu._file = hdu_fileobj
        hdu._buffer = hdu_buffer

        hdu._header_offset = header_offset     # beginning of the header area
        hdu._data_offset = data_offset         # beginning of the data area

        # data area size, including padding
        size = hdu.size
        hdu._data_size = size + _pad_length(size)

        if isinstance(hdu._header, _BasicHeader):
            # Delete the temporary _BasicHeader.
            # We need to do this before an eventual checksum computation,
            # since it needs to modify temporarily the header
            #
            # The header string is stored in the HDU._header_str attribute,
            # so that it can be used directly when we need to create the
            # classic Header object, without having to parse again the file.
            del hdu._header
            hdu._header_str = header_str

        # Checksums are not checked on invalid HDU types
        if checksum and checksum != 'remove' and isinstance(hdu, _ValidHDU):
            hdu._verify_checksum_datasum()

        return hdu

    def _get_raw_data(self, shape, code, offset):
        """
        Return raw array from either the HDU's memory buffer or underlying
        file.
        """

        if isinstance(shape, int):
            shape = (shape,)

        if self._buffer:
            return np.ndarray(shape, dtype=code, buffer=self._buffer,
                              offset=offset)
        elif self._file:
            return self._file.readarray(offset=offset, dtype=code, shape=shape)
        else:
            # Neither a buffer nor a file backs this HDU.
            return None

    # TODO: Rework checksum handling so that it's not necessary to add a
    # checksum argument here
    # TODO: The BaseHDU class shouldn't even handle checksums since they're
    # only implemented on _ValidHDU...
    def _prewriteto(self, checksum=False, inplace=False):
        """Prepare the header for writing: scale keywords and checksums."""
        self._update_pseudo_int_scale_keywords()

        # Handle checksum
        self._update_checksum(checksum)

    def _update_pseudo_int_scale_keywords(self):
        """
        If the data is signed int 8, unsigned int 16, 32, or 64,
        add BSCALE/BZERO cards to header.
        """

        if (self._has_data and self._standard and
                _is_pseudo_integer(self.data.dtype)):
            # CompImageHDUs need TFIELDS immediately after GCOUNT,
            # so BSCALE has to go after TFIELDS if it exists.
            if 'TFIELDS' in self._header:
                self._header.set('BSCALE', 1, after='TFIELDS')
            elif 'GCOUNT' in self._header:
                self._header.set('BSCALE', 1, after='GCOUNT')
            else:
                self._header.set('BSCALE', 1)
            self._header.set('BZERO', _pseudo_zero(self.data.dtype),
                             after='BSCALE')

    def _update_checksum(self, checksum, checksum_keyword='CHECKSUM',
                         datasum_keyword='DATASUM'):
        """Update the 'CHECKSUM' and 'DATASUM' keywords in the header (or
        keywords with equivalent semantics given by the ``checksum_keyword``
        and ``datasum_keyword`` arguments--see for example ``CompImageHDU``
        for an example of why this might need to be overridden).
        """

        # If the data is loaded it isn't necessarily 'modified', but we have no
        # way of knowing for sure
        modified = self._header._modified or self._data_loaded

        if checksum == 'remove':
            if checksum_keyword in self._header:
                del self._header[checksum_keyword]

            if datasum_keyword in self._header:
                del self._header[datasum_keyword]
        elif (modified or self._new or
                (checksum and ('CHECKSUM' not in self._header or
                               'DATASUM' not in self._header or
                               not self._checksum_valid or
                               not self._datasum_valid))):
            # NOTE(review): add_datasum/add_checksum and the *_valid
            # attributes are defined on _ValidHDU elsewhere in this module.
            if checksum == 'datasum':
                self.add_datasum(datasum_keyword=datasum_keyword)
            elif checksum:
                self.add_checksum(checksum_keyword=checksum_keyword,
                                  datasum_keyword=datasum_keyword)

    def _postwriteto(self):
        # If data is unsigned integer 16, 32 or 64, remove the
        # BSCALE/BZERO cards
        if (self._has_data and self._standard and
                _is_pseudo_integer(self.data.dtype)):
            for keyword in ('BSCALE', 'BZERO'):
                with suppress(KeyError):
                    del self._header[keyword]

    def _writeheader(self, fileobj):
        """Write the header to ``fileobj``; return ``(offset, size)``."""
        offset = 0
        # Not all file-like objects support tell(); fall back to offset 0.
        with suppress(AttributeError, OSError):
            offset = fileobj.tell()

        self._header.tofile(fileobj)

        try:
            size = fileobj.tell() - offset
        except (AttributeError, OSError):
            # Cannot measure from the file; use the serialized header length.
            size = len(str(self._header))

        return offset, size

    def _writedata(self, fileobj):
        """Write the data area (with FITS padding); return ``(offset, size)``."""
        size = 0
        fileobj.flush()
        try:
            offset = fileobj.tell()
        except (AttributeError, OSError):
            offset = 0

        if self._data_loaded or self._data_needs_rescale:
            if self.data is not None:
                size += self._writedata_internal(fileobj)
            # pad the FITS data block
            # to avoid a bug in the lustre filesystem client, don't
            # write zero-byte objects
            if size > 0 and _pad_length(size) > 0:
                padding = _pad_length(size) * self._padding_byte
                # TODO: Not that this is ever likely, but if for some odd
                # reason _padding_byte is > 0x80 this will fail; but really if
                # somebody's custom fits format is doing that, they're doing it
                # wrong and should be reprimanded harshly.
                fileobj.write(padding.encode('ascii'))
                size += len(padding)
        else:
            # The data has not been modified or does not need to be
            # rescaled, so it can be copied, unmodified, directly from an
            # existing file or buffer
            size += self._writedata_direct_copy(fileobj)

        # flush, to make sure the content is written
        fileobj.flush()

        # return both the location and the size of the data area
        return offset, size

    def _writedata_internal(self, fileobj):
        """
        The beginning and end of most _writedata() implementations are the
        same, but the details of writing the data array itself can vary between
        HDU types, so that should be implemented in this method.

        Should return the size in bytes of the data written.
        """

        fileobj.writearray(self.data)
        return self.data.size * self.data.itemsize

    def _writedata_direct_copy(self, fileobj):
        """Copies the data directly from one file/buffer to the new file.

        For now this is handled by loading the raw data from the existing data
        (including any padding) via a memory map or from an already in-memory
        buffer and using Numpy's existing file-writing facilities to write to
        the new file.

        If this proves too slow a more direct approach may be used.
        """
        raw = self._get_raw_data(self._data_size, 'ubyte', self._data_offset)
        if raw is not None:
            fileobj.writearray(raw)
            return raw.nbytes
        else:
            return 0

    # TODO: This is the start of moving HDU writing out of the _File class;
    # Though right now this is an internal private method (though still used by
    # HDUList, eventually the plan is to have this be moved into writeto()
    # somehow...
    def _writeto(self, fileobj, inplace=False, copy=False):
        """Write the whole HDU, guarded by a free-disk-space check."""
        try:
            dirname = os.path.dirname(fileobj._file.name)
        except (AttributeError, TypeError):
            # Not a named on-disk file; skip the free-space check directory.
            dirname = None

        with _free_space_check(self, dirname):
            self._writeto_internal(fileobj, inplace, copy)

    def _writeto_internal(self, fileobj, inplace, copy):
        """Write header and data, handling in-place rewrites and copies."""
        # For now fileobj is assumed to be a _File object
        if not inplace or self._new:
            header_offset, _ = self._writeheader(fileobj)
            data_offset, data_size = self._writedata(fileobj)

            # Set the various data location attributes on newly-written HDUs
            if self._new:
                self._header_offset = header_offset
                self._data_offset = data_offset
                self._data_size = data_size
            return

        hdrloc = self._header_offset
        hdrsize = self._data_offset - self._header_offset
        datloc = self._data_offset
        datsize = self._data_size

        if self._header._modified:
            # Seek to the original header location in the file
            self._file.seek(hdrloc)
            # This should update hdrloc with the header location in the new file
            hdrloc, hdrsize = self._writeheader(fileobj)

            # If the data is to be written below with self._writedata, that
            # will also properly update the data location; but it should be
            # updated here too
            datloc = hdrloc + hdrsize
        elif copy:
            # Seek to the original header location in the file
            self._file.seek(hdrloc)
            # Before writing, update the hdrloc with the current file position,
            # which is the hdrloc for the new file
            hdrloc = fileobj.tell()
            fileobj.write(self._file.read(hdrsize))
            # The header size is unchanged, but the data location may be
            # different from before depending on if previous HDUs were resized
            datloc = fileobj.tell()

        if self._data_loaded:
            if self.data is not None:
                # Seek through the array's bases for a memmap'd array; we
                # can't rely on the _File object to give us this info since
                # the user may have replaced the previous mmap'd array
                if copy or self._data_replaced:
                    # Of course, if we're copying the data to a new file
                    # we don't care about flushing the original mmap;
                    # instead just read it into the new file
                    array_mmap = None
                else:
                    array_mmap = _get_array_mmap(self.data)

                if array_mmap is not None:
                    array_mmap.flush()
                else:
                    self._file.seek(self._data_offset)
                    datloc, datsize = self._writedata(fileobj)
        elif copy:
            datsize = self._writedata_direct_copy(fileobj)

        # Record where this HDU now lives in the (possibly new) file.
        self._header_offset = hdrloc
        self._data_offset = datloc
        self._data_size = datsize
        self._data_replaced = False

    def _close(self, closed=True):
        # If the data was mmap'd, close the underlying mmap (this will
        # prevent any future access to the .data attribute if there are
        # not other references to it; if there are other references then
        # it is up to the user to clean those up)
        if (closed and self._data_loaded and
                _get_array_mmap(self.data) is not None):
            del self.data
765
766
767# For backwards-compatibility, though nobody should have
768# been using this directly:
769_AllHDU = _BaseHDU
770
771# For convenience...
772# TODO: register_hdu could be made into a class decorator which would be pretty
773# cool, but only once 2.6 support is dropped.
774register_hdu = _BaseHDU.register_hdu
775unregister_hdu = _BaseHDU.unregister_hdu
776
777
class _CorruptedHDU(_BaseHDU):
    """
    A Corrupted HDU class.

    Used when one or more mandatory `Card`s are corrupted (unparsable),
    such as the ``BITPIX``, ``NAXIS``, or ``END`` cards.  A corrupted HDU
    usually means the data size cannot be calculated or the ``END`` card
    was never found; when ``END`` is missing, the `Header` may even
    contain the binary data.

    .. note::
       In future it may be possible to decipher where the last block of
       the `Header` ends, but this is difficult when the extension is a
       `TableHDU` containing ASCII data.
    """

    @property
    def size(self):
        """
        Returns the size (in bytes) of the HDU's data part.
        """
        # Everything after the (presumed) header is treated as data.  On
        # compressed files this might come out negative, but the file is
        # corrupt anyway, so that is tolerated.
        if self._buffer is None:
            return self._file.size - self._data_offset
        return len(self._buffer) - self._data_offset

    def _summary(self):
        # (name, version, HDU type label) triple for summary listings.
        return (self.name, self.ver, 'CorruptedHDU')

    def verify(self):
        # Nothing can be meaningfully verified on a corrupted HDU.
        pass
813
814
class _NonstandardHDU(_BaseHDU, _Verify):
    """
    A Non-standard HDU class.

    Used for a Primary HDU whose ``SIMPLE`` Card has a value of `False`.
    Such an HDU comes from a file that resembles a FITS file but departs
    from the standard in some significant way -- for example, files whose
    numbers are in the DEC VAX internal storage format rather than the
    standard FITS most-significant-byte-first order.  The header for this
    HDU should be valid.  The data is read from the file as a raw byte
    stream starting at the first byte after the header ``END`` card and
    continuing to the end of the file.
    """

    _standard = False

    @classmethod
    def match_header(cls, header):
        """
        Matches any HDU that has the 'SIMPLE' keyword but is not a standard
        Primary or Groups HDU.
        """
        # The SIMPLE keyword must be in the first card.
        first_card = header.cards[0]
        if first_card.keyword != 'SIMPLE':
            return False
        # The check that 'GROUPS' is missing is a bit redundant, since the
        # match_header for GroupsHDU will always be called before this one.
        if first_card.value is False and 'GROUPS' not in header:
            return True
        raise InvalidHDUException

    @property
    def size(self):
        """
        Returns the size (in bytes) of the HDU's data part.
        """
        # The data runs from the end of the header to the end of the file.
        if self._buffer is None:
            return self._file.size - self._data_offset
        return len(self._buffer) - self._data_offset

    def _writedata(self, fileobj):
        """
        Differs from the base class :class:`_writedata` in that it doesn't
        automatically add padding, and treats the data as a string of raw bytes
        instead of an array.
        """
        fileobj.flush()
        try:
            offset = fileobj.tell()
        except OSError:
            offset = 0

        size = 0
        if self.data is not None:
            fileobj.write(self.data)
            # Flush, to make sure the content is written.
            fileobj.flush()
            size = len(self.data)

        # Return both the location and the size of the data area.
        return offset, size

    def _summary(self):
        # (name, version, type label, header card count) for listings.
        return (self.name, self.ver, 'NonstandardHDU', len(self._header))

    @lazyproperty
    def data(self):
        """
        Return the file data.
        """
        # Raw unsigned bytes straight from the file; no interpretation.
        return self._get_raw_data(self.size, 'ubyte', self._data_offset)

    def _verify(self, option='warn'):
        # Only the individual cards can be verified; the data is opaque.
        card_errs = [card._verify(option) for card in self._header.cards]
        return _ErrList(card_errs, unit='Card')
907
908
class _ValidHDU(_BaseHDU, _Verify):
    """
    Base class for all HDUs which are not corrupted.
    """

    def __init__(self, data=None, header=None, name=None, ver=None, **kwargs):
        """
        Initialize the HDU; ``name`` and ``ver``, when given, are assigned
        to ``self.name`` and ``self.ver`` respectively.
        """
        super().__init__(data=data, header=header)

        if (header is not None and
                not isinstance(header, (Header, _BasicHeader))):
            # TODO: Instead maybe try initializing a new Header object from
            # whatever is passed in as the header--there are various types
            # of objects that could work for this...
            raise ValueError('header must be a Header object')

        # NOTE:  private data members _checksum and _datasum are used by the
        # utility script "fitscheck" to detect missing checksums.
        self._checksum = None
        self._checksum_valid = None
        self._datasum = None
        self._datasum_valid = None

        if name is not None:
            self.name = name
        if ver is not None:
            self.ver = ver

    @classmethod
    def match_header(cls, header):
        """
        Matches any HDU that is not recognized as having either the SIMPLE or
        XTENSION keyword in its header's first card, but is nonetheless not
        corrupted.

        TODO: Maybe it would make more sense to use _NonstandardHDU in this
        case?  Not sure...
        """

        return first(header.keys()) not in ('SIMPLE', 'XTENSION')

    @property
    def size(self):
        """
        Size (in bytes) of the data portion of the HDU.
        """

        # FITS data size formula:
        #   |BITPIX| / 8 * GCOUNT * (PCOUNT + NAXIS1 * ... * NAXISn)
        # NAXIS == 0 means there is no data, hence size 0.
        size = 0
        naxis = self._header.get('NAXIS', 0)
        if naxis > 0:
            size = 1
            for idx in range(naxis):
                size = size * self._header['NAXIS' + str(idx + 1)]
            bitpix = self._header['BITPIX']
            gcount = self._header.get('GCOUNT', 1)
            pcount = self._header.get('PCOUNT', 0)
            size = abs(bitpix) * gcount * (pcount + size) // 8
        return size

    def filebytes(self):
        """
        Calculates and returns the number of bytes that this HDU will write to
        a file.
        """

        # Write to a throwaway _File object; _writeheader and _writedata
        # each return a (location, size) tuple and only the sizes are used.
        f = _File()
        # TODO: Fix this once new HDU writing API is settled on
        return self._writeheader(f)[1] + self._writedata(f)[1]

    def fileinfo(self):
        """
        Returns a dictionary detailing information about the locations
        of this HDU within any associated file.  The values are only
        valid after a read or write of the associated file with no
        intervening changes to the `HDUList`.

        Returns
        -------
        dict or None

           The dictionary details information about the locations of
           this HDU within an associated file.  Returns `None` when
           the HDU is not associated with a file.

           Dictionary contents:

           ========== ================================================
           Key        Value
           ========== ================================================
           file       File object associated with the HDU
           filemode   Mode in which the file was opened (readonly, copyonwrite,
                      update, append, ostream)
           hdrLoc     Starting byte location of header in file
           datLoc     Starting byte location of data block in file
           datSpan    Data size including padding
           ========== ================================================
        """

        if hasattr(self, '_file') and self._file:
            return {'file': self._file, 'filemode': self._file.mode,
                    'hdrLoc': self._header_offset, 'datLoc': self._data_offset,
                    'datSpan': self._data_size}
        else:
            return None

    def copy(self):
        """
        Make a copy of the HDU, both header and data are copied.
        """

        if self.data is not None:
            data = self.data.copy()
        else:
            data = None
        return self.__class__(data=data, header=self._header.copy())

    def _verify(self, option='warn'):
        """
        Verify the mandatory structural keywords and every header card,
        returning the accumulated `_ErrList`.
        """
        errs = _ErrList([], unit='Card')

        # A BITPIX value is valid iff it is one of the keys of BITPIX2DTYPE.
        is_valid = BITPIX2DTYPE.__contains__

        # Verify location and value of mandatory keywords.
        # Do the first card here, instead of in the respective HDU classes, so
        # the checking is in order, in case of required cards in wrong order.
        if isinstance(self, ExtensionHDU):
            firstkey = 'XTENSION'
            firstval = self._extension
        else:
            firstkey = 'SIMPLE'
            firstval = True

        self.req_cards(firstkey, 0, None, firstval, option, errs)
        self.req_cards('BITPIX', 1, lambda v: (_is_int(v) and is_valid(v)), 8,
                       option, errs)
        self.req_cards('NAXIS', 2,
                       lambda v: (_is_int(v) and 0 <= v <= 999), 0,
                       option, errs)

        naxis = self._header.get('NAXIS', 0)
        if naxis < 1000:
            # NAXISj cards (j = 1..naxis) are required immediately after
            # NAXIS, i.e. at card positions 3 .. naxis + 2.
            for ax in range(3, naxis + 3):
                key = 'NAXIS' + str(ax - 2)
                self.req_cards(key, ax,
                               lambda v: (_is_int(v) and v >= 0),
                               _extract_number(self._header[key], default=1),
                               option, errs)

            # Remove NAXISj cards where j is not in range 1, naxis inclusive.
            for keyword in self._header:
                if keyword.startswith('NAXIS') and len(keyword) > 5:
                    try:
                        number = int(keyword[5:])
                        if number <= 0 or number > naxis:
                            raise ValueError
                    except ValueError:
                        err_text = ("NAXISj keyword out of range ('{}' when "
                                    "NAXIS == {})".format(keyword, naxis))

                        def fix(self=self, keyword=keyword):
                            del self._header[keyword]

                        errs.append(
                            self.run_option(option=option, err_text=err_text,
                                            fix=fix, fix_text="Deleted."))

        # Verify that the EXTNAME keyword exists and is a string
        if 'EXTNAME' in self._header:
            if not isinstance(self._header['EXTNAME'], str):
                err_text = 'The EXTNAME keyword must have a string value.'
                fix_text = 'Converted the EXTNAME keyword to a string value.'

                def fix(header=self._header):
                    header['EXTNAME'] = str(header['EXTNAME'])

                errs.append(self.run_option(option, err_text=err_text,
                                            fix_text=fix_text, fix=fix))

        # verify each card
        for card in self._header.cards:
            errs.append(card._verify(option))

        return errs

    # TODO: Improve this API a little bit--for one, most of these arguments
    # could be optional
    def req_cards(self, keyword, pos, test, fix_value, option, errlist):
        """
        Check the existence, location, and value of a required `Card`.

        Parameters
        ----------
        keyword : str
            The keyword to validate

        pos : int, callable
            If an ``int``, this specifies the exact location this card should
            have in the header.  Remember that Python is zero-indexed, so this
            means ``pos=0`` requires the card to be the first card in the
            header.  If given a callable, it should take one argument--the
            actual position of the keyword--and return `True` or `False`.  This
            can be used for custom evaluation.  For example if
            ``pos=lambda idx: idx > 10`` this will check that the keyword's
            index is greater than 10.

        test : callable
            This should be a callable (generally a function) that is passed the
            value of the given keyword and returns `True` or `False`.  This can
            be used to validate the value associated with the given keyword.

        fix_value : str, int, float, complex, bool, None
            A valid value for a FITS keyword to use if the given ``test``
            fails to replace an invalid value.  In other words, this provides
            a default value to use as a replacement if the keyword's current
            value is invalid.  If `None`, there is no replacement value and the
            keyword is unfixable.

        option : str
            Output verification option.  Must be one of ``"fix"``,
            ``"silentfix"``, ``"ignore"``, ``"warn"``, or
            ``"exception"``.  May also be any combination of ``"fix"`` or
            ``"silentfix"`` with ``"+ignore"``, ``"+warn"``, or ``"+exception"``
            (e.g. ``"fix+warn"``).  See :ref:`astropy:verify` for more info.

        errlist : list
            A list of validation errors already found in the FITS file; this is
            used primarily for the validation system to collect errors across
            multiple HDUs and multiple calls to `req_cards`.

        Notes
        -----
        If ``pos=None``, the card can be anywhere in the header.  If the card
        does not exist, the new card will have the ``fix_value`` as its value
        when created.  Also check the card's value by using the ``test``
        argument.
        """

        errs = errlist
        fix = None

        try:
            index = self._header.index(keyword)
        except ValueError:
            index = None

        fixable = fix_value is not None

        insert_pos = len(self._header) + 1

        # If pos is an int, insert at the given position (and convert it to a
        # lambda)
        if _is_int(pos):
            insert_pos = pos
            pos = lambda x: x == insert_pos

        # if the card does not exist
        if index is None:
            err_text = f"'{keyword}' card does not exist."
            fix_text = f"Fixed by inserting a new '{keyword}' card."
            if fixable:
                # use repr to accommodate both string and non-string types
                # Boolean is also OK in this constructor
                card = (keyword, fix_value)

                def fix(self=self, insert_pos=insert_pos, card=card):
                    self._header.insert(insert_pos, card)

            errs.append(self.run_option(option, err_text=err_text,
                        fix_text=fix_text, fix=fix, fixable=fixable))
        else:
            # if the supposed location is specified
            if pos is not None:
                if not pos(index):
                    err_text = f"'{keyword}' card at the wrong place (card {index})."
                    fix_text = f"Fixed by moving it to the right place (card {insert_pos})."

                    def fix(self=self, index=index, insert_pos=insert_pos):
                        card = self._header.cards[index]
                        del self._header[index]
                        self._header.insert(insert_pos, card)

                    errs.append(self.run_option(option, err_text=err_text,
                                fix_text=fix_text, fix=fix))

            # if value checking is specified
            if test:
                val = self._header[keyword]
                if not test(val):
                    err_text = f"'{keyword}' card has invalid value '{val}'."
                    fix_text = f"Fixed by setting a new value '{fix_value}'."

                    if fixable:
                        # NOTE: the ``val`` default is bound but unused; the
                        # replacement written is ``fix_value`` from the
                        # enclosing call.
                        def fix(self=self, keyword=keyword, val=fix_value):
                            self._header[keyword] = fix_value

                    errs.append(self.run_option(option, err_text=err_text,
                                fix_text=fix_text, fix=fix, fixable=fixable))

        return errs

    def add_datasum(self, when=None, datasum_keyword='DATASUM'):
        """
        Add the ``DATASUM`` card to this HDU with the value set to the
        checksum calculated for the data.

        Parameters
        ----------
        when : str, optional
            Comment string for the card that by default represents the
            time when the checksum was calculated

        datasum_keyword : str, optional
            The name of the header keyword to store the datasum value in;
            this is typically 'DATASUM' per convention, but there exist
            use cases in which a different keyword should be used

        Returns
        -------
        checksum : int
            The calculated datasum

        Notes
        -----
        For testing purposes, provide a ``when`` argument to enable the comment
        value in the card to remain consistent.  This will enable the
        generation of a ``CHECKSUM`` card with a consistent value.
        """

        cs = self._calculate_datasum()

        if when is None:
            when = f'data unit checksum updated {self._get_timestamp()}'

        # The datasum is stored as a string per the FITS checksum convention.
        self._header[datasum_keyword] = (str(cs), when)
        return cs

    def add_checksum(self, when=None, override_datasum=False,
                     checksum_keyword='CHECKSUM', datasum_keyword='DATASUM'):
        """
        Add the ``CHECKSUM`` and ``DATASUM`` cards to this HDU with
        the values set to the checksum calculated for the HDU and the
        data respectively.  The addition of the ``DATASUM`` card may
        be overridden.

        Parameters
        ----------
        when : str, optional
           comment string for the cards; by default the comments
           will represent the time when the checksum was calculated

        override_datasum : bool, optional
           add the ``CHECKSUM`` card only

        checksum_keyword : str, optional
            The name of the header keyword to store the checksum value in; this
            is typically 'CHECKSUM' per convention, but there exist use cases
            in which a different keyword should be used

        datasum_keyword : str, optional
            See ``checksum_keyword``

        Notes
        -----
        For testing purposes, first call `add_datasum` with a ``when``
        argument, then call `add_checksum` with a ``when`` argument and
        ``override_datasum`` set to `True`.  This will provide consistent
        comments for both cards and enable the generation of a ``CHECKSUM``
        card with a consistent value.
        """

        if not override_datasum:
            # Calculate and add the data checksum to the header.
            data_cs = self.add_datasum(when, datasum_keyword=datasum_keyword)
        else:
            # Just calculate the data checksum
            data_cs = self._calculate_datasum()

        if when is None:
            when = f'HDU checksum updated {self._get_timestamp()}'

        # Add the CHECKSUM card to the header with a value of all zeros.
        # The placeholder is required so the header length (and thus the
        # checksum of the header itself) is already final when computed.
        if datasum_keyword in self._header:
            self._header.set(checksum_keyword, '0' * 16, when,
                             before=datasum_keyword)
        else:
            self._header.set(checksum_keyword, '0' * 16, when)

        csum = self._calculate_checksum(data_cs,
                                        checksum_keyword=checksum_keyword)
        self._header[checksum_keyword] = csum

    def verify_datasum(self):
        """
        Verify that the value in the ``DATASUM`` keyword matches the value
        calculated for the ``DATASUM`` of the current HDU data.

        Returns
        -------
        valid : int
           - 0 - failure
           - 1 - success
           - 2 - no ``DATASUM`` keyword present
        """

        if 'DATASUM' in self._header:
            datasum = self._calculate_datasum()
            if datasum == int(self._header['DATASUM']):
                return 1
            else:
                # Failed
                return 0
        else:
            return 2

    def verify_checksum(self):
        """
        Verify that the value in the ``CHECKSUM`` keyword matches the
        value calculated for the current HDU CHECKSUM.

        Returns
        -------
        valid : int
           - 0 - failure
           - 1 - success
           - 2 - no ``CHECKSUM`` keyword present
        """

        if 'CHECKSUM' in self._header:
            if 'DATASUM' in self._header:
                datasum = self._calculate_datasum()
            else:
                datasum = 0
            checksum = self._calculate_checksum(datasum)
            if checksum == self._header['CHECKSUM']:
                return 1
            else:
                # Failed
                return 0
        else:
            return 2

    def _verify_checksum_datasum(self):
        """
        Verify the checksum/datasum values if the cards exist in the header.
        Simply displays warnings if either the checksum or datasum don't match.
        """

        if 'CHECKSUM' in self._header:
            self._checksum = self._header['CHECKSUM']
            self._checksum_valid = self.verify_checksum()
            if not self._checksum_valid:
                warnings.warn(
                    'Checksum verification failed for HDU {}.\n'.format(
                        (self.name, self.ver)), AstropyUserWarning)

        if 'DATASUM' in self._header:
            self._datasum = self._header['DATASUM']
            self._datasum_valid = self.verify_datasum()
            if not self._datasum_valid:
                warnings.warn(
                    'Datasum verification failed for HDU {}.\n'.format(
                        (self.name, self.ver)), AstropyUserWarning)

    def _get_timestamp(self):
        """
        Return the current timestamp in ISO 8601 format, with microseconds
        stripped off.

        Ex.: 2007-05-30T19:05:11
        """

        # isoformat() gives 'YYYY-MM-DDTHH:MM:SS.ffffff'; the first 19
        # characters drop the fractional seconds.
        return datetime.datetime.now().isoformat()[:19]

    def _calculate_datasum(self):
        """
        Calculate the value for the ``DATASUM`` card in the HDU.
        """

        if not self._data_loaded:
            # This is the case where the data has not been read from the file
            # yet.  We find the data in the file, read it, and calculate the
            # datasum.
            if self.size > 0:
                raw_data = self._get_raw_data(self._data_size, 'ubyte',
                                              self._data_offset)
                return self._compute_checksum(raw_data)
            else:
                return 0
        elif self.data is not None:
            return self._compute_checksum(self.data.view('ubyte'))
        else:
            return 0

    def _calculate_checksum(self, datasum, checksum_keyword='CHECKSUM'):
        """
        Calculate the value of the ``CHECKSUM`` card in the HDU.
        """

        # Per the FITS checksum convention the CHECKSUM card value must be
        # all zeros while the header checksum is computed; the original
        # value is saved and restored afterwards.
        old_checksum = self._header[checksum_keyword]
        self._header[checksum_keyword] = '0' * 16

        # Convert the header to bytes.
        s = self._header.tostring().encode('utf8')

        # Calculate the checksum of the Header and data.
        cs = self._compute_checksum(np.frombuffer(s, dtype='ubyte'), datasum)

        # Encode the checksum into a string.
        s = self._char_encode(~cs)

        # Return the header card value.
        self._header[checksum_keyword] = old_checksum

        return s

    def _compute_checksum(self, data, sum32=0):
        """
        Compute the ones-complement checksum of a sequence of bytes.

        Parameters
        ----------
        data
            a memory region to checksum

        sum32
            incremental checksum value from another region

        Returns
        -------
        ones complement checksum
        """

        # Process in FITS-block-sized (2880 byte) chunks; see
        # _compute_hdu_checksum for why block size matters.
        blocklen = 2880
        sum32 = np.uint32(sum32)
        for i in range(0, len(data), blocklen):
            length = min(blocklen, len(data) - i)   # the final chunk may be shorter than one block
            sum32 = self._compute_hdu_checksum(data[i:i + length], sum32)
        return sum32

    def _compute_hdu_checksum(self, data, sum32=0):
        """
        Translated from FITS Checksum Proposal by Seaman, Pence, and Rots.
        Use uint32 literals as a hedge against type promotion to int64.

        This code should only be called with blocks of 2880 bytes
        Longer blocks result in non-standard checksums with carry overflow
        Historically,  this code *was* called with larger blocks and for that
        reason still needs to be for backward compatibility.
        """

        u8 = np.uint32(8)
        u16 = np.uint32(16)
        uFFFF = np.uint32(0xFFFF)

        if data.nbytes % 2:
            # Odd byte count: peel off the trailing byte so the rest can be
            # viewed as 16-bit words; it is folded back in below.
            last = data[-1]
            data = data[:-1]
        else:
            last = np.uint32(0)

        # Interpret the block as big-endian unsigned 16-bit words.
        data = data.view('>u2')

        # Accumulate even-indexed words into the high half and odd-indexed
        # words into the low half of the running 32-bit sum.
        hi = sum32 >> u16
        lo = sum32 & uFFFF
        hi += np.add.reduce(data[0::2], dtype=np.uint64)
        lo += np.add.reduce(data[1::2], dtype=np.uint64)

        # Fold the peeled-off odd byte into whichever half it belongs to,
        # as the high byte of its (virtual) 16-bit word.
        if (data.nbytes // 2) % 2:
            lo += last << u8
        else:
            hi += last << u8

        hicarry = hi >> u16
        locarry = lo >> u16

        # Ones-complement arithmetic: fold the 16-bit carries back in until
        # none remain.
        while hicarry or locarry:
            hi = (hi & uFFFF) + locarry
            lo = (lo & uFFFF) + hicarry
            hicarry = hi >> u16
            locarry = lo >> u16

        return (hi << u16) + lo

    # _MASK and _EXCLUDE used for encoding the checksum value into a character
    # string.
    # _MASK isolates each of the four bytes of the 32-bit checksum.
    _MASK = [0xFF000000,
             0x00FF0000,
             0x0000FF00,
             0x000000FF]

    # ASCII punctuation codes that must not appear in the encoded checksum
    # (per the FITS checksum proposal's ASCII encoding).
    _EXCLUDE = [0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40,
                0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60]

    def _encode_byte(self, byte):
        """
        Encode a single byte.
        """

        # Spread the byte value over four characters offset from ASCII '0'
        # so that the four character codes sum back to the byte value.
        quotient = byte // 4 + ord('0')
        remainder = byte % 4

        ch = np.array(
            [(quotient + remainder), quotient, quotient, quotient],
            dtype='int32')

        # Nudge pairs of characters in opposite directions (preserving the
        # sum) until none of them collide with an excluded punctuation code.
        check = True
        while check:
            check = False
            for x in self._EXCLUDE:
                for j in [0, 2]:
                    if ch[j] == x or ch[j + 1] == x:
                        ch[j] += 1
                        ch[j + 1] -= 1
                        check = True
        return ch

    def _char_encode(self, value):
        """
        Encodes the checksum ``value`` using the algorithm described
        in SPR section A.7.2 and returns it as a 16 character string.

        Parameters
        ----------
        value
            a checksum

        Returns
        -------
        ascii encoded checksum
        """

        value = np.uint32(value)

        asc = np.zeros((16,), dtype='byte')
        ascii = np.zeros((16,), dtype='byte')

        # Encode each of the 4 checksum bytes into 4 characters and lay
        # them out column-wise (byte i fills positions i, i+4, i+8, i+12).
        for i in range(4):
            byte = (value & self._MASK[i]) >> ((3 - i) * 8)
            ch = self._encode_byte(byte)
            for j in range(4):
                asc[4 * j + i] = ch[j]

        # Rotate the 16-character string right by one position, as the
        # encoding scheme requires.
        for i in range(16):
            ascii[i] = asc[(i + 15) % 16]

        return decode_ascii(ascii.tobytes())
1553
1554
class ExtensionHDU(_ValidHDU):
    """
    An extension HDU class.

    This class is the base class for the `TableHDU`, `ImageHDU`, and
    `BinTableHDU` classes.
    """

    # Value of the XTENSION keyword identifying this extension type;
    # overridden by concrete subclasses.
    _extension = ''

    @classmethod
    def match_header(cls, header):
        """
        This class should never be instantiated directly.  Either a standard
        extension HDU type should be used for a specific extension, or
        NonstandardExtHDU should be used.
        """

        raise NotImplementedError

    @deprecated_renamed_argument('clobber', 'overwrite', '2.0',
                                 message='"clobber" was deprecated in version '
                                         '2.0 and will be removed in version '
                                         '5.1. Use argument "overwrite" '
                                         'instead.')
    def writeto(self, name, output_verify='exception', overwrite=False,
                checksum=False):
        """
        Works similarly to the normal writeto(), but prepends a default
        `PrimaryHDU`, as required by extension HDUs (which cannot stand on
        their own).

        .. versionchanged:: 1.3
           ``overwrite`` replaces the deprecated ``clobber`` argument.
        """

        from .hdulist import HDUList
        from .image import PrimaryHDU

        hdulist = HDUList([PrimaryHDU(), self])
        hdulist.writeto(name, output_verify, overwrite=overwrite,
                        checksum=checksum)

    def _verify(self, option='warn'):

        errs = super()._verify(option=option)

        # Verify location and value of mandatory keywords.
        # PCOUNT and GCOUNT must immediately follow the NAXISj cards, i.e.
        # at card positions naxis + 3 and naxis + 4; for standard
        # extensions GCOUNT must be 1.
        naxis = self._header.get('NAXIS', 0)
        self.req_cards('PCOUNT', naxis + 3, lambda v: (_is_int(v) and v >= 0),
                       0, option, errs)
        self.req_cards('GCOUNT', naxis + 4, lambda v: (_is_int(v) and v == 1),
                       1, option, errs)

        return errs
1610
1611
# For backwards compatibility, though this needs to be deprecated
# TODO: Mark this as deprecated
# Old private-style alias kept so external code that imported
# ``_ExtensionHDU`` continues to work.
_ExtensionHDU = ExtensionHDU
1615
1616
class NonstandardExtHDU(ExtensionHDU):
    """
    A Non-standard Extension HDU class.

    This class is used for an Extension HDU when the ``XTENSION``
    `Card` has a non-standard value.  In this case, Astropy can figure
    out how big the data is but not what it is.  The data for this HDU
    is read from the file as a byte stream that begins at the first
    byte after the header ``END`` card and continues until the
    beginning of the next header or the end of the file.
    """

    _standard = False

    @classmethod
    def match_header(cls, header):
        """
        Matches any extension HDU that is not one of the standard extension HDU
        types.
        """
        first_card = header.cards[0]
        value = first_card.value
        if isinstance(value, str):
            value = value.rstrip()
        # A3DTABLE is not really considered a 'standard' extension, as it was
        # sort of the prototype for BINTABLE; however, since our BINTABLE
        # implementation handles A3DTABLE HDUs it is listed here.
        known_xtensions = ('IMAGE', 'TABLE', 'BINTABLE', 'A3DTABLE')
        # The keyword check alone should suffice; the membership test is
        # kept as a (redundant) safeguard.
        return (first_card.keyword == 'XTENSION'
                and value not in known_xtensions)

    def _summary(self):
        """Return the tuple of fields used for HDUList summary output."""
        shape = tuple(self.data.shape)
        return (self.name, self.ver, 'NonstandardExtHDU',
                len(self._header), shape)

    @lazyproperty
    def data(self):
        """
        Return the file data.
        """
        return self._get_raw_data(self.size, 'ubyte', self._data_offset)
1662
1663
# TODO: Mark this as deprecated
# Old private-style alias kept so external code that imported
# ``_NonstandardExtHDU`` continues to work.
_NonstandardExtHDU = NonstandardExtHDU
1666