1# Licensed under a 3-clause BSD style license - see PYFITS.rst
2
3import sys
4import mmap
5import warnings
6
7import numpy as np
8
9from .base import DELAYED, _ValidHDU, ExtensionHDU, BITPIX2DTYPE, DTYPE2BITPIX
10from astropy.io.fits.header import Header
11from astropy.io.fits.util import _is_pseudo_integer, _pseudo_zero, _is_int
12from astropy.io.fits.verify import VerifyWarning
13
14from astropy.utils import isiterable, lazyproperty
15
try:
    from dask.array import Array as DaskArray
except ImportError:
    # Stub standing in for dask's Array when the optional dask dependency is
    # not installed, so that isinstance(..., DaskArray) checks in this module
    # are always valid and simply never match.
    class DaskArray:
        pass
21
22
23__all__ = ["Section", "PrimaryHDU", "ImageHDU"]
24
25
class _ImageBaseHDU(_ValidHDU):
    """FITS image HDU base class.

    Attributes
    ----------
    header
        image header

    data
        image data
    """

    # Default comment strings attached to the mandatory structural keywords
    # when a minimal header is constructed in __init__ or when keywords are
    # (re)written by update_header.
    standard_keyword_comments = {
        'SIMPLE': 'conforms to FITS standard',
        'XTENSION': 'Image extension',
        'BITPIX': 'array data type',
        'NAXIS': 'number of array dimensions',
        'GROUPS': 'has groups',
        'PCOUNT': 'number of parameters',
        'GCOUNT': 'number of groups'
    }
47
    def __init__(self, data=None, header=None, do_not_scale_image_data=False,
                 uint=True, scale_back=False, ignore_blank=False, **kwargs):
        """
        Construct an image HDU from ``data`` and/or ``header``.

        Parameters
        ----------
        data : ndarray or ``DELAYED``, optional
            The image data.  ``DELAYED`` indicates the HDU comes from an open
            file and the data will be read lazily (``header`` is then
            required).
        header : `~astropy.io.fits.Header`, optional
            Header to use.  It is copied and merged on top of a freshly
            constructed minimal standard header.
        do_not_scale_image_data : bool, optional
            If `True`, BZERO/BSCALE scaling is not applied when the data is
            accessed.
        uint : bool, optional
            Enable interpretation of the FITS pseudo-unsigned integer
            convention (signed data plus a specific BZERO).
        scale_back : bool, optional
            If `True`, re-apply the original scaling when saving.
        ignore_blank : bool, optional
            If `True`, any BLANK keyword in the header is ignored.
        **kwargs
            May contain ``name`` and ``ver`` to set EXTNAME/EXTVER.
        """

        # Local import to avoid a circular dependency with the groups module.
        from .groups import GroupsHDU

        super().__init__(data=data, header=header)

        if data is DELAYED:
            # Presumably if data is DELAYED then this HDU is coming from an
            # open file, and was not created in memory
            if header is None:
                # this should never happen
                raise ValueError('No header to setup HDU.')
        else:
            # TODO: Some of this card manipulation should go into the
            # PrimaryHDU and GroupsHDU subclasses
            # construct a list of cards of minimal header
            if isinstance(self, ExtensionHDU):
                c0 = ('XTENSION', 'IMAGE',
                      self.standard_keyword_comments['XTENSION'])
            else:
                c0 = ('SIMPLE', True, self.standard_keyword_comments['SIMPLE'])
            cards = [
                c0,
                ('BITPIX', 8, self.standard_keyword_comments['BITPIX']),
                ('NAXIS', 0, self.standard_keyword_comments['NAXIS'])]

            if isinstance(self, GroupsHDU):
                cards.append(('GROUPS', True,
                             self.standard_keyword_comments['GROUPS']))

            if isinstance(self, (ExtensionHDU, GroupsHDU)):
                cards.append(('PCOUNT', 0,
                              self.standard_keyword_comments['PCOUNT']))
                cards.append(('GCOUNT', 1,
                              self.standard_keyword_comments['GCOUNT']))

            if header is not None:
                # Layer the user-supplied header on top of the minimal one;
                # strip=True removes structural keywords from the copy so the
                # minimal cards above take precedence.
                orig = header.copy()
                header = Header(cards)
                header.extend(orig, strip=True, update=True, end=True)
            else:
                header = Header(cards)

            self._header = header

        self._do_not_scale_image_data = do_not_scale_image_data

        self._uint = uint
        self._scale_back = scale_back

        # Keep track of whether BZERO/BSCALE were set from the header so that
        # values for self._orig_bzero and self._orig_bscale can be set
        # properly, if necessary, once the data has been set.
        bzero_in_header = 'BZERO' in self._header
        bscale_in_header = 'BSCALE' in self._header
        self._bzero = self._header.get('BZERO', 0)
        self._bscale = self._header.get('BSCALE', 1)

        # Save off other important values from the header needed to interpret
        # the image data
        self._axes = [self._header.get('NAXIS' + str(axis + 1), 0)
                      for axis in range(self._header.get('NAXIS', 0))]

        # Not supplying a default for BITPIX makes sense because BITPIX
        # is either in the header or should be determined from the dtype of
        # the data (which occurs when the data is set).
        self._bitpix = self._header.get('BITPIX')
        self._gcount = self._header.get('GCOUNT', 1)
        self._pcount = self._header.get('PCOUNT', 0)
        self._blank = None if ignore_blank else self._header.get('BLANK')
        self._verify_blank()

        self._orig_bitpix = self._bitpix
        self._orig_blank = self._header.get('BLANK')

        # These get set again below, but need to be set to sensible defaults
        # here.
        self._orig_bzero = self._bzero
        self._orig_bscale = self._bscale

        # Set the name attribute if it was provided (if this is an ImageHDU
        # this will result in setting the EXTNAME keyword of the header as
        # well)
        if 'name' in kwargs and kwargs['name']:
            self.name = kwargs['name']
        if 'ver' in kwargs and kwargs['ver']:
            self.ver = kwargs['ver']

        # Set to True if the data or header is replaced, indicating that
        # update_header should be called
        self._modified = False

        if data is DELAYED:
            if (not do_not_scale_image_data and
                    (self._bscale != 1 or self._bzero != 0)):
                # This indicates that when the data is accessed or written out
                # to a new file it will need to be rescaled
                self._data_needs_rescale = True
            return
        else:
            # Setting data will update the header and set _bitpix, _bzero,
            # and _bscale to the appropriate BITPIX for the data, and always
            # sets _bzero=0 and _bscale=1.
            self.data = data

            # Check again for BITPIX/BSCALE/BZERO in case they changed when the
            # data was assigned. This can happen, for example, if the input
            # data is an unsigned int numpy array.
            self._bitpix = self._header.get('BITPIX')

            # Do not provide default values for BZERO and BSCALE here because
            # the keywords will have been deleted in the header if appropriate
            # after scaling. We do not want to put them back in if they
            # should not be there.
            self._bzero = self._header.get('BZERO')
            self._bscale = self._header.get('BSCALE')

        # Handle case where there was no BZERO/BSCALE in the initial header
        # but there should be a BSCALE/BZERO now that the data has been set.
        if not bzero_in_header:
            self._orig_bzero = self._bzero
        if not bscale_in_header:
            self._orig_bscale = self._bscale
172
173    @classmethod
174    def match_header(cls, header):
175        """
176        _ImageBaseHDU is sort of an abstract class for HDUs containing image
177        data (as opposed to table data) and should never be used directly.
178        """
179
180        raise NotImplementedError
181
182    @property
183    def is_image(self):
184        return True
185
186    @property
187    def section(self):
188        """
189        Access a section of the image array without loading the entire array
190        into memory.  The :class:`Section` object returned by this attribute is
191        not meant to be used directly by itself.  Rather, slices of the section
192        return the appropriate slice of the data, and loads *only* that section
193        into memory.
194
195        Sections are mostly obsoleted by memmap support, but should still be
196        used to deal with very large scaled images.  See the
197        :ref:`astropy:data-sections` section of the Astropy documentation for
198        more details.
199        """
200
201        return Section(self)
202
203    @property
204    def shape(self):
205        """
206        Shape of the image array--should be equivalent to ``self.data.shape``.
207        """
208
209        # Determine from the values read from the header
210        return tuple(reversed(self._axes))
211
    @property
    def header(self):
        # The Header object associated with this HDU.
        return self._header

    @header.setter
    def header(self, header):
        # Replacing the header invalidates keyword-derived cached state, so
        # mark the HDU modified *before* calling update_header(), which
        # skips its work when nothing is flagged as changed.
        self._header = header
        self._modified = True
        self.update_header()
221
222    @lazyproperty
223    def data(self):
224        """
225        Image/array data as a `~numpy.ndarray`.
226
227        Please remember that the order of axes on an Numpy array are opposite
228        of the order specified in the FITS file.  For example for a 2D image
229        the "rows" or y-axis are the first dimension, and the "columns" or
230        x-axis are the second dimension.
231
232        If the data is scaled using the BZERO and BSCALE parameters, this
233        attribute returns the data scaled to its physical values unless the
234        file was opened with ``do_not_scale_image_data=True``.
235        """
236
237        if len(self._axes) < 1:
238            return
239
240        data = self._get_scaled_image_data(self._data_offset, self.shape)
241        self._update_header_scale_info(data.dtype)
242
243        return data
244
    @data.setter
    def data(self, data):
        # Setter counterpart of the lazyproperty above; stores directly into
        # self.__dict__ (which is how lazyproperty caches its value) and
        # keeps the header and the cached scale attributes in sync.
        if 'data' in self.__dict__ and self.__dict__['data'] is not None:
            if self.__dict__['data'] is data:
                # Assigning the identical object is a no-op.
                return
            else:
                self._data_replaced = True
            was_unsigned = _is_pseudo_integer(self.__dict__['data'].dtype)
        else:
            self._data_replaced = True
            was_unsigned = False

        if data is not None and not isinstance(data, (np.ndarray, DaskArray)):
            # Try to coerce the data into a numpy array--this will work, on
            # some level, for most objects
            try:
                data = np.array(data)
            except Exception:
                raise TypeError('data object {!r} could not be coerced into an '
                                'ndarray'.format(data))

            if data.shape == ():
                # 0-d arrays cannot be written as FITS image data.
                raise TypeError('data object {!r} should have at least one '
                                'dimension'.format(data))

        self.__dict__['data'] = data
        self._modified = True

        if self.data is None:
            self._axes = []
        else:
            # Set new values of bitpix, bzero, and bscale now, but wait to
            # revise original values until header is updated.
            self._bitpix = DTYPE2BITPIX[data.dtype.name]
            self._bscale = 1
            self._bzero = 0
            self._blank = None
            # NAXISn order is the reverse of the numpy shape.
            self._axes = list(data.shape)
            self._axes.reverse()

        # Update the header, including adding BZERO/BSCALE if new data is
        # unsigned. Does not change the values of self._bitpix,
        # self._orig_bitpix, etc.
        self.update_header()
        if (data is not None and was_unsigned):
            self._update_header_scale_info(data.dtype)

        # Keep _orig_bitpix as it was until header update is done, then
        # set it, to allow easier handling of the case of unsigned
        # integer data being converted to something else. Setting these here
        # is needed only for the case do_not_scale_image_data=True when
        # setting the data to unsigned int.

        # If necessary during initialization, i.e. if BSCALE and BZERO were
        # not in the header but the data was unsigned, the attributes below
        # will be update in __init__.
        self._orig_bitpix = self._bitpix
        self._orig_bscale = self._bscale
        self._orig_bzero = self._bzero

        # returning the data signals to lazyproperty that we've already handled
        # setting self.__dict__['data']
        return data
308
    def update_header(self):
        """
        Update the header keywords to agree with the data.

        Synchronizes BITPIX, NAXIS, and the NAXISn keywords (adding,
        updating, or deleting them as needed) and refreshes the cached
        BLANK value.  A no-op unless the HDU, its header, or the data
        shape has been modified.
        """

        if not (self._modified or self._header._modified or
                (self._has_data and self.shape != self.data.shape)):
            # Not likely that anything needs updating
            return

        old_naxis = self._header.get('NAXIS', 0)

        if 'BITPIX' not in self._header:
            bitpix_comment = self.standard_keyword_comments['BITPIX']
        else:
            # Preserve whatever comment the existing card carries.
            bitpix_comment = self._header.comments['BITPIX']

        # Update the BITPIX keyword and ensure it's in the correct
        # location in the header (immediately after the first card).
        self._header.set('BITPIX', self._bitpix, bitpix_comment, after=0)

        # If the data's shape has changed (this may have happened without our
        # noticing either via a direct update to the data.shape attribute) we
        # need to update the internal self._axes
        if self._has_data and self.shape != self.data.shape:
            self._axes = list(self.data.shape)
            self._axes.reverse()

        # Update the NAXIS keyword and ensure it's in the correct location in
        # the header
        if 'NAXIS' in self._header:
            naxis_comment = self._header.comments['NAXIS']
        else:
            naxis_comment = self.standard_keyword_comments['NAXIS']
        self._header.set('NAXIS', len(self._axes), naxis_comment,
                         after='BITPIX')

        # TODO: This routine is repeated in several different classes--it
        # should probably be made available as a method on all standard HDU
        # types
        # add NAXISi if it does not exist
        for idx, axis in enumerate(self._axes):
            naxisn = 'NAXIS' + str(idx + 1)
            if naxisn in self._header:
                self._header[naxisn] = axis
            else:
                # Keep NAXISn cards contiguous and in ascending order.
                if (idx == 0):
                    after = 'NAXIS'
                else:
                    after = 'NAXIS' + str(idx)
                self._header.set(naxisn, axis, after=after)

        # delete extra NAXISi's
        for idx in range(len(self._axes) + 1, old_naxis + 1):
            try:
                del self._header['NAXIS' + str(idx)]
            except KeyError:
                pass

        if 'BLANK' in self._header:
            self._blank = self._header['BLANK']

        # Add BSCALE/BZERO to header if data is unsigned int.
        self._update_pseudo_int_scale_keywords()

        self._modified = False
375
    def _update_header_scale_info(self, dtype=None):
        """
        Delete BSCALE/BZERO from header if necessary.

        Parameters
        ----------
        dtype : numpy.dtype, optional
            The dtype of the data after scaling; if not given it is derived
            from the original BITPIX/BSCALE/BZERO via ``_dtype_for_bitpix``.
        """

        # Note that _dtype_for_bitpix determines the dtype based on the
        # "original" values of bitpix, bscale, and bzero, stored in
        # self._orig_bitpix, etc. It contains the logic for determining which
        # special cases of BZERO/BSCALE, if any, are auto-detected as following
        # the FITS unsigned int convention.

        # Added original_was_unsigned with the intent of facilitating the
        # special case of do_not_scale_image_data=True and uint=True
        # eventually.
        # FIXME: unused, maybe it should be useful?
        # if self._dtype_for_bitpix() is not None:
        #     original_was_unsigned = self._dtype_for_bitpix().kind == 'u'
        # else:
        #     original_was_unsigned = False

        # Nothing to do if scaling is disabled or there was no scaling.
        if (self._do_not_scale_image_data or
                (self._orig_bzero == 0 and self._orig_bscale == 1)):
            return

        if dtype is None:
            dtype = self._dtype_for_bitpix()

        if (dtype is not None and dtype.kind == 'u' and
                (self._scale_back or self._scale_back is None)):
            # Data is pseudo-unsigned integers, and the scale_back option
            # was not explicitly set to False, so preserve all the scale
            # factors
            return

        for keyword in ['BSCALE', 'BZERO']:
            try:
                del self._header[keyword]
                # Since _update_header_scale_info can, currently, be called
                # *after* _prewriteto(), replace these with blank cards so
                # the header size doesn't change
                self._header.append()
            except KeyError:
                pass

        if dtype is None:
            dtype = self._dtype_for_bitpix()
        if dtype is not None:
            self._header['BITPIX'] = DTYPE2BITPIX[dtype.name]

        # Reset the cached scale attributes to the "unscaled" state.
        self._bzero = 0
        self._bscale = 1
        self._bitpix = self._header['BITPIX']
        self._blank = self._header.pop('BLANK', None)
429
430    def scale(self, type=None, option='old', bscale=None, bzero=None):
431        """
432        Scale image data by using ``BSCALE``/``BZERO``.
433
434        Call to this method will scale `data` and update the keywords of
435        ``BSCALE`` and ``BZERO`` in the HDU's header.  This method should only
436        be used right before writing to the output file, as the data will be
437        scaled and is therefore not very usable after the call.
438
439        Parameters
440        ----------
441        type : str, optional
442            destination data type, use a string representing a numpy
443            dtype name, (e.g. ``'uint8'``, ``'int16'``, ``'float32'``
444            etc.).  If is `None`, use the current data type.
445
446        option : str, optional
447            How to scale the data: ``"old"`` uses the original ``BSCALE`` and
448            ``BZERO`` values from when the data was read/created (defaulting to
449            1 and 0 if they don't exist). For integer data only, ``"minmax"``
450            uses the minimum and maximum of the data to scale. User-specified
451            ``bscale``/``bzero`` values always take precedence.
452
453        bscale, bzero : int, optional
454            User-specified ``BSCALE`` and ``BZERO`` values
455        """
456
457        # Disable blank support for now
458        self._scale_internal(type=type, option=option, bscale=bscale,
459                             bzero=bzero, blank=None)
460
461    def _scale_internal(self, type=None, option='old', bscale=None, bzero=None,
462                        blank=0):
463        """
464        This is an internal implementation of the `scale` method, which
465        also supports handling BLANK properly.
466
467        TODO: This is only needed for fixing #3865 without introducing any
468        public API changes.  We should support BLANK better when rescaling
469        data, and when that is added the need for this internal interface
470        should go away.
471
472        Note: the default of ``blank=0`` merely reflects the current behavior,
473        and is not necessarily a deliberate choice (better would be to disallow
474        conversion of floats to ints without specifying a BLANK if there are
475        NaN/inf values).
476        """
477
478        if self.data is None:
479            return
480
481        # Determine the destination (numpy) data type
482        if type is None:
483            type = BITPIX2DTYPE[self._bitpix]
484        _type = getattr(np, type)
485
486        # Determine how to scale the data
487        # bscale and bzero takes priority
488        if bscale is not None and bzero is not None:
489            _scale = bscale
490            _zero = bzero
491        elif bscale is not None:
492            _scale = bscale
493            _zero = 0
494        elif bzero is not None:
495            _scale = 1
496            _zero = bzero
497        elif (option == 'old' and self._orig_bscale is not None and
498                self._orig_bzero is not None):
499            _scale = self._orig_bscale
500            _zero = self._orig_bzero
501        elif option == 'minmax' and not issubclass(_type, np.floating):
502            if isinstance(self.data, DaskArray):
503                min = self.data.min().compute()
504                max = self.data.max().compute()
505            else:
506                min = np.minimum.reduce(self.data.flat)
507                max = np.maximum.reduce(self.data.flat)
508
509            if _type == np.uint8:  # uint8 case
510                _zero = min
511                _scale = (max - min) / (2.0 ** 8 - 1)
512            else:
513                _zero = (max + min) / 2.0
514
515                # throw away -2^N
516                nbytes = 8 * _type().itemsize
517                _scale = (max - min) / (2.0 ** nbytes - 2)
518        else:
519            _scale = 1
520            _zero = 0
521
522        # Do the scaling
523        if _zero != 0:
524            if isinstance(self.data, DaskArray):
525                self.data = self.data - _zero
526            else:
527                # 0.9.6.3 to avoid out of range error for BZERO = +32768
528                # We have to explicitly cast _zero to prevent numpy from raising an
529                # error when doing self.data -= zero, and we do this instead of
530                # self.data = self.data - zero to avoid doubling memory usage.
531                np.add(self.data, -_zero, out=self.data, casting='unsafe')
532            self._header['BZERO'] = _zero
533        else:
534            try:
535                del self._header['BZERO']
536            except KeyError:
537                pass
538
539        if _scale and _scale != 1:
540            self.data = self.data / _scale
541            self._header['BSCALE'] = _scale
542        else:
543            try:
544                del self._header['BSCALE']
545            except KeyError:
546                pass
547
548        # Set blanks
549        if blank is not None and issubclass(_type, np.integer):
550            # TODO: Perhaps check that the requested BLANK value fits in the
551            # integer type being scaled to?
552            self.data[np.isnan(self.data)] = blank
553            self._header['BLANK'] = blank
554
555        if self.data.dtype.type != _type:
556            self.data = np.array(np.around(self.data), dtype=_type)
557
558        # Update the BITPIX Card to match the data
559        self._bitpix = DTYPE2BITPIX[self.data.dtype.name]
560        self._bzero = self._header.get('BZERO', 0)
561        self._bscale = self._header.get('BSCALE', 1)
562        self._blank = blank
563        self._header['BITPIX'] = self._bitpix
564
565        # Since the image has been manually scaled, the current
566        # bitpix/bzero/bscale now serve as the 'original' scaling of the image,
567        # as though the original image has been completely replaced
568        self._orig_bitpix = self._bitpix
569        self._orig_bzero = self._bzero
570        self._orig_bscale = self._bscale
571        self._orig_blank = self._blank
572
573    def _verify(self, option='warn'):
574        # update_header can fix some things that would otherwise cause
575        # verification to fail, so do that now...
576        self.update_header()
577        self._verify_blank()
578
579        return super()._verify(option)
580
581    def _verify_blank(self):
582        # Probably not the best place for this (it should probably happen
583        # in _verify as well) but I want to be able to raise this warning
584        # both when the HDU is created and when written
585        if self._blank is None:
586            return
587
588        messages = []
589        # TODO: Once the FITSSchema framewhere is merged these warnings
590        # should be handled by the schema
591        if not _is_int(self._blank):
592            messages.append(
593                "Invalid value for 'BLANK' keyword in header: {!r} "
594                "The 'BLANK' keyword must be an integer.  It will be "
595                "ignored in the meantime.".format(self._blank))
596            self._blank = None
597        if not self._bitpix > 0:
598            messages.append(
599                "Invalid 'BLANK' keyword in header.  The 'BLANK' keyword "
600                "is only applicable to integer data, and will be ignored "
601                "in this HDU.")
602            self._blank = None
603
604        for msg in messages:
605            warnings.warn(msg, VerifyWarning)
606
    def _prewriteto(self, checksum=False, inplace=False):
        # Prepare the HDU for writing: optionally restore the original
        # scaling, sync the header with the data, and force-load scaled data
        # when writing to a new file.
        if self._scale_back:
            # Restore the data to its original type/scaling before writing.
            self._scale_internal(BITPIX2DTYPE[self._orig_bitpix],
                                 blank=self._orig_blank)

        self.update_header()
        if not inplace and self._data_needs_rescale:
            # Go ahead and load the scaled image data and update the header
            # with the correct post-rescaling headers
            _ = self.data

        return super()._prewriteto(checksum, inplace)
619
    def _writedata_internal(self, fileobj):
        """
        Write the image data to ``fileobj`` in big-endian byte order, as
        required by FITS, and return the number of bytes written.
        """
        size = 0

        if self.data is None:
            return size
        elif isinstance(self.data, DaskArray):
            # Dask arrays take a separate, mmap-based write path.
            return self._writeinternal_dask(fileobj)
        else:
            # Based on the system type, determine the byteorders that
            # would need to be swapped to get to big-endian output
            if sys.byteorder == 'little':
                swap_types = ('<', '=')
            else:
                swap_types = ('<',)
            # deal with unsigned integer 16, 32 and 64 data
            if _is_pseudo_integer(self.data.dtype):
                # Convert the unsigned array to signed
                output = np.array(
                    self.data - _pseudo_zero(self.data.dtype),
                    dtype=f'>i{self.data.dtype.itemsize}')
                should_swap = False
            else:
                output = self.data
                byteorder = output.dtype.str[0]
                should_swap = (byteorder in swap_types)

            if should_swap:
                if output.flags.writeable:
                    # Swap in place to avoid a copy, then swap back after
                    # writing so the caller's array is left unchanged.
                    output.byteswap(True)
                    try:
                        fileobj.writearray(output)
                    finally:
                        output.byteswap(True)
                else:
                    # For read-only arrays, there is no way around making
                    # a byteswapped copy of the data.
                    fileobj.writearray(output.byteswap(False))
            else:
                fileobj.writearray(output)

            size += output.size * output.itemsize

            return size
663
664    def _writeinternal_dask(self, fileobj):
665
666        if sys.byteorder == 'little':
667            swap_types = ('<', '=')
668        else:
669            swap_types = ('<',)
670        # deal with unsigned integer 16, 32 and 64 data
671        if _is_pseudo_integer(self.data.dtype):
672            raise NotImplementedError("This dtype isn't currently supported with dask.")
673        else:
674            output = self.data
675            byteorder = output.dtype.str[0]
676            should_swap = (byteorder in swap_types)
677
678        if should_swap:
679            from dask.utils import M
680            # NOTE: the inplace flag to byteswap needs to be False otherwise the array is
681            # byteswapped in place every time it is computed and this affects
682            # the input dask array.
683            output = output.map_blocks(M.byteswap, False).map_blocks(M.newbyteorder, "S")
684
685        initial_position = fileobj.tell()
686        n_bytes = output.nbytes
687
688        # Extend the file n_bytes into the future
689        fileobj.seek(initial_position + n_bytes - 1)
690        fileobj.write(b'\0')
691        fileobj.flush()
692
693        if fileobj.fileobj_mode not in ('rb+', 'wb+', 'ab+'):
694            # Use another file handle if the current one is not in
695            # read/write mode
696            fp = open(fileobj.name, mode='rb+')
697            should_close = True
698        else:
699            fp = fileobj._file
700            should_close = False
701
702        try:
703            outmmap = mmap.mmap(fp.fileno(),
704                                length=initial_position + n_bytes,
705                                access=mmap.ACCESS_WRITE)
706
707            outarr = np.ndarray(shape=output.shape,
708                                dtype=output.dtype,
709                                offset=initial_position,
710                                buffer=outmmap)
711
712            output.store(outarr, lock=True, compute=True)
713        finally:
714            if should_close:
715                fp.close()
716            outmmap.close()
717
718        # On Windows closing the memmap causes the file pointer to return to 0, so
719        # we need to go back to the end of the data (since padding may be written
720        # after)
721        fileobj.seek(initial_position + n_bytes)
722
723        return n_bytes
724
725    def _dtype_for_bitpix(self):
726        """
727        Determine the dtype that the data should be converted to depending on
728        the BITPIX value in the header, and possibly on the BSCALE value as
729        well.  Returns None if there should not be any change.
730        """
731
732        bitpix = self._orig_bitpix
733        # Handle possible conversion to uints if enabled
734        if self._uint and self._orig_bscale == 1:
735            if bitpix == 8 and self._orig_bzero == -128:
736                return np.dtype('int8')
737
738            for bits, dtype in ((16, np.dtype('uint16')),
739                                (32, np.dtype('uint32')),
740                                (64, np.dtype('uint64'))):
741                if bitpix == bits and self._orig_bzero == 1 << (bits - 1):
742                    return dtype
743
744        if bitpix > 16:  # scale integers to Float64
745            return np.dtype('float64')
746        elif bitpix > 0:  # scale integers to Float32
747            return np.dtype('float32')
748
749    def _convert_pseudo_integer(self, data):
750        """
751        Handle "pseudo-unsigned" integers, if the user requested it.  Returns
752        the converted data array if so; otherwise returns None.
753
754        In this case case, we don't need to handle BLANK to convert it to NAN,
755        since we can't do NaNs with integers, anyway, i.e. the user is
756        responsible for managing blanks.
757        """
758
759        dtype = self._dtype_for_bitpix()
760        # bool(dtype) is always False--have to explicitly compare to None; this
761        # caused a fair amount of hair loss
762        if dtype is not None and dtype.kind == 'u':
763            # Convert the input raw data into an unsigned integer array and
764            # then scale the data adjusting for the value of BZERO.  Note that
765            # we subtract the value of BZERO instead of adding because of the
766            # way numpy converts the raw signed array into an unsigned array.
767            bits = dtype.itemsize * 8
768            data = np.array(data, dtype=dtype)
769            data -= np.uint64(1 << (bits - 1))
770
771            return data
772
773    def _get_scaled_image_data(self, offset, shape):
774        """
775        Internal function for reading image data from a file and apply scale
776        factors to it.  Normally this is used for the entire image, but it
777        supports alternate offset/shape for Section support.
778        """
779
780        code = BITPIX2DTYPE[self._orig_bitpix]
781
782        raw_data = self._get_raw_data(shape, code, offset)
783        raw_data.dtype = raw_data.dtype.newbyteorder('>')
784
785        if self._do_not_scale_image_data or (
786                self._orig_bzero == 0 and self._orig_bscale == 1 and
787                self._blank is None):
788            # No further conversion of the data is necessary
789            return raw_data
790
791        try:
792            if self._file.strict_memmap:
793                raise ValueError("Cannot load a memory-mapped image: "
794                                 "BZERO/BSCALE/BLANK header keywords present. "
795                                 "Set memmap=False.")
796        except AttributeError:  # strict_memmap not set
797            pass
798
799        data = None
800        if not (self._orig_bzero == 0 and self._orig_bscale == 1):
801            data = self._convert_pseudo_integer(raw_data)
802
803        if data is None:
804            # In these cases, we end up with floating-point arrays and have to
805            # apply bscale and bzero. We may have to handle BLANK and convert
806            # to NaN in the resulting floating-point arrays.
807            # The BLANK keyword should only be applied for integer data (this
808            # is checked in __init__ but it can't hurt to double check here)
809            blanks = None
810
811            if self._blank is not None and self._bitpix > 0:
812                blanks = raw_data.flat == self._blank
813                # The size of blanks in bytes is the number of elements in
814                # raw_data.flat.  However, if we use np.where instead we will
815                # only use 8 bytes for each index where the condition is true.
816                # So if the number of blank items is fewer than
817                # len(raw_data.flat) / 8, using np.where will use less memory
818                if blanks.sum() < len(blanks) / 8:
819                    blanks = np.where(blanks)
820
821            new_dtype = self._dtype_for_bitpix()
822            if new_dtype is not None:
823                data = np.array(raw_data, dtype=new_dtype)
824            else:  # floating point cases
825                if self._file is not None and self._file.memmap:
826                    data = raw_data.copy()
827                elif not raw_data.flags.writeable:
828                    # create a writeable copy if needed
829                    data = raw_data.copy()
830                # if not memmap, use the space already in memory
831                else:
832                    data = raw_data
833
834            del raw_data
835
836            if self._orig_bscale != 1:
837                np.multiply(data, self._orig_bscale, data)
838            if self._orig_bzero != 0:
839                data += self._orig_bzero
840
841            if self._blank:
842                data.flat[blanks] = np.nan
843
844        return data
845
846    def _summary(self):
847        """
848        Summarize the HDU: name, dimensions, and formats.
849        """
850
851        class_name = self.__class__.__name__
852
853        # if data is touched, use data info.
854        if self._data_loaded:
855            if self.data is None:
856                format = ''
857            else:
858                format = self.data.dtype.name
859                format = format[format.rfind('.')+1:]
860        else:
861            if self.shape and all(self.shape):
862                # Only show the format if all the dimensions are non-zero
863                # if data is not touched yet, use header info.
864                format = BITPIX2DTYPE[self._bitpix]
865            else:
866                format = ''
867
868            if (format and not self._do_not_scale_image_data and
869                    (self._orig_bscale != 1 or self._orig_bzero != 0)):
870                new_dtype = self._dtype_for_bitpix()
871                if new_dtype is not None:
872                    format += f' (rescales to {new_dtype.name})'
873
874        # Display shape in FITS-order
875        shape = tuple(reversed(self.shape))
876
877        return (self.name, self.ver, class_name, len(self._header), shape, format, '')
878
    def _calculate_datasum(self):
        """
        Calculate the value for the ``DATASUM`` card in the HDU.

        The checksum is computed over the big-endian on-disk byte
        representation of the data, so little-endian in-memory arrays are
        byteswapped first (and swapped back afterwards when they were
        modified in place).
        """

        if self._has_data:

            # We have the data to be used.
            d = self.data

            # First handle the special case where the data is unsigned integer
            # 16, 32 or 64
            if _is_pseudo_integer(self.data.dtype):
                # Re-create the signed integer values that are actually
                # stored on disk by subtracting the pseudo-zero offset.
                d = np.array(self.data - _pseudo_zero(self.data.dtype),
                             dtype=f'i{self.data.dtype.itemsize}')

            # Check the byte order of the data.  If it is little endian we
            # must swap it before calculating the datasum.
            if d.dtype.str[0] != '>':
                if d.flags.writeable:
                    # Swap in place to avoid a copy; remember this so the
                    # array can be restored to its original order below.
                    byteswapped = True
                    d = d.byteswap(True)
                    d.dtype = d.dtype.newbyteorder('>')
                else:
                    # If the data is not writeable, we just make a byteswapped
                    # copy and don't bother changing it back after
                    d = d.byteswap(False)
                    d.dtype = d.dtype.newbyteorder('>')
                    byteswapped = False
            else:
                byteswapped = False

            # Checksum over the raw bytes of the (now big-endian) data.
            cs = self._compute_checksum(d.flatten().view(np.uint8))

            # If the data was byteswapped in this method then return it to
            # its original little-endian order.
            if byteswapped and not _is_pseudo_integer(self.data.dtype):
                # (For pseudo-unsigned data, ``d`` is already a temporary
                # copy, so there is nothing to restore.)
                d.byteswap(True)
                d.dtype = d.dtype.newbyteorder('<')

            return cs
        else:
            # This is the case where the data has not been read from the file
            # yet.  We can handle that in a generic manner so we do it in the
            # base class.  The other possibility is that there is no data at
            # all.  This can also be handled in a generic manner.
            return super()._calculate_datasum()
926
927
class Section:
    """
    Image section.

    Slices of this object load the corresponding section of an image array from
    the underlying FITS file on disk, and applies any BSCALE/BZERO factors.

    Section slices cannot be assigned to, and modifications to a section are
    not saved back to the underlying file.

    See the :ref:`astropy:data-sections` section of the Astropy documentation
    for more details.
    """

    def __init__(self, hdu):
        # The HDU whose on-disk data this object lazily slices.
        self.hdu = hdu

    def __getitem__(self, key):
        # Normalize ``key`` to a tuple of per-axis indices.
        if not isinstance(key, tuple):
            key = (key,)
        naxis = len(self.hdu.shape)
        # A Python scalar is returned only when the user indexed every axis
        # with an int (recorded before any Ellipsis padding below).
        return_scalar = (all(isinstance(k, (int, np.integer)) for k in key)
                         and len(key) == naxis)
        if not any(k is Ellipsis for k in key):
            # We can always add a ... at the end, after making note of whether
            # to return a scalar.
            key += Ellipsis,
        ellipsis_count = len([k for k in key if k is Ellipsis])
        if len(key) - ellipsis_count > naxis or ellipsis_count > 1:
            raise IndexError('too many indices for array')
        # Insert extra dimensions as needed: expand the (single) Ellipsis
        # into full slices so that len(key) == naxis afterwards.
        idx = next(i for i, k in enumerate(key + (Ellipsis,)) if k is Ellipsis)
        key = key[:idx] + (slice(None),) * (naxis - len(key) + 1) + key[idx+1:]
        # If every expanded axis is an int, the result is squeezed to 0-dim
        # (but not converted to a scalar, matching ndarray behavior).
        return_0dim = (all(isinstance(k, (int, np.integer)) for k in key)
                       and len(key) == naxis)

        dims = []
        offset = 0
        # Find all leading axes for which a single point is used.
        for idx in range(naxis):
            axis = self.hdu.shape[idx]
            indx = _IndexInfo(key[idx], axis)
            # Accumulate the flattened (row-major) element offset.
            offset = offset * axis + indx.offset
            if not _is_int(key[idx]):
                dims.append(indx.npts)
                break

        # The section is contiguous on disk only if every remaining axis is
        # taken in full with unit step.
        is_contiguous = indx.contiguous
        for jdx in range(idx + 1, naxis):
            axis = self.hdu.shape[jdx]
            indx = _IndexInfo(key[jdx], axis)
            dims.append(indx.npts)
            if indx.npts == axis and indx.contiguous:
                # The offset needs to multiply the length of all remaining axes
                offset *= axis
            else:
                is_contiguous = False

        if is_contiguous:
            # Fast path: a single contiguous read straight from the file,
            # converting the element offset to a byte offset via BITPIX.
            dims = tuple(dims) or (1,)
            bitpix = self.hdu._orig_bitpix
            offset = self.hdu._data_offset + offset * abs(bitpix) // 8
            data = self.hdu._get_scaled_image_data(offset, dims)
        else:
            # Slow path: read sub-sections recursively and assemble them.
            data = self._getdata(key)

        if return_scalar:
            data = data.item()
        elif return_0dim:
            data = data.squeeze()
        return data

    def _getdata(self, keys):
        """
        Read a non-contiguous section by recursing on the first axis indexed
        by a slice or iterable, then combining the per-index pieces.
        """
        for idx, (key, axis) in enumerate(zip(keys, self.hdu.shape)):
            if isinstance(key, slice):
                ks = range(*key.indices(axis))
                break
            elif isiterable(key):
                # Handle both integer and boolean arrays.
                ks = np.arange(axis, dtype=int)[key]
                break
            # This should always break at some point if _getdata is called.

        # Recurse with the current axis pinned to each selected point.
        data = [self[keys[:idx] + (k,) + keys[idx + 1:]] for k in ks]

        if any(isinstance(key, slice) or isiterable(key)
               for key in keys[idx + 1:]):
            # data contains multidimensional arrays; combine them.
            return np.array(data)
        else:
            # Only singleton dimensions remain; concatenate in a 1D array.
            return np.concatenate([np.atleast_1d(array) for array in data])
1020
1021
class PrimaryHDU(_ImageBaseHDU):
    """
    FITS primary HDU class.
    """

    _default_name = 'PRIMARY'

    def __init__(self, data=None, header=None, do_not_scale_image_data=False,
                 ignore_blank=False,
                 uint=True, scale_back=None):
        """
        Construct a primary HDU.

        Parameters
        ----------
        data : array or ``astropy.io.fits.hdu.base.DELAYED``, optional
            The data in the HDU.

        header : `~astropy.io.fits.Header`, optional
            The header to be used (as a template).  When `None`, a minimal
            header is generated.

        do_not_scale_image_data : bool, optional
            If `True`, image data is not scaled using BSCALE/BZERO values
            when read. (default: False)

        ignore_blank : bool, optional
            If `True`, the BLANK header keyword will be ignored if present.
            Otherwise, pixels equal to this value will be replaced with
            NaNs. (default: False)

        uint : bool, optional
            Interpret signed integer data where ``BZERO`` is the
            central value and ``BSCALE == 1`` as unsigned integer
            data.  For example, ``int16`` data with ``BZERO = 32768``
            and ``BSCALE = 1`` would be treated as ``uint16`` data.
            (default: True)

        scale_back : bool, optional
            If `True`, when saving changes to a file that contained scaled
            image data, restore the data to the original type and reapply the
            original BSCALE/BZERO values.  This could lead to loss of accuracy
            if scaling back to integer values after performing floating point
            operations on the data.  Pseudo-unsigned integers are automatically
            rescaled unless scale_back is explicitly set to `False`.
            (default: None)
        """

        super().__init__(
            data=data, header=header,
            do_not_scale_image_data=do_not_scale_image_data, uint=uint,
            ignore_blank=ignore_blank,
            scale_back=scale_back)

        # For a generated header, insert EXTEND right after the last NAXISn
        # keyword (or after NAXIS itself when there are no axes).
        if header is None:
            naxis = self._header['NAXIS']
            after = f'NAXIS{naxis}' if naxis != 0 else 'NAXIS'
            self._header.set('EXTEND', True, after=after)

    @classmethod
    def match_header(cls, header):
        card = header.cards[0]
        if card.keyword != 'SIMPLE':
            return False
        # Due to problems discussed in #5808, we cannot assume the 'GROUPS'
        # keyword to be True/False, have to check the value
        if 'GROUPS' in header and header['GROUPS'] == True:  # noqa
            return False
        return card.value

    def update_header(self):
        super().update_header()

        # Keep EXTEND positioned after the last NAXISn keyword, if present.
        if 'EXTEND' in self._header:
            n = len(self._axes)
            after = f'NAXIS{n}' if n else 'NAXIS'
            self._header.set('EXTEND', after=after)

    def _verify(self, option='warn'):
        errs = super()._verify(option=option)

        # The EXTEND keyword is only mandatory if the HDU has extensions
        # (that condition is checked by HDUList), but when it is present its
        # position and boolean type must be valid.
        if 'EXTEND' in self._header:
            naxis = self._header.get('NAXIS', 0)
            self.req_cards('EXTEND', naxis + 3,
                           lambda v: isinstance(v, bool), True, option, errs)
        return errs
1115
1116
class ImageHDU(_ImageBaseHDU, ExtensionHDU):
    """
    FITS image extension HDU class.
    """

    _extension = 'IMAGE'

    def __init__(self, data=None, header=None, name=None,
                 do_not_scale_image_data=False, uint=True, scale_back=None,
                 ver=None):
        """
        Construct an image HDU.

        Parameters
        ----------
        data : array
            The data in the HDU.

        header : `~astropy.io.fits.Header`
            The header to be used (as a template).  When `None`, a minimal
            header is generated.

        name : str, optional
            The name of the HDU, will be the value of the keyword
            ``EXTNAME``.

        do_not_scale_image_data : bool, optional
            If `True`, image data is not scaled using BSCALE/BZERO values
            when read. (default: False)

        uint : bool, optional
            Interpret signed integer data where ``BZERO`` is the
            central value and ``BSCALE == 1`` as unsigned integer
            data.  For example, ``int16`` data with ``BZERO = 32768``
            and ``BSCALE = 1`` would be treated as ``uint16`` data.
            (default: True)

        scale_back : bool, optional
            If `True`, when saving changes to a file that contained scaled
            image data, restore the data to the original type and reapply the
            original BSCALE/BZERO values.  This could lead to loss of accuracy
            if scaling back to integer values after performing floating point
            operations on the data.  Pseudo-unsigned integers are automatically
            rescaled unless scale_back is explicitly set to `False`.
            (default: None)

        ver : int > 0 or None, optional
            The ver of the HDU, will be the value of the keyword ``EXTVER``.
            If not given or None, it defaults to the value of the ``EXTVER``
            card of the ``header`` or 1.
            (default: None)
        """

        # All behavior lives in the base class; this override exists only to
        # carry the docstring above.
        super().__init__(
            data=data, header=header, name=name,
            do_not_scale_image_data=do_not_scale_image_data, uint=uint,
            scale_back=scale_back, ver=ver)

    @classmethod
    def match_header(cls, header):
        # Match on the first card: XTENSION = 'IMAGE' (with any amount of
        # trailing padding in the value).
        card = header.cards[0]
        if card.keyword != 'XTENSION':
            return False
        xtension = card.value
        if isinstance(xtension, str):
            xtension = xtension.rstrip()
        return xtension == cls._extension

    def _verify(self, option='warn'):
        """
        ImageHDU verify method.
        """

        errs = super()._verify(option=option)
        naxis = self._header.get('NAXIS', 0)
        # PCOUNT must == 0, GCOUNT must == 1; the former is verified in
        # ExtensionHDU._verify, however ExtensionHDU._verify allows PCOUNT
        # to be >= 0, so we need to check it here
        self.req_cards('PCOUNT', naxis + 3, lambda v: (_is_int(v) and v == 0),
                       0, option, errs)
        return errs
1199
1200
class _IndexInfo:
    """
    Analyze a single-axis index (int, slice, or iterable) for `Section`.

    Attributes
    ----------
    npts : int
        Number of points selected along the axis.
    offset : int
        Offset (in elements) of the first selected point along the axis.
    contiguous : bool
        Whether the selection is a contiguous run of elements, allowing a
        single direct read from the file.
    """

    def __init__(self, indx, naxis):
        if _is_int(indx):
            if 0 <= indx < naxis:
                self.npts = 1
                self.offset = indx
                self.contiguous = True
            else:
                raise IndexError(f'Index {indx} out of range.')
        elif isinstance(indx, slice):
            start, stop, step = indx.indices(naxis)
            # len(range(...)) rounds up correctly for step > 1; the previous
            # (stop - start) // step undercounted, e.g. slice(0, 5, 2)
            # selects 3 points, not 2.  (Identical for step == 1, the only
            # case treated as contiguous.)
            self.npts = len(range(start, stop, step))
            self.offset = start
            self.contiguous = step == 1
        elif isiterable(indx):
            self.npts = len(indx)
            self.offset = 0
            self.contiguous = False
        else:
            raise IndexError(f'Illegal index {indx}')
1221