# Licensed under a 3-clause BSD style license - see PYFITS.rst


import contextlib
import csv
import operator
import os
import re
import sys
import textwrap
import warnings
from contextlib import suppress

import numpy as np
from numpy import char as chararray

from .base import DELAYED, _ValidHDU, ExtensionHDU
# This module may have many dependencies on astropy.io.fits.column, but
# astropy.io.fits.column has fewer dependencies overall, so it's easier to
# keep table/column-related utilities in astropy.io.fits.column
from astropy.io.fits.column import (FITS2NUMPY, KEYWORD_NAMES, KEYWORD_TO_ATTRIBUTE,
                      ATTRIBUTE_TO_KEYWORD, TDEF_RE, Column, ColDefs,
                      _AsciiColDefs, _FormatP, _FormatQ, _makep,
                      _parse_tformat, _scalar_to_format, _convert_format,
                      _cmp_recformats)
from astropy.io.fits.fitsrec import FITS_rec, _get_recarray_field, _has_unicode_fields
from astropy.io.fits.header import Header, _pad_length
from astropy.io.fits.util import _is_int, _str_to_num

from astropy.utils import lazyproperty
from astropy.utils.exceptions import AstropyDeprecationWarning
from astropy.utils.decorators import deprecated_renamed_argument


class FITSTableDumpDialect(csv.excel):
    """
    A CSV dialect for the Astropy format of ASCII dumps of FITS tables.
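
    Examples
    --------
    A minimal sketch of how this dialect behaves (``io.StringIO`` stands in
    for a real dump file)::

        import csv
        import io

        buf = io.StringIO()
        writer = csv.writer(buf, dialect=FITSTableDumpDialect)
        writer.writerow(['1', '2.5', 'a b'])
        # buf.getvalue() is now '"1" "2.5" "a b"' followed by a newline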
38    """
39
40    delimiter = ' '
41    lineterminator = '\n'
42    quotechar = '"'
43    quoting = csv.QUOTE_ALL
44    skipinitialspace = True
45
46
47class _TableLikeHDU(_ValidHDU):
48    """
49    A class for HDUs that have table-like data.  This is used for both
50    Binary/ASCII tables as well as Random Access Group HDUs (which are
51    otherwise too dissimilar for tables to use _TableBaseHDU directly).
52    """
53
54    _data_type = FITS_rec
55    _columns_type = ColDefs
56
57    # TODO: Temporary flag representing whether uints are enabled; remove this
58    # after restructuring to support uints by default on a per-column basis
59    _uint = False
60
61    @classmethod
62    def match_header(cls, header):
63        """
64        This is an abstract HDU type for HDUs that contain table-like data.
65        This is even more abstract than _TableBaseHDU which is specifically for
66        the standard ASCII and Binary Table types.
67        """
68
69        raise NotImplementedError
70
71    @classmethod
72    def from_columns(cls, columns, header=None, nrows=0, fill=False,
73                     character_as_bytes=False, **kwargs):
74        """
        Given either a `ColDefs` object, a sequence of `Column` objects,
        or another table HDU or table data (a `FITS_rec` or multi-field
        `numpy.ndarray` or `numpy.recarray` object), return a new table HDU of
        the class this method was called on, using the column definitions from
        the input.

        See also `FITS_rec.from_columns`.

        Parameters
        ----------
        columns : sequence of `Column`, `ColDefs` -like
            The columns from which to create the table data, or an object with
            a column-like structure from which a `ColDefs` can be instantiated.
            This includes an existing `BinTableHDU` or `TableHDU`, or a
            `numpy.recarray` to give some examples.

            If these columns have data arrays attached, that data may be used
            in initializing the new table.  Otherwise the input columns will be
            used as a template for a new table with the requested number of
            rows.

        header : `Header`
            An optional `Header` object with which to instantiate the new HDU.  Header
            keywords specifically related to defining the table structure (such
            as the "TXXXn" keywords like TTYPEn) will be overridden by the
            supplied column definitions, but all other informational and data
            model-specific keywords are kept.

        nrows : int
            Number of rows in the new table.  If the input columns have data
            associated with them, the size of the largest input column is used.
            Otherwise the default is 0.

        fill : bool
            If `True`, will fill all cells with zeros or blanks.  If `False`,
            copy the data from the input; undefined cells will still be filled
            with zeros/blanks.

        character_as_bytes : bool
            Whether to return bytes for string columns when accessed from the
            HDU. By default this is `False` and (unicode) strings are returned,
            but for large tables this may use up a lot of memory.

        Notes
        -----

        Any additional keyword arguments accepted by the HDU class's
        ``__init__`` may also be passed in as keyword arguments.
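
        Examples
        --------
        A minimal sketch (the column names, formats, and values here are
        purely illustrative)::

            from astropy.io import fits

            c1 = fits.Column(name='TIME', format='D', array=[1.0, 2.0])
            c2 = fits.Column(name='FLAG', format='L', array=[True, False])
            hdu = fits.BinTableHDU.from_columns([c1, c2])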
123        """
124
125        coldefs = cls._columns_type(columns)
126        data = FITS_rec.from_columns(coldefs, nrows=nrows, fill=fill,
127                                     character_as_bytes=character_as_bytes)
128        hdu = cls(data=data, header=header, character_as_bytes=character_as_bytes, **kwargs)
129        coldefs._add_listener(hdu)
130        return hdu
131
132    @lazyproperty
133    def columns(self):
134        """
        The :class:`ColDefs` object describing the columns in this table.
136        """
137
138        # The base class doesn't make any assumptions about where the column
139        # definitions come from, so just return an empty ColDefs
140        return ColDefs([])
141
142    @property
143    def _nrows(self):
144        """
        Table-like HDUs must provide an attribute that specifies the number of
        rows in the HDU's table.

        For now this is an internal-only attribute.
        """

        raise NotImplementedError

    def _get_tbdata(self):
        """Get the table data from an input HDU object."""

        columns = self.columns

        # TODO: Details related to variable length arrays need to be dealt with
        # specifically in the BinTableHDU class, since they're a detail
        # specific to FITS binary tables
        if (any(type(r) in (_FormatP, _FormatQ)
                for r in columns._recformats) and
                self._data_size is not None and
                self._data_size > self._theap):
            # We have a heap; include it in the raw_data
            raw_data = self._get_raw_data(self._data_size, np.uint8,
                                          self._data_offset)
            tbsize = self._header['NAXIS1'] * self._header['NAXIS2']
            data = raw_data[:tbsize].view(dtype=columns.dtype,
                                          type=np.rec.recarray)
        else:
            raw_data = self._get_raw_data(self._nrows, columns.dtype,
                                          self._data_offset)
            if raw_data is None:
                # This can happen when a brand new table HDU is being created
                # and no data has been assigned to the columns, in which case
                # just return an empty array
                raw_data = np.array([], dtype=columns.dtype)

            data = raw_data.view(np.rec.recarray)

        self._init_tbdata(data)
        data = data.view(self._data_type)
        columns._add_listener(data)
        return data

    def _init_tbdata(self, data):
        columns = self.columns

        data.dtype = data.dtype.newbyteorder('>')

        # hack to enable pseudo-uint support
        data._uint = self._uint

        # pass the heap offset (datLoc), needed for P format columns
        data._heapoffset = self._theap
        data._heapsize = self._header['PCOUNT']
        tbsize = self._header['NAXIS1'] * self._header['NAXIS2']
        data._gap = self._theap - tbsize

        # pass the attributes
        for idx, col in enumerate(columns):
            # get the data for each column object from the rec.recarray
            col.array = data.field(idx)

        # delete the _arrays attribute so that it is recreated to point to the
        # new data placed in the column object above
        del columns._arrays

    def _update_load_data(self):
        """Load the data if asked to."""
        if not self._data_loaded:
            self.data

    def _update_column_added(self, columns, column):
        """
        Update the data upon addition of a new column through the `ColDefs`
        interface.
        """
        # recreate data from the columns
        self.data = FITS_rec.from_columns(
            self.columns, nrows=self._nrows, fill=False,
            character_as_bytes=self._character_as_bytes
        )

    def _update_column_removed(self, columns, col_idx):
        """
        Update the data upon removal of a column through the `ColDefs`
        interface.
        """
        # recreate data from the columns
        self.data = FITS_rec.from_columns(
            self.columns, nrows=self._nrows, fill=False,
            character_as_bytes=self._character_as_bytes
        )


class _TableBaseHDU(ExtensionHDU, _TableLikeHDU):
    """
    FITS table extension base HDU class.

    Parameters
    ----------
    data : array
        Data to be used.
    header : `Header` instance
        Header to be used. If the ``data`` is also specified, header keywords
        specifically related to defining the table structure (such as the
        "TXXXn" keywords like TTYPEn) will be overridden by the supplied column
        definitions, but all other informational and data model-specific
        keywords are kept.
    name : str
        Name to be populated in ``EXTNAME`` keyword.
    uint : bool, optional
        Set to `True` if the table contains unsigned integer columns.
    ver : int > 0 or None, optional
        The version of the HDU; this will be the value of the keyword
        ``EXTVER``.
        If not given or None, it defaults to the value of the ``EXTVER``
        card of the ``header`` or 1.
        (default: None)
    character_as_bytes : bool
        Whether to return bytes for string columns. By default this is `False`
        and (unicode) strings are returned, but this does not respect memory
        mapping and loads the whole column in memory when accessed.
    """

    _manages_own_heap = False
    """
    This flag implies that when writing VLA tables (P/Q format) the heap
    pointers that go into P/Q table columns should not be reordered or
    rearranged in any way by the default heap management code.

    This is included primarily as an optimization for compressed image HDUs
    which perform their own heap maintenance.
    """

    def __init__(self, data=None, header=None, name=None, uint=False, ver=None,
                 character_as_bytes=False):

        super().__init__(data=data, header=header, name=name, ver=ver)

        self._uint = uint
        self._character_as_bytes = character_as_bytes

        if data is DELAYED:
            # this should never happen
            if header is None:
                raise ValueError('No header to setup HDU.')

            # if the file is read the first time, no need to copy, and keep it
            # unchanged
            else:
                self._header = header
        else:
            # construct a list of cards of minimal header
            cards = [
                ('XTENSION', self._extension, self._ext_comment),
                ('BITPIX', 8, 'array data type'),
                ('NAXIS', 2, 'number of array dimensions'),
                ('NAXIS1', 0, 'length of dimension 1'),
                ('NAXIS2', 0, 'length of dimension 2'),
                ('PCOUNT', 0, 'number of group parameters'),
                ('GCOUNT', 1, 'number of groups'),
                ('TFIELDS', 0, 'number of table fields')]

            if header is not None:

                # Make a "copy" (not just a view) of the input header, since it
                # may get modified.  The data is still a "view" (for now).
                hcopy = header.copy(strip=True)
                cards.extend(hcopy.cards)

            self._header = Header(cards)

            if isinstance(data, np.ndarray) and data.dtype.fields is not None:
                # self._data_type is FITS_rec.
                if isinstance(data, self._data_type):
                    self.data = data
                else:
                    self.data = self._data_type.from_columns(data)

                # TEMP: Special column keywords are normally overwritten by attributes
                # from Column objects. In Astropy 3.0, several new keywords are now
                # recognized as being special column keywords, but we don't
                # automatically clear them yet, as we need to raise a deprecation
                # warning for at least one major version.
                if header is not None:
                    future_ignore = set()
                    for keyword in header.keys():
                        match = TDEF_RE.match(keyword)
                        try:
                            base_keyword = match.group('label')
                        except Exception:
                            continue                # skip if there is no match
                        if base_keyword in {'TCTYP', 'TCUNI', 'TCRPX', 'TCRVL', 'TCDLT', 'TRPOS'}:
                            future_ignore.add(base_keyword)
                    if future_ignore:
                        keys = ', '.join(x + 'n' for x in sorted(future_ignore))
                        warnings.warn("The following keywords are now recognized as special "
                                      "column-related attributes and should be set via the "
                                      "Column objects: {}. In future, these values will be "
                                      "dropped from manually specified headers automatically "
                                      "and replaced with values generated based on the "
                                      "Column objects.".format(keys), AstropyDeprecationWarning)

                # TODO: Too much of the code in this class uses header keywords
                # in making calculations related to the data size.  This is
                # unreliable, however, in cases when users mess with the header
                # unintentionally--code that does this should be cleaned up.
                self._header['NAXIS1'] = self.data._raw_itemsize
                self._header['NAXIS2'] = self.data.shape[0]
                self._header['TFIELDS'] = len(self.data._coldefs)

                self.columns = self.data._coldefs
                self.columns._add_listener(self.data)
                self.update()

                with suppress(TypeError, AttributeError):
                    # Make the ndarrays in the Column objects of the ColDefs
                    # object of the HDU reference the same ndarray as the HDU's
                    # FITS_rec object.
                    for idx, col in enumerate(self.columns):
                        col.array = self.data.field(idx)

                    # Delete the _arrays attribute so that it is recreated to
                    # point to the new data placed in the column objects above
                    del self.columns._arrays
            elif data is None:
                pass
            else:
                raise TypeError('Table data has incorrect type.')

        # Ensure that the correct EXTNAME is set on the new header if one was
        # created, or that it overrides the existing EXTNAME if different
        if name:
            self.name = name
        if ver is not None:
            self.ver = ver

    @classmethod
    def match_header(cls, header):
        """
        This is an abstract type that implements the shared functionality of
        the ASCII and Binary Table HDU types, which should be used instead of
        this.
        """

        raise NotImplementedError

    @lazyproperty
    def columns(self):
        """
        The :class:`ColDefs` object describing the columns in this table.
394        """
395
396        if self._has_data and hasattr(self.data, '_coldefs'):
397            return self.data._coldefs
398        return self._columns_type(self)
399
400    @lazyproperty
401    def data(self):
402        data = self._get_tbdata()
403        data._coldefs = self.columns
404        data._character_as_bytes = self._character_as_bytes
405        # Columns should now just return a reference to the data._coldefs
406        del self.columns
407        return data
408
409    @data.setter
410    def data(self, data):
411        if 'data' in self.__dict__:
412            if self.__dict__['data'] is data:
413                return
414            else:
415                self._data_replaced = True
416        else:
417            self._data_replaced = True
418
419        self._modified = True
420
421        if data is None and self.columns:
422            # Create a new table with the same columns, but empty rows
423            formats = ','.join(self.columns._recformats)
424            data = np.rec.array(None, formats=formats,
425                                names=self.columns.names,
426                                shape=0)
427
428        if isinstance(data, np.ndarray) and data.dtype.fields is not None:
429            # Go ahead and always make a view, even if the data is already the
430            # correct class (self._data_type) so we can update things like the
431            # column defs, if necessary
432            data = data.view(self._data_type)
433
434            if not isinstance(data.columns, self._columns_type):
435                # This would be the place, if the input data was for an ASCII
436                # table and this is binary table, or vice versa, to convert the
437                # data to the appropriate format for the table type
438                new_columns = self._columns_type(data.columns)
439                data = FITS_rec.from_columns(new_columns)
440
441            if 'data' in self.__dict__:
442                self.columns._remove_listener(self.__dict__['data'])
443            self.__dict__['data'] = data
444
445            self.columns = self.data.columns
446            self.columns._add_listener(self.data)
447            self.update()
448
449            with suppress(TypeError, AttributeError):
450                # Make the ndarrays in the Column objects of the ColDefs
451                # object of the HDU reference the same ndarray as the HDU's
452                # FITS_rec object.
453                for idx, col in enumerate(self.columns):
454                    col.array = self.data.field(idx)
455
456                # Delete the _arrays attribute so that it is recreated to
457                # point to the new data placed in the column objects above
458                del self.columns._arrays
459        elif data is None:
460            pass
461        else:
462            raise TypeError('Table data has incorrect type.')
463
464        # returning the data signals to lazyproperty that we've already handled
465        # setting self.__dict__['data']
466        return data
467
468    @property
469    def _nrows(self):
470        if not self._data_loaded:
471            return self._header.get('NAXIS2', 0)
472        else:
473            return len(self.data)
474
475    @lazyproperty
476    def _theap(self):
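        # THEAP, when present, gives the byte offset of the heap from the
        # start of the data portion; by default the heap immediately follows
        # the main table, i.e. at NAXIS1 * NAXIS2.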
        size = self._header['NAXIS1'] * self._header['NAXIS2']
        return self._header.get('THEAP', size)

    # TODO: Need to either rename this to update_header, for symmetry with the
    # Image HDUs, or just at some point deprecate it and remove it altogether,
    # since header updates should occur automatically when necessary...
    def update(self):
        """
        Update header keywords to reflect recent changes of columns.
        """

        self._header.set('NAXIS1', self.data._raw_itemsize, after='NAXIS')
        self._header.set('NAXIS2', self.data.shape[0], after='NAXIS1')
        self._header.set('TFIELDS', len(self.columns), after='GCOUNT')

        self._clear_table_keywords()
        self._populate_table_keywords()

    def copy(self):
        """
        Make a copy of the table HDU; both header and data are copied.
498        """
499
500        # touch the data, so it's defined (in the case of reading from a
501        # FITS file)
502        return self.__class__(data=self.data.copy(),
503                              header=self._header.copy())
504
505    def _prewriteto(self, checksum=False, inplace=False):
506        if self._has_data:
507            self.data._scale_back(
508                update_heap_pointers=not self._manages_own_heap)
509            # check TFIELDS and NAXIS2
510            self._header['TFIELDS'] = len(self.data._coldefs)
511            self._header['NAXIS2'] = self.data.shape[0]
512
513            # calculate PCOUNT, for variable length tables
514            tbsize = self._header['NAXIS1'] * self._header['NAXIS2']
515            heapstart = self._header.get('THEAP', tbsize)
516            self.data._gap = heapstart - tbsize
517            pcount = self.data._heapsize + self.data._gap
518            if pcount > 0:
519                self._header['PCOUNT'] = pcount
520
521            # update the other T****n keywords
522            self._populate_table_keywords()
523
524            # update TFORM for variable length columns
525            for idx in range(self.data._nfields):
526                format = self.data._coldefs._recformats[idx]
527                if isinstance(format, _FormatP):
528                    _max = self.data.field(idx).max
529                    # May be either _FormatP or _FormatQ
530                    format_cls = format.__class__
531                    format = format_cls(format.dtype, repeat=format.repeat,
532                                        max=_max)
533                    self._header['TFORM' + str(idx + 1)] = format.tform
534        return super()._prewriteto(checksum, inplace)
535
536    def _verify(self, option='warn'):
537        """
538        _TableBaseHDU verify method.
539        """
540
541        errs = super()._verify(option=option)
542        if not (isinstance(self._header[0], str) and
543                self._header[0].rstrip() == self._extension):
544
545            err_text = 'The XTENSION keyword must match the HDU type.'
546            fix_text = f'Converted the XTENSION keyword to {self._extension}.'
547
548            def fix(header=self._header):
549                header[0] = (self._extension, self._ext_comment)
550
551            errs.append(self.run_option(option, err_text=err_text,
552                                        fix_text=fix_text, fix=fix))
553
554        self.req_cards('NAXIS', None, lambda v: (v == 2), 2, option, errs)
555        self.req_cards('BITPIX', None, lambda v: (v == 8), 8, option, errs)
556        self.req_cards('TFIELDS', 7,
557                       lambda v: (_is_int(v) and v >= 0 and v <= 999), 0,
558                       option, errs)
559        tfields = self._header['TFIELDS']
560        for idx in range(tfields):
561            self.req_cards('TFORM' + str(idx + 1), None, None, None, option,
562                           errs)
563        return errs
564
565    def _summary(self):
566        """
567        Summarize the HDU: name, dimensions, and formats.
568        """
569
570        class_name = self.__class__.__name__
571
572        # if data is touched, use data info.
573        if self._data_loaded:
574            if self.data is None:
575                nrows = 0
576            else:
577                nrows = len(self.data)
578
579            ncols = len(self.columns)
580            format = self.columns.formats
581
582        # if data is not touched yet, use header info.
583        else:
584            nrows = self._header['NAXIS2']
585            ncols = self._header['TFIELDS']
586            format = ', '.join([self._header['TFORM' + str(j + 1)]
587                                for j in range(ncols)])
588            format = f'[{format}]'
589        dims = f"{nrows}R x {ncols}C"
590        ncards = len(self._header)
591
592        return (self.name, self.ver, class_name, ncards, dims, format)
593
594    def _update_column_removed(self, columns, idx):
595        super()._update_column_removed(columns, idx)
596
597        # Fix the header to reflect the column removal
598        self._clear_table_keywords(index=idx)
599
600    def _update_column_attribute_changed(self, column, col_idx, attr,
601                                         old_value, new_value):
602        """
603        Update the header when one of the column objects is updated.
604        """
605
606        # base_keyword is the keyword without the index such as TDIM
607        # while keyword is like TDIM1
608        base_keyword = ATTRIBUTE_TO_KEYWORD[attr]
609        keyword = base_keyword + str(col_idx + 1)
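        # e.g. if the ``unit`` attribute of the third column changed,
        # base_keyword is 'TUNIT' and keyword is 'TUNIT3'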

        if keyword in self._header:
            if new_value is None:
                # If the new value is None, i.e. None was assigned to the
                # column attribute, then treat this as equivalent to deleting
                # that attribute
                del self._header[keyword]
            else:
                self._header[keyword] = new_value
        else:
            keyword_idx = KEYWORD_NAMES.index(base_keyword)
            # Determine the appropriate keyword to insert this one before/after
            # if it did not already exist in the header
            for before_keyword in reversed(KEYWORD_NAMES[:keyword_idx]):
                before_keyword += str(col_idx + 1)
                if before_keyword in self._header:
                    self._header.insert(before_keyword, (keyword, new_value),
                                        after=True)
                    break
            else:
                for after_keyword in KEYWORD_NAMES[keyword_idx + 1:]:
                    after_keyword += str(col_idx + 1)
                    if after_keyword in self._header:
                        self._header.insert(after_keyword,
                                            (keyword, new_value))
                        break
                else:
                    # Just append
                    self._header[keyword] = new_value

    def _clear_table_keywords(self, index=None):
        """
        Wipe out any existing table definition keywords from the header.

        If specified, only clear keywords for the given table index (shifting
        up keywords for any other columns).  The index is zero-based.
        Otherwise, clear keywords for all columns.
647        """
648
        # First collect all the table structure related keywords in the header
        # into a single list so we can then sort them by index, which will be
        # useful later for updating the header in a sensible order (since the
        # header *might* not already be written in a reasonable order)
        table_keywords = []

        for idx, keyword in enumerate(self._header.keys()):
            match = TDEF_RE.match(keyword)
            try:
                base_keyword = match.group('label')
            except Exception:
                continue                # skip if there is no match

            if base_keyword in KEYWORD_TO_ATTRIBUTE:

                # TEMP: For Astropy 3.0 we don't clear away the following keywords
                # as we are first raising a deprecation warning that these will be
                # dropped automatically if they were specified in the header. We
                # can remove this once we are happy to break backward-compatibility
                if base_keyword in {'TCTYP', 'TCUNI', 'TCRPX', 'TCRVL', 'TCDLT', 'TRPOS'}:
                    continue

                num = int(match.group('num')) - 1  # convert to zero-base
                table_keywords.append((idx, match.group(0), base_keyword,
                                       num))

        # First delete
        rev_sorted_idx_0 = sorted(table_keywords, key=operator.itemgetter(0),
                                  reverse=True)
        for idx, keyword, _, num in rev_sorted_idx_0:
            if index is None or index == num:
                del self._header[idx]

        # Now shift up remaining column keywords if only one column was cleared
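        # (e.g. after clearing the column at zero-based index 1, TTYPE3
        # becomes TTYPE2, TFORM3 becomes TFORM2, and so on)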
        if index is not None:
            sorted_idx_3 = sorted(table_keywords, key=operator.itemgetter(3))
            for _, keyword, base_keyword, num in sorted_idx_3:
                if num <= index:
                    continue

                old_card = self._header.cards[keyword]
                new_card = (base_keyword + str(num), old_card.value,
                            old_card.comment)
                self._header.insert(keyword, new_card)
                del self._header[keyword]

            # Also decrement TFIELDS
            if 'TFIELDS' in self._header:
                self._header['TFIELDS'] -= 1

    def _populate_table_keywords(self):
700        """Populate the new table definition keywords from the header."""

        for idx, column in enumerate(self.columns):
            for keyword, attr in KEYWORD_TO_ATTRIBUTE.items():
                val = getattr(column, attr)
                if val is not None:
                    keyword = keyword + str(idx + 1)
                    self._header[keyword] = val


class TableHDU(_TableBaseHDU):
    """
    FITS ASCII table extension HDU class.

    Parameters
    ----------
    data : array or `FITS_rec`
        Data to be used.
    header : `Header`
        Header to be used.
    name : str
        Name to be populated in ``EXTNAME`` keyword.
    ver : int > 0 or None, optional
        The version of the HDU; this will be the value of the keyword
        ``EXTVER``.
        If not given or None, it defaults to the value of the ``EXTVER``
        card of the ``header`` or 1.
        (default: None)
    character_as_bytes : bool
        Whether to return bytes for string columns. By default this is `False`
        and (unicode) strings are returned, but this does not respect memory
        mapping and loads the whole column in memory when accessed.

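    Examples
    --------
    A minimal sketch of building an ASCII table HDU from columns (the names,
    formats, and values are illustrative)::

        from astropy.io import fits

        c1 = fits.Column(name='OBJECT', format='A8', array=['NGC1', 'NGC2'])
        c2 = fits.Column(name='MAG', format='F8.3', array=[12.1, 13.2])
        hdu = fits.TableHDU.from_columns([c1, c2])
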
732    """
733
734    _extension = 'TABLE'
735    _ext_comment = 'ASCII table extension'
736
737    _padding_byte = ' '
738    _columns_type = _AsciiColDefs
739
740    __format_RE = re.compile(
741        r'(?P<code>[ADEFIJ])(?P<width>\d+)(?:\.(?P<prec>\d+))?')
742
743    def __init__(self, data=None, header=None, name=None, ver=None, character_as_bytes=False):
744        super().__init__(data, header, name=name, ver=ver, character_as_bytes=character_as_bytes)
745
746    @classmethod
747    def match_header(cls, header):
748        card = header.cards[0]
749        xtension = card.value
750        if isinstance(xtension, str):
751            xtension = xtension.rstrip()
752        return card.keyword == 'XTENSION' and xtension == cls._extension
753
754    def _get_tbdata(self):
755        columns = self.columns
        names = list(columns.names)
        # determine if there are duplicate field names and, if there
        # are, throw an exception
        dup = np.rec.find_duplicate(names)

        if dup:
            raise ValueError(f"Duplicate field names: {dup}")

        # TODO: Determine if this extra logic is necessary--I feel like the
        # _AsciiColDefs class should be responsible for telling the table what
        # its dtype should be...
        itemsize = columns.spans[-1] + columns.starts[-1] - 1
        dtype = {}

        for idx in range(len(columns)):
            data_type = 'S' + str(columns.spans[idx])

            if idx == len(columns) - 1:
                # The last column is padded out to the value of NAXIS1
                if self._header['NAXIS1'] > itemsize:
                    data_type = 'S' + str(columns.spans[idx] +
                                self._header['NAXIS1'] - itemsize)
            dtype[columns.names[idx]] = (data_type, columns.starts[idx] - 1)

        raw_data = self._get_raw_data(self._nrows, dtype, self._data_offset)
        data = raw_data.view(np.rec.recarray)
        self._init_tbdata(data)
        return data.view(self._data_type)

    def _calculate_datasum(self):
        """
        Calculate the value for the ``DATASUM`` card in the HDU.
        """

        if self._has_data:
            # We have the data to be used.
            # We need to pad the data to a block length before calculating
            # the datasum.
            bytes_array = self.data.view(type=np.ndarray, dtype=np.ubyte)
            padding = np.frombuffer(_pad_length(self.size) * b' ',
                                    dtype=np.ubyte)

            d = np.append(bytes_array, padding)

            cs = self._compute_checksum(d)
            return cs
        else:
            # This is the case where the data has not been read from the file
            # yet.  We can handle that in a generic manner so we do it in the
            # base class.  The other possibility is that there is no data at
            # all.  This can also be handled in a generic manner.
            return super()._calculate_datasum()

    def _verify(self, option='warn'):
        """
        `TableHDU` verify method.
        """

        errs = super()._verify(option=option)
        self.req_cards('PCOUNT', None, lambda v: (v == 0), 0, option, errs)
        tfields = self._header['TFIELDS']
        for idx in range(tfields):
            self.req_cards('TBCOL' + str(idx + 1), None, _is_int, None, option,
                           errs)
        return errs


class BinTableHDU(_TableBaseHDU):
    """
    Binary table HDU class.

    Parameters
    ----------
    data : array, `FITS_rec`, or `~astropy.table.Table`
        Data to be used.
    header : `Header`
        Header to be used.
    name : str
        Name to be populated in ``EXTNAME`` keyword.
    uint : bool, optional
        Set to `True` if the table contains unsigned integer columns.
    ver : int > 0 or None, optional
        The version of the HDU; this will be the value of the keyword
        ``EXTVER``.
        If not given or None, it defaults to the value of the ``EXTVER``
        card of the ``header`` or 1.
        (default: None)
    character_as_bytes : bool
        Whether to return bytes for string columns. By default this is `False`
        and (unicode) strings are returned, but this does not respect memory
        mapping and loads the whole column in memory when accessed.

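    Examples
    --------
    A minimal sketch of constructing a binary table HDU directly from an
    `~astropy.table.Table` (the column name and values are illustrative)::

        from astropy.table import Table
        from astropy.io import fits

        t = Table({'flux': [1.0, 2.0, 3.0]})
        hdu = fits.BinTableHDU(t, name='FLUXES')
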
848    """
849
850    _extension = 'BINTABLE'
851    _ext_comment = 'binary table extension'
852
853    def __init__(self, data=None, header=None, name=None, uint=False, ver=None,
854                 character_as_bytes=False):
855        from astropy.table import Table
856        if isinstance(data, Table):
857            from astropy.io.fits.convenience import table_to_hdu
858            hdu = table_to_hdu(data)
859            if header is not None:
860                hdu.header.update(header)
861            data = hdu.data
862            header = hdu.header
863
864        super().__init__(data, header, name=name, uint=uint, ver=ver,
865                         character_as_bytes=character_as_bytes)
866
867    @classmethod
868    def match_header(cls, header):
869        card = header.cards[0]
870        xtension = card.value
871        if isinstance(xtension, str):
872            xtension = xtension.rstrip()
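        # 'A3DTABLE' is an obsolete, pre-standardization name for binary
        # tables; it is still accepted here for backward compatibility.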
        return (card.keyword == 'XTENSION' and
                xtension in (cls._extension, 'A3DTABLE'))

    def _calculate_datasum_with_heap(self):
        """
        Calculate the value for the ``DATASUM`` card given the input data
        """

        with _binary_table_byte_swap(self.data) as data:
            dout = data.view(type=np.ndarray, dtype=np.ubyte)
            csum = self._compute_checksum(dout)

            # Now add in the heap data to the checksum (we can skip any gap
            # between the table and the heap since it's all zeros and doesn't
            # contribute to the checksum)
            if data._get_raw_data() is None:
                # This block is still needed because
                # test_variable_length_table_data leads to ._get_raw_data
                # returning None, which means _get_heap_data doesn't work.
                # This happens when the data is loaded in memory rather than
                # being unloaded on disk.
                for idx in range(data._nfields):
                    if isinstance(data.columns._recformats[idx], _FormatP):
                        for coldata in data.field(idx):
                            # coldata should already be byteswapped from the call
                            # to _binary_table_byte_swap
                            if not len(coldata):
                                continue

                            csum = self._compute_checksum(coldata, csum)
            else:
                csum = self._compute_checksum(data._get_heap_data(), csum)

            return csum

    def _calculate_datasum(self):
        """
        Calculate the value for the ``DATASUM`` card in the HDU.
        """

        if self._has_data:
            # This method calculates the datasum while incorporating any
            # heap data, which is obviously not handled from the base
            # _calculate_datasum
            return self._calculate_datasum_with_heap()
        else:
            # This is the case where the data has not been read from the file
            # yet.  We can handle that in a generic manner so we do it in the
            # base class.  The other possibility is that there is no data at
            # all.  This can also be handled in a generic manner.
            return super()._calculate_datasum()

    def _writedata_internal(self, fileobj):
        size = 0

        if self.data is None:
            return size

        with _binary_table_byte_swap(self.data) as data:
            if _has_unicode_fields(data):
                # If the raw data was a user-supplied recarray, we can't write
                # unicode columns directly to the file, so we have to switch
                # to a slower row-by-row write
                self._writedata_by_row(fileobj)
            else:
                fileobj.writearray(data)
                # Write out the heap of variable length array columns; this
                # has to be done after the "regular" data is written (above).
                # To avoid a bug in the lustre filesystem client, don't
                # write 0-byte objects.
                if data._gap > 0:
                    fileobj.write((data._gap * '\0').encode('ascii'))

            nbytes = data._gap

            if not self._manages_own_heap:
                # Write the heap data one column at a time, in the order
                # that the data pointers appear in the column (regardless
                # if that data pointer has a different, previous heap
                # offset listed)
                for idx in range(data._nfields):
                    if not isinstance(data.columns._recformats[idx],
                                      _FormatP):
                        continue

                    field = self.data.field(idx)
                    for row in field:
                        if len(row) > 0:
                            nbytes += row.nbytes
                            fileobj.writearray(row)
            else:
                heap_data = data._get_heap_data()
                if len(heap_data) > 0:
                    nbytes += len(heap_data)
                    fileobj.writearray(heap_data)

            data._heapsize = nbytes - data._gap
            size += nbytes

        size += self.data.size * self.data._raw_itemsize

        return size

    def _writedata_by_row(self, fileobj):
        fields = [self.data.field(idx)
                  for idx in range(len(self.data.columns))]

        # Creating Record objects is expensive (as in
        # `for row in self.data:`), so instead we just iterate over the row
        # indices and get one field at a time:
        for idx in range(len(self.data)):
            for field in fields:
                item = field[idx]
                field_width = None

                if field.dtype.kind == 'U':
                    # Read the field *width* by reading past the field kind.
                    i = field.dtype.str.index(field.dtype.kind)
                    field_width = int(field.dtype.str[i+1:])
                    item = np.char.encode(item, 'ascii')

                fileobj.writearray(item)
                if field_width is not None:
                    j = item.dtype.str.index(item.dtype.kind)
                    item_length = int(item.dtype.str[j+1:])
                    # Fix padding problem (see #5296).
                    padding = '\x00'*(field_width - item_length)
                    fileobj.write(padding.encode('ascii'))

    _tdump_file_format = textwrap.dedent("""

        - **datafile:** Each line of the data file represents one row of table
          data.  The data is output one column at a time in column order.  If
          a column contains an array, each element of the column array in the
          current row is output before moving on to the next column.  Each row
          ends with a new line.

          Integer data is output right-justified in a 21-character field
          followed by a blank.  Floating point data is output right justified
          using 'g' format in a 21-character field with 15 digits of
          precision, followed by a blank.  String data that does not contain
          whitespace is output left-justified in a field whose width matches
          the width specified in the ``TFORM`` header parameter for the
          column, followed by a blank.  When the string data contains
          whitespace characters, the string is enclosed in quotation marks
          (``""``).  For the last data element in a row, the trailing blank in
          the field is replaced by a new line character.

          For column data containing variable length arrays ('P' format), the
          array data is preceded by the string ``'VLA_Length= '`` and the
          integer length of the array for that row, left-justified in a
          21-character field, followed by a blank.

          .. note::

              This format does *not* support variable length arrays in the
              'Q' format, due to ambiguities that are difficult to overcome.
              In practice this means that this file format cannot support VLA
              columns in tables stored in files that are over 2 GB in size.

          For column data representing a bit field ('X' format), each bit
          value in the field is output right-justified in a 21-character field
          as 1 (for true) or 0 (for false).

        - **cdfile:** Each line of the column definitions file provides the
          definitions for one column in the table.  The line is broken up into
          8, sixteen-character fields.  The first field provides the column
          name (``TTYPEn``).  The second field provides the column format
          (``TFORMn``).  The third field provides the display format
          (``TDISPn``).  The fourth field provides the physical units
          (``TUNITn``).  The fifth field provides the dimensions for a
          multidimensional array (``TDIMn``).  The sixth field provides the
          value that signifies an undefined value (``TNULLn``).  The seventh
          field provides the scale factor (``TSCALn``).  The eighth field
          provides the offset value (``TZEROn``).  A field value of ``""`` is
          used to represent the case where no value is provided.

        - **hfile:** Each line of the header parameters file provides the
          definition of a single HDU header card as represented by the card
          image.
      """)

    @deprecated_renamed_argument('clobber', 'overwrite', '2.0',
                                 message='"clobber" was deprecated in version '
                                         '2.0 and will be removed in version '
                                         '5.1. Use argument "overwrite" '
                                         'instead.')
    def dump(self, datafile=None, cdfile=None, hfile=None, overwrite=False):
        """
        Dump the table HDU to a file in ASCII format.  The table may be dumped
        in three separate files, one containing column definitions, one
        containing header parameters, and one for table data.

        Parameters
        ----------
        datafile : path-like or file-like, optional
            Output data file.  The default is the root name of the
            fits file associated with this HDU appended with the
            extension ``.txt``.

        cdfile : path-like or file-like, optional
            Output column definitions file.  The default is `None`, in which
            case no column definitions output is produced.

        hfile : path-like or file-like, optional
            Output header parameters file.  The default is `None`, in which
            case no header parameters output is produced.

        overwrite : bool, optional
            If ``True``, overwrite the output file if it exists. Raises an
            ``OSError`` if ``False`` and the output file exists. Default is
            ``False``.

            .. versionchanged:: 1.3
               ``overwrite`` replaces the deprecated ``clobber`` argument.

        Notes
        -----
        The primary use for the `dump` method is to allow viewing and editing
        the table data and parameters in a standard text editor.
        The `load` method can be used to create a new table from the three
        plain text (ASCII) files.
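
        Examples
        --------
        A minimal sketch (``hdu`` is an existing `BinTableHDU`; the file
        names are illustrative)::

            hdu.dump('table_data.txt', cdfile='table_cols.txt',
                     hfile='table_header.txt', overwrite=True)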
1095        """
1096
1097        # check if the output files already exist
1098        exist = []
1099        files = [datafile, cdfile, hfile]
1100
1101        for f in files:
1102            if isinstance(f, str):
1103                if os.path.exists(f) and os.path.getsize(f) != 0:
1104                    if overwrite:
1105                        os.remove(f)
1106                    else:
1107                        exist.append(f)
1108
1109        if exist:
1110            raise OSError('  '.join([f"File '{f}' already exists."
1111                                     for f in exist])+"  If you mean to "
1112                                                      "replace the file(s) "
1113                                                      "then use the argument "
1114                                                      "'overwrite=True'.")
1115
1116        # Process the data
1117        self._dump_data(datafile)
1118
1119        # Process the column definitions
1120        if cdfile:
1121            self._dump_coldefs(cdfile)
1122
1123        # Process the header parameters
1124        if hfile:
1125            self._header.tofile(hfile, sep='\n', endcard=False, padding=False)
1126
1127    if isinstance(dump.__doc__, str):
1128        dump.__doc__ += _tdump_file_format.replace('\n', '\n        ')
1129
1130    def load(cls, datafile, cdfile=None, hfile=None, replace=False,
1131             header=None):
1132        """
1133        Create a table from the input ASCII files.  The input is from up to
1134        three separate files, one containing column definitions, one containing
1135        header parameters, and one containing column data.
1136
1137        The column definition and header parameters files are not required.
1138        When absent the column definitions and/or header parameters are taken
1139        from the header object given in the header argument; otherwise sensible
1140        defaults are inferred (though this mode is not recommended).
1141
1142        Parameters
1143        ----------
1144        datafile : path-like or file-like
1145            Input data file containing the table data in ASCII format.
1146
1147        cdfile : path-like or file-like, optional
1148            Input column definition file containing the names,
1149            formats, display formats, physical units, multidimensional
1150            array dimensions, undefined values, scale factors, and
1151            offsets associated with the columns in the table.  If
1152            `None`, the column definitions are taken from the current
1153            values in this object.
1154
1155        hfile : path-like or file-like, optional
1156            Input parameter definition file containing the header
1157            parameter definitions to be associated with the table.  If
1158            `None`, the header parameter definitions are taken from
            the current values in this object's header.

        replace : bool, optional
            When `True`, indicates that the entire header should be
            replaced with the contents of the ASCII file instead of
            just updating the current header.

        header : `~astropy.io.fits.Header`, optional
            When the cdfile and hfile are missing, use this Header object in
            the creation of the new table and HDU.  Otherwise this Header
            supersedes the keywords from hfile, which is only used to update
            values not present in this Header, unless ``replace=True``, in
            which case this Header's values are completely replaced with the
            values from hfile.

        Notes
        -----
        The primary use for the `load` method is to allow the input of table
        data and parameters in ASCII form that were edited in a standard text
        editor.  The `dump` method can be used to create the initial ASCII
        files.
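
        Examples
        --------
        A minimal sketch, mirroring the `dump` example above (the file names
        are illustrative)::

            from astropy.io import fits

            hdu = fits.BinTableHDU.load('table_data.txt',
                                        cdfile='table_cols.txt',
                                        hfile='table_header.txt')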
1180        """
1181
1182        # Process the parameter file
1183        if header is None:
1184            header = Header()
1185
1186        if hfile:
1187            if replace:
1188                header = Header.fromtextfile(hfile)
1189            else:
1190                header.extend(Header.fromtextfile(hfile), update=True,
1191                              update_first=True)
1192
1193        coldefs = None
1194        # Process the column definitions file
1195        if cdfile:
1196            coldefs = cls._load_coldefs(cdfile)
1197
1198        # Process the data file
1199        data = cls._load_data(datafile, coldefs)
1200        if coldefs is None:
1201            coldefs = ColDefs(data)
1202
1203        # Create a new HDU using the supplied header and data
1204        hdu = cls(data=data, header=header)
1205        hdu.columns = coldefs
1206        return hdu
1207
1208    if isinstance(load.__doc__, str):
1209        load.__doc__ += _tdump_file_format.replace('\n', '\n        ')
1210
1211    load = classmethod(load)
1212    # Have to create a classmethod from this here instead of as a decorator;
1213    # otherwise we can't update __doc__
1214
1215    def _dump_data(self, fileobj):
1216        """
1217        Write the table data in the ASCII format read by BinTableHDU.load()
1218        to fileobj.
1219        """
1220
1221        if not fileobj and self._file:
1222            root = os.path.splitext(self._file.name)[0]
1223            fileobj = root + '.txt'
1224
1225        close_file = False
1226
1227        if isinstance(fileobj, str):
1228            fileobj = open(fileobj, 'w')
1229            close_file = True
1230
1231        linewriter = csv.writer(fileobj, dialect=FITSTableDumpDialect)
1232
1233        # Process each row of the table and output one row at a time
1234        def format_value(val, format):
1235            if format[0] == 'S':
1236                itemsize = int(format[1:])
1237                return '{:{size}}'.format(val, size=itemsize)
1238            elif format in np.typecodes['AllInteger']:
1239                # output integer
1240                return f'{val:21d}'
1241            elif format in np.typecodes['Complex']:
1242                return f'{val.real:21.15g}+{val.imag:.15g}j'
1243            elif format in np.typecodes['Float']:
1244                # output floating point
1245                return f'{val:#21.15g}'
1246
1247        for row in self.data:
1248            line = []   # the line for this row of the table
1249
1250            # Process each column of the row.
1251            for column in self.columns:
1252                # format of data in a variable length array
1253                # where None means it is not a VLA:
1254                vla_format = None
1255                format = _convert_format(column.format)
1256
1257                if isinstance(format, _FormatP):
1258                    # P format means this is a variable length array so output
1259                    # the length of the array for this row and set the format
1260                    # for the VLA data
1261                    line.append('VLA_Length=')
1262                    line.append(f'{len(row[column.name]):21d}')
1263                    _, dtype, option = _parse_tformat(column.format)
1264                    vla_format = FITS2NUMPY[option[0]][0]
1265
1266                if vla_format:
1267                    # Output the data for each element in the array
1268                    for val in row[column.name].flat:
1269                        line.append(format_value(val, vla_format))
1270                else:
1271                    # The column data is a single element
1272                    dtype = self.data.dtype.fields[column.name][0]
1273                    array_format = dtype.char
1274                    if array_format == 'V':
1275                        array_format = dtype.base.char
1276                    if array_format == 'S':
1277                        array_format += str(dtype.itemsize)
1278
1279                    if dtype.char == 'V':
1280                        for value in row[column.name].flat:
1281                            line.append(format_value(value, array_format))
1282                    else:
1283                        line.append(format_value(row[column.name],
1284                                    array_format))
1285            linewriter.writerow(line)
1286        if close_file:
1287            fileobj.close()
1288
1289    def _dump_coldefs(self, fileobj):
1290        """
1291        Write the column definition parameters in the ASCII format read by
1292        BinTableHDU.load() to fileobj.
1293        """
1294
1295        close_file = False
1296
1297        if isinstance(fileobj, str):
1298            fileobj = open(fileobj, 'w')
1299            close_file = True
1300
1301        # Process each column of the table and output the result to the
1302        # file one at a time
1303        for column in self.columns:
1304            line = [column.name, column.format]
1305            attrs = ['disp', 'unit', 'dim', 'null', 'bscale', 'bzero']
            line += ['{!s:16s}'.format(value if value else '""')
1307                     for value in (getattr(column, attr) for attr in attrs)]
1308            fileobj.write(' '.join(line))
1309            fileobj.write('\n')
1310
1311        if close_file:
1312            fileobj.close()
1313
1314    @classmethod
1315    def _load_data(cls, fileobj, coldefs=None):
1316        """
1317        Read the table data from the ASCII file output by BinTableHDU.dump().
1318        """
1319
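        # Rough sketch of how BinTableHDU.load() uses these helpers
        # (hypothetical file names):
        #
        #     coldefs = cls._load_coldefs('table.cols')
        #     data = cls._load_data('table.txt', coldefs=coldefs)
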
1320        close_file = False
1321
1322        if isinstance(fileobj, str):
1323            fileobj = open(fileobj, 'r')
1324            close_file = True
1325
1326        initialpos = fileobj.tell()  # We'll be returning here later
1327        linereader = csv.reader(fileobj, dialect=FITSTableDumpDialect)
1328
        # First we need to do some preprocessing on the file to find out how
        # much memory we'll need to reserve for the table.  This is necessary
        # even if we already have the coldefs, in order to determine how many
        # rows to reserve memory for.
1333        vla_lengths = []
1334        recformats = []
1335        names = []
1336        nrows = 0
1337        if coldefs is not None:
1338            recformats = coldefs._recformats
1339            names = coldefs.names
1340
1341        def update_recformats(value, idx):
1342            fitsformat = _scalar_to_format(value)
1343            recformat = _convert_format(fitsformat)
1344            if idx >= len(recformats):
1345                recformats.append(recformat)
1346            else:
1347                if _cmp_recformats(recformats[idx], recformat) < 0:
1348                    recformats[idx] = recformat
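
        # For example, if a column's first value parses as an integer but a
        # later row contains '1.5', _cmp_recformats treats the float format
        # as wider and the recformat is promoted accordingly.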
1349
1350        # TODO: The handling of VLAs could probably be simplified a bit
1351        for row in linereader:
1352            nrows += 1
1353            if coldefs is not None:
1354                continue
1355            col = 0
1356            idx = 0
1357            while idx < len(row):
1358                if row[idx] == 'VLA_Length=':
1359                    if col < len(vla_lengths):
1360                        vla_length = vla_lengths[col]
1361                    else:
1362                        vla_length = int(row[idx + 1])
1363                        vla_lengths.append(vla_length)
1364                    idx += 2
1365                    while vla_length:
1366                        update_recformats(row[idx], col)
1367                        vla_length -= 1
1368                        idx += 1
1369                    col += 1
1370                else:
1371                    if col >= len(vla_lengths):
1372                        vla_lengths.append(None)
1373                    update_recformats(row[idx], col)
1374                    col += 1
1375                    idx += 1
1376
1377        # Update the recformats for any VLAs
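        # (e.g. a recformat of 'f8' with a recorded VLA length of 3 becomes
        # '3f8', reserving room for those elements in the initial recarray)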
1378        for idx, length in enumerate(vla_lengths):
1379            if length is not None:
1380                recformats[idx] = str(length) + recformats[idx]
1381
1382        dtype = np.rec.format_parser(recformats, names, None).dtype
1383
1384        # TODO: In the future maybe enable loading a bit at a time so that we
1385        # can convert from this format to an actual FITS file on disk without
1386        # needing enough physical memory to hold the entire thing at once
1387        hdu = BinTableHDU.from_columns(np.recarray(shape=1, dtype=dtype),
1388                                       nrows=nrows, fill=True)
1389
1390        # TODO: It seems to me a lot of this could/should be handled from
1391        # within the FITS_rec class rather than here.
1392        data = hdu.data
1393        for idx, length in enumerate(vla_lengths):
1394            if length is not None:
1395                arr = data.columns._arrays[idx]
1396                dt = recformats[idx][len(str(length)):]
1397
1398                # NOTE: FormatQ not supported here; it's hard to determine
1399                # whether or not it will be necessary to use a wider descriptor
1400                # type. The function documentation will have to serve as a
1401                # warning that this is not supported.
1402                recformats[idx] = _FormatP(dt, max=length)
1403                data.columns._recformats[idx] = recformats[idx]
1404                name = data.columns.names[idx]
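                # _makep converts the column to its variable-length
                # (P format) representation before caching it on the FITS_rec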
1405                data._cache_field(name, _makep(arr, arr, recformats[idx]))
1406
1407        def format_value(col, val):
            # Special formatting for a couple of particular data types
1409            if recformats[col] == FITS2NUMPY['L']:
1410                return bool(int(val))
1411            elif recformats[col] == FITS2NUMPY['M']:
                # NumPy will not accept a string representation when
                # assigning into a complex array or field (though it accepts
                # one in other contexts), so convert the string to a complex
                # number explicitly:
1416                return complex(val)
1417            else:
1418                return val
1419
1420        # Jump back to the start of the data and create a new line reader
1421        fileobj.seek(initialpos)
1422        linereader = csv.reader(fileobj, dialect=FITSTableDumpDialect)
1423        for row, line in enumerate(linereader):
1424            col = 0
1425            idx = 0
1426            while idx < len(line):
1427                if line[idx] == 'VLA_Length=':
1428                    vla_len = vla_lengths[col]
1429                    idx += 2
1430                    slice_ = slice(idx, idx + vla_len)
1432                    idx += vla_len
1433                elif dtype[col].shape:
1434                    # This is an array column
1435                    array_size = int(np.multiply.reduce(dtype[col].shape))
1436                    slice_ = slice(idx, idx + array_size)
1437                    idx += array_size
1438                else:
1439                    slice_ = None
1440
1441                if slice_ is None:
1442                    # This is a scalar row element
1443                    data[row][col] = format_value(col, line[idx])
1444                    idx += 1
1445                else:
1446                    data[row][col].flat[:] = [format_value(col, val)
1447                                              for val in line[slice_]]
1448
1449                col += 1
1450
1451        if close_file:
1452            fileobj.close()
1453
1454        return data
1455
1456    @classmethod
1457    def _load_coldefs(cls, fileobj):
1458        """
1459        Read the table column definitions from the ASCII file output by
1460        BinTableHDU.dump().
1461        """
1462
1463        close_file = False
1464
1465        if isinstance(fileobj, str):
1466            fileobj = open(fileobj, 'r')
1467            close_file = True
1468
1469        columns = []
1470
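        # Each line is expected to follow the layout written by
        # _dump_coldefs, e.g. (illustrative):
        #
        #     FLUX E "" Jy "" "" "" ""
        #
        # i.e. name, format, disp, unit, dim, null, bscale, bzero.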
1471        for line in fileobj:
1472            words = line[:-1].split()
1473            kwargs = {}
1474            for key in ['name', 'format', 'disp', 'unit', 'dim']:
1475                kwargs[key] = words.pop(0).replace('""', '')
1476
1477            for key in ['null', 'bscale', 'bzero']:
1478                word = words.pop(0).replace('""', '')
1479                if word:
1480                    word = _str_to_num(word)
1481                kwargs[key] = word
1482            columns.append(Column(**kwargs))
1483
1484        if close_file:
1485            fileobj.close()
1486
1487        return ColDefs(columns)
1488
1489
1490@contextlib.contextmanager
1491def _binary_table_byte_swap(data):
1492    """
    Ensures that all the data of a binary FITS table (represented as a
    FITS_rec object) is in big-endian byte order.  Columns are swapped
    in-place one at a time, and then returned to their previous byte order
    when this context manager exits.
1497
1498    Because a new dtype is needed to represent the byte-swapped columns, the
1499    new dtype is temporarily applied as well.
1500    """
1501
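    # Rough usage sketch (the writer call is hypothetical):
    #
    #     with _binary_table_byte_swap(hdu.data) as data:
    #         output.write_array(data)  # numeric columns are big-endian here
    #
    # On exit both the column data and the dtype revert to their original
    # byte order.
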
1502    orig_dtype = data.dtype
1503
1504    names = []
1505    formats = []
1506    offsets = []
1507
1508    to_swap = []
1509
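    # FITS stores binary table data big-endian.  Little-endian ('<') fields
    # always need swapping, and on a little-endian machine native-order ('=')
    # fields do too; big-endian ('>') fields are left alone.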
1510    if sys.byteorder == 'little':
1511        swap_types = ('<', '=')
1512    else:
1513        swap_types = ('<',)
1514
1515    for idx, name in enumerate(orig_dtype.names):
1516        field = _get_recarray_field(data, idx)
1517
1518        field_dtype, field_offset = orig_dtype.fields[name]
1519        names.append(name)
1520        formats.append(field_dtype)
1521        offsets.append(field_offset)
1522
1523        if isinstance(field, chararray.chararray):
1524            continue
1525
        # Only swap fields that are not already in big-endian order.
        # We must use field_dtype.base here since for multi-element dtypes
        # the .str will be '|V<N>', where <N> is the total bytes per element.
1529        if field.itemsize > 1 and field_dtype.base.str[0] in swap_types:
1530            to_swap.append(field)
1531            # Override the dtype for this field in the new record dtype with
1532            # the byteswapped version
1533            formats[-1] = field_dtype.newbyteorder()
1534
1535        # deal with var length table
1536        recformat = data.columns._recformats[idx]
1537        if isinstance(recformat, _FormatP):
1538            coldata = data.field(idx)
1539            for c in coldata:
1540                if (not isinstance(c, chararray.chararray) and
1541                        c.itemsize > 1 and c.dtype.str[0] in swap_types):
1542                    to_swap.append(c)
1543
1544    for arr in reversed(to_swap):
1545        arr.byteswap(True)
1546
1547    data.dtype = np.dtype({'names': names,
1548                           'formats': formats,
1549                           'offsets': offsets})
1550
1551    yield data
1552
1553    for arr in to_swap:
1554        arr.byteswap(True)
1555
1556    data.dtype = orig_dtype
1557