# Licensed under a 3-clause BSD style license - see PYFITS.rst


import contextlib
import csv
import operator
import os
import re
import sys
import textwrap
import warnings
from contextlib import suppress

import numpy as np
from numpy import char as chararray

from .base import DELAYED, _ValidHDU, ExtensionHDU
# This module may have many dependencies on astropy.io.fits.column, but
# astropy.io.fits.column has fewer dependencies overall, so it's easier to
# keep table/column-related utilities in astropy.io.fits.column
from astropy.io.fits.column import (FITS2NUMPY, KEYWORD_NAMES, KEYWORD_TO_ATTRIBUTE,
                                    ATTRIBUTE_TO_KEYWORD, TDEF_RE, Column, ColDefs,
                                    _AsciiColDefs, _FormatP, _FormatQ, _makep,
                                    _parse_tformat, _scalar_to_format, _convert_format,
                                    _cmp_recformats)
from astropy.io.fits.fitsrec import FITS_rec, _get_recarray_field, _has_unicode_fields
from astropy.io.fits.header import Header, _pad_length
from astropy.io.fits.util import _is_int, _str_to_num

from astropy.utils import lazyproperty
from astropy.utils.exceptions import AstropyDeprecationWarning
from astropy.utils.decorators import deprecated_renamed_argument


class FITSTableDumpDialect(csv.excel):
    """
    A CSV dialect for the Astropy format of ASCII dumps of FITS tables.
    """

    delimiter = ' '
    lineterminator = '\n'
    quotechar = '"'
    quoting = csv.QUOTE_ALL
    skipinitialspace = True


class _TableLikeHDU(_ValidHDU):
    """
    A class for HDUs that have table-like data. This is used for both
    Binary/ASCII tables as well as Random Access Group HDUs (which are
    otherwise too dissimilar for tables to use _TableBaseHDU directly).
    """

    # Concrete record-array and column-definition types used when building
    # data for this HDU; subclasses may override (e.g. ASCII tables).
    _data_type = FITS_rec
    _columns_type = ColDefs

    # TODO: Temporary flag representing whether uints are enabled; remove this
    # after restructuring to support uints by default on a per-column basis
    _uint = False

    @classmethod
    def match_header(cls, header):
        """
        This is an abstract HDU type for HDUs that contain table-like data.
        This is even more abstract than _TableBaseHDU which is specifically for
        the standard ASCII and Binary Table types.
        """

        raise NotImplementedError

    @classmethod
    def from_columns(cls, columns, header=None, nrows=0, fill=False,
                     character_as_bytes=False, **kwargs):
        """
        Given either a `ColDefs` object, a sequence of `Column` objects,
        or another table HDU or table data (a `FITS_rec` or multi-field
        `numpy.ndarray` or `numpy.recarray` object, return a new table HDU of
        the class this method was called on using the column definition from
        the input.

        See also `FITS_rec.from_columns`.

        Parameters
        ----------
        columns : sequence of `Column`, `ColDefs` -like
            The columns from which to create the table data, or an object with
            a column-like structure from which a `ColDefs` can be instantiated.
            This includes an existing `BinTableHDU` or `TableHDU`, or a
            `numpy.recarray` to give some examples.

            If these columns have data arrays attached that data may be used in
            initializing the new table. Otherwise the input columns will be
            used as a template for a new table with the requested number of
            rows.

        header : `Header`
            An optional `Header` object to instantiate the new HDU with. Header
            keywords specifically related to defining the table structure (such
            as the "TXXXn" keywords like TTYPEn) will be overridden by the
            supplied column definitions, but all other informational and data
            model-specific keywords are kept.

        nrows : int
            Number of rows in the new table. If the input columns have data
            associated with them, the size of the largest input column is used.
            Otherwise the default is 0.

        fill : bool
            If `True`, will fill all cells with zeros or blanks. If `False`,
            copy the data from input, undefined cells will still be filled with
            zeros/blanks.

        character_as_bytes : bool
            Whether to return bytes for string columns when accessed from the
            HDU. By default this is `False` and (unicode) strings are returned,
            but for large tables this may use up a lot of memory.

        Notes
        -----

        Any additional keyword arguments accepted by the HDU class's
        ``__init__`` may also be passed in as keyword arguments.
        """

        coldefs = cls._columns_type(columns)
        data = FITS_rec.from_columns(coldefs, nrows=nrows, fill=fill,
                                     character_as_bytes=character_as_bytes)
        hdu = cls(data=data, header=header, character_as_bytes=character_as_bytes, **kwargs)
        # Register the HDU as a listener so that column changes made through
        # the ColDefs interface propagate back to the HDU's data/header.
        coldefs._add_listener(hdu)
        return hdu

    @lazyproperty
    def columns(self):
        """
        The :class:`ColDefs` objects describing the columns in this table.
        """

        # The base class doesn't make any assumptions about where the column
        # definitions come from, so just return an empty ColDefs
        return ColDefs([])

    @property
    def _nrows(self):
        """
        table-like HDUs must provide an attribute that specifies the number of
        rows in the HDU's table.

        For now this is an internal-only attribute.
        """

        raise NotImplementedError

    def _get_tbdata(self):
        """Get the table data from an input HDU object."""

        columns = self.columns

        # TODO: Details related to variable length arrays need to be dealt with
        # specifically in the BinTableHDU class, since they're a detail
        # specific to FITS binary tables
        if (any(type(r) in (_FormatP, _FormatQ)
                for r in columns._recformats) and
                self._data_size is not None and
                self._data_size > self._theap):
            # We have a heap; include it in the raw_data
            raw_data = self._get_raw_data(self._data_size, np.uint8,
                                          self._data_offset)
            tbsize = self._header['NAXIS1'] * self._header['NAXIS2']
            data = raw_data[:tbsize].view(dtype=columns.dtype,
                                          type=np.rec.recarray)
        else:
            raw_data = self._get_raw_data(self._nrows, columns.dtype,
                                          self._data_offset)
            if raw_data is None:
                # This can happen when a brand new table HDU is being created
                # and no data has been assigned to the columns, in which case
                # just return an empty array
                raw_data = np.array([], dtype=columns.dtype)

            data = raw_data.view(np.rec.recarray)

        self._init_tbdata(data)
        data = data.view(self._data_type)
        columns._add_listener(data)
        return data

    def _init_tbdata(self, data):
        """Attach heap/uint bookkeeping to *data* and bind column arrays."""
        columns = self.columns

        # FITS data is always big-endian on disk
        data.dtype = data.dtype.newbyteorder('>')

        # hack to enable pseudo-uint support
        data._uint = self._uint

        # pass datLoc, for P format
        data._heapoffset = self._theap
        data._heapsize = self._header['PCOUNT']
        tbsize = self._header['NAXIS1'] * self._header['NAXIS2']
        data._gap = self._theap - tbsize

        # pass the attributes
        for idx, col in enumerate(columns):
            # get the data for each column object from the rec.recarray
            col.array = data.field(idx)

        # delete the _arrays attribute so that it is recreated to point to the
        # new data placed in the column object above
        del columns._arrays

    def _update_load_data(self):
        """Load the data if asked to."""
        if not self._data_loaded:
            self.data

    def _update_column_added(self, columns, column):
        """
        Update the data upon addition of a new column through the `ColDefs`
        interface.
        """
        # recreate data from the columns
        self.data = FITS_rec.from_columns(
            self.columns, nrows=self._nrows, fill=False,
            character_as_bytes=self._character_as_bytes
        )

    def _update_column_removed(self, columns, col_idx):
        """
        Update the data upon removal of a column through the `ColDefs`
        interface.
        """
        # recreate data from the columns
        self.data = FITS_rec.from_columns(
            self.columns, nrows=self._nrows, fill=False,
            character_as_bytes=self._character_as_bytes
        )


class _TableBaseHDU(ExtensionHDU, _TableLikeHDU):
    """
    FITS table extension base HDU class.

    Parameters
    ----------
    data : array
        Data to be used.
    header : `Header` instance
        Header to be used. If the ``data`` is also specified, header keywords
        specifically related to defining the table structure (such as the
        "TXXXn" keywords like TTYPEn) will be overridden by the supplied column
        definitions, but all other informational and data model-specific
        keywords are kept.
    name : str
        Name to be populated in ``EXTNAME`` keyword.
    uint : bool, optional
        Set to `True` if the table contains unsigned integer columns.
    ver : int > 0 or None, optional
        The ver of the HDU, will be the value of the keyword ``EXTVER``.
        If not given or None, it defaults to the value of the ``EXTVER``
        card of the ``header`` or 1.
        (default: None)
    character_as_bytes : bool
        Whether to return bytes for string columns. By default this is `False`
        and (unicode) strings are returned, but this does not respect memory
        mapping and loads the whole column in memory when accessed.
265 """ 266 267 _manages_own_heap = False 268 """ 269 This flag implies that when writing VLA tables (P/Q format) the heap 270 pointers that go into P/Q table columns should not be reordered or 271 rearranged in any way by the default heap management code. 272 273 This is included primarily as an optimization for compressed image HDUs 274 which perform their own heap maintenance. 275 """ 276 277 def __init__(self, data=None, header=None, name=None, uint=False, ver=None, 278 character_as_bytes=False): 279 280 super().__init__(data=data, header=header, name=name, ver=ver) 281 282 self._uint = uint 283 self._character_as_bytes = character_as_bytes 284 285 if data is DELAYED: 286 # this should never happen 287 if header is None: 288 raise ValueError('No header to setup HDU.') 289 290 # if the file is read the first time, no need to copy, and keep it 291 # unchanged 292 else: 293 self._header = header 294 else: 295 # construct a list of cards of minimal header 296 cards = [ 297 ('XTENSION', self._extension, self._ext_comment), 298 ('BITPIX', 8, 'array data type'), 299 ('NAXIS', 2, 'number of array dimensions'), 300 ('NAXIS1', 0, 'length of dimension 1'), 301 ('NAXIS2', 0, 'length of dimension 2'), 302 ('PCOUNT', 0, 'number of group parameters'), 303 ('GCOUNT', 1, 'number of groups'), 304 ('TFIELDS', 0, 'number of table fields')] 305 306 if header is not None: 307 308 # Make a "copy" (not just a view) of the input header, since it 309 # may get modified. the data is still a "view" (for now) 310 hcopy = header.copy(strip=True) 311 cards.extend(hcopy.cards) 312 313 self._header = Header(cards) 314 315 if isinstance(data, np.ndarray) and data.dtype.fields is not None: 316 # self._data_type is FITS_rec. 317 if isinstance(data, self._data_type): 318 self.data = data 319 else: 320 self.data = self._data_type.from_columns(data) 321 322 # TEMP: Special column keywords are normally overwritten by attributes 323 # from Column objects. 
In Astropy 3.0, several new keywords are now 324 # recognized as being special column keywords, but we don't 325 # automatically clear them yet, as we need to raise a deprecation 326 # warning for at least one major version. 327 if header is not None: 328 future_ignore = set() 329 for keyword in header.keys(): 330 match = TDEF_RE.match(keyword) 331 try: 332 base_keyword = match.group('label') 333 except Exception: 334 continue # skip if there is no match 335 if base_keyword in {'TCTYP', 'TCUNI', 'TCRPX', 'TCRVL', 'TCDLT', 'TRPOS'}: 336 future_ignore.add(base_keyword) 337 if future_ignore: 338 keys = ', '.join(x + 'n' for x in sorted(future_ignore)) 339 warnings.warn("The following keywords are now recognized as special " 340 "column-related attributes and should be set via the " 341 "Column objects: {}. In future, these values will be " 342 "dropped from manually specified headers automatically " 343 "and replaced with values generated based on the " 344 "Column objects.".format(keys), AstropyDeprecationWarning) 345 346 # TODO: Too much of the code in this class uses header keywords 347 # in making calculations related to the data size. This is 348 # unreliable, however, in cases when users mess with the header 349 # unintentionally--code that does this should be cleaned up. 350 self._header['NAXIS1'] = self.data._raw_itemsize 351 self._header['NAXIS2'] = self.data.shape[0] 352 self._header['TFIELDS'] = len(self.data._coldefs) 353 354 self.columns = self.data._coldefs 355 self.columns._add_listener(self.data) 356 self.update() 357 358 with suppress(TypeError, AttributeError): 359 # Make the ndarrays in the Column objects of the ColDefs 360 # object of the HDU reference the same ndarray as the HDU's 361 # FITS_rec object. 
362 for idx, col in enumerate(self.columns): 363 col.array = self.data.field(idx) 364 365 # Delete the _arrays attribute so that it is recreated to 366 # point to the new data placed in the column objects above 367 del self.columns._arrays 368 elif data is None: 369 pass 370 else: 371 raise TypeError('Table data has incorrect type.') 372 373 # Ensure that the correct EXTNAME is set on the new header if one was 374 # created, or that it overrides the existing EXTNAME if different 375 if name: 376 self.name = name 377 if ver is not None: 378 self.ver = ver 379 380 @classmethod 381 def match_header(cls, header): 382 """ 383 This is an abstract type that implements the shared functionality of 384 the ASCII and Binary Table HDU types, which should be used instead of 385 this. 386 """ 387 388 raise NotImplementedError 389 390 @lazyproperty 391 def columns(self): 392 """ 393 The :class:`ColDefs` objects describing the columns in this table. 394 """ 395 396 if self._has_data and hasattr(self.data, '_coldefs'): 397 return self.data._coldefs 398 return self._columns_type(self) 399 400 @lazyproperty 401 def data(self): 402 data = self._get_tbdata() 403 data._coldefs = self.columns 404 data._character_as_bytes = self._character_as_bytes 405 # Columns should now just return a reference to the data._coldefs 406 del self.columns 407 return data 408 409 @data.setter 410 def data(self, data): 411 if 'data' in self.__dict__: 412 if self.__dict__['data'] is data: 413 return 414 else: 415 self._data_replaced = True 416 else: 417 self._data_replaced = True 418 419 self._modified = True 420 421 if data is None and self.columns: 422 # Create a new table with the same columns, but empty rows 423 formats = ','.join(self.columns._recformats) 424 data = np.rec.array(None, formats=formats, 425 names=self.columns.names, 426 shape=0) 427 428 if isinstance(data, np.ndarray) and data.dtype.fields is not None: 429 # Go ahead and always make a view, even if the data is already the 430 # correct 
class (self._data_type) so we can update things like the 431 # column defs, if necessary 432 data = data.view(self._data_type) 433 434 if not isinstance(data.columns, self._columns_type): 435 # This would be the place, if the input data was for an ASCII 436 # table and this is binary table, or vice versa, to convert the 437 # data to the appropriate format for the table type 438 new_columns = self._columns_type(data.columns) 439 data = FITS_rec.from_columns(new_columns) 440 441 if 'data' in self.__dict__: 442 self.columns._remove_listener(self.__dict__['data']) 443 self.__dict__['data'] = data 444 445 self.columns = self.data.columns 446 self.columns._add_listener(self.data) 447 self.update() 448 449 with suppress(TypeError, AttributeError): 450 # Make the ndarrays in the Column objects of the ColDefs 451 # object of the HDU reference the same ndarray as the HDU's 452 # FITS_rec object. 453 for idx, col in enumerate(self.columns): 454 col.array = self.data.field(idx) 455 456 # Delete the _arrays attribute so that it is recreated to 457 # point to the new data placed in the column objects above 458 del self.columns._arrays 459 elif data is None: 460 pass 461 else: 462 raise TypeError('Table data has incorrect type.') 463 464 # returning the data signals to lazyproperty that we've already handled 465 # setting self.__dict__['data'] 466 return data 467 468 @property 469 def _nrows(self): 470 if not self._data_loaded: 471 return self._header.get('NAXIS2', 0) 472 else: 473 return len(self.data) 474 475 @lazyproperty 476 def _theap(self): 477 size = self._header['NAXIS1'] * self._header['NAXIS2'] 478 return self._header.get('THEAP', size) 479 480 # TODO: Need to either rename this to update_header, for symmetry with the 481 # Image HDUs, or just at some point deprecate it and remove it altogether, 482 # since header updates should occur automatically when necessary... 483 def update(self): 484 """ 485 Update header keywords to reflect recent changes of columns. 
486 """ 487 488 self._header.set('NAXIS1', self.data._raw_itemsize, after='NAXIS') 489 self._header.set('NAXIS2', self.data.shape[0], after='NAXIS1') 490 self._header.set('TFIELDS', len(self.columns), after='GCOUNT') 491 492 self._clear_table_keywords() 493 self._populate_table_keywords() 494 495 def copy(self): 496 """ 497 Make a copy of the table HDU, both header and data are copied. 498 """ 499 500 # touch the data, so it's defined (in the case of reading from a 501 # FITS file) 502 return self.__class__(data=self.data.copy(), 503 header=self._header.copy()) 504 505 def _prewriteto(self, checksum=False, inplace=False): 506 if self._has_data: 507 self.data._scale_back( 508 update_heap_pointers=not self._manages_own_heap) 509 # check TFIELDS and NAXIS2 510 self._header['TFIELDS'] = len(self.data._coldefs) 511 self._header['NAXIS2'] = self.data.shape[0] 512 513 # calculate PCOUNT, for variable length tables 514 tbsize = self._header['NAXIS1'] * self._header['NAXIS2'] 515 heapstart = self._header.get('THEAP', tbsize) 516 self.data._gap = heapstart - tbsize 517 pcount = self.data._heapsize + self.data._gap 518 if pcount > 0: 519 self._header['PCOUNT'] = pcount 520 521 # update the other T****n keywords 522 self._populate_table_keywords() 523 524 # update TFORM for variable length columns 525 for idx in range(self.data._nfields): 526 format = self.data._coldefs._recformats[idx] 527 if isinstance(format, _FormatP): 528 _max = self.data.field(idx).max 529 # May be either _FormatP or _FormatQ 530 format_cls = format.__class__ 531 format = format_cls(format.dtype, repeat=format.repeat, 532 max=_max) 533 self._header['TFORM' + str(idx + 1)] = format.tform 534 return super()._prewriteto(checksum, inplace) 535 536 def _verify(self, option='warn'): 537 """ 538 _TableBaseHDU verify method. 
539 """ 540 541 errs = super()._verify(option=option) 542 if not (isinstance(self._header[0], str) and 543 self._header[0].rstrip() == self._extension): 544 545 err_text = 'The XTENSION keyword must match the HDU type.' 546 fix_text = f'Converted the XTENSION keyword to {self._extension}.' 547 548 def fix(header=self._header): 549 header[0] = (self._extension, self._ext_comment) 550 551 errs.append(self.run_option(option, err_text=err_text, 552 fix_text=fix_text, fix=fix)) 553 554 self.req_cards('NAXIS', None, lambda v: (v == 2), 2, option, errs) 555 self.req_cards('BITPIX', None, lambda v: (v == 8), 8, option, errs) 556 self.req_cards('TFIELDS', 7, 557 lambda v: (_is_int(v) and v >= 0 and v <= 999), 0, 558 option, errs) 559 tfields = self._header['TFIELDS'] 560 for idx in range(tfields): 561 self.req_cards('TFORM' + str(idx + 1), None, None, None, option, 562 errs) 563 return errs 564 565 def _summary(self): 566 """ 567 Summarize the HDU: name, dimensions, and formats. 568 """ 569 570 class_name = self.__class__.__name__ 571 572 # if data is touched, use data info. 573 if self._data_loaded: 574 if self.data is None: 575 nrows = 0 576 else: 577 nrows = len(self.data) 578 579 ncols = len(self.columns) 580 format = self.columns.formats 581 582 # if data is not touched yet, use header info. 
583 else: 584 nrows = self._header['NAXIS2'] 585 ncols = self._header['TFIELDS'] 586 format = ', '.join([self._header['TFORM' + str(j + 1)] 587 for j in range(ncols)]) 588 format = f'[{format}]' 589 dims = f"{nrows}R x {ncols}C" 590 ncards = len(self._header) 591 592 return (self.name, self.ver, class_name, ncards, dims, format) 593 594 def _update_column_removed(self, columns, idx): 595 super()._update_column_removed(columns, idx) 596 597 # Fix the header to reflect the column removal 598 self._clear_table_keywords(index=idx) 599 600 def _update_column_attribute_changed(self, column, col_idx, attr, 601 old_value, new_value): 602 """ 603 Update the header when one of the column objects is updated. 604 """ 605 606 # base_keyword is the keyword without the index such as TDIM 607 # while keyword is like TDIM1 608 base_keyword = ATTRIBUTE_TO_KEYWORD[attr] 609 keyword = base_keyword + str(col_idx + 1) 610 611 if keyword in self._header: 612 if new_value is None: 613 # If the new value is None, i.e. 
None was assigned to the 614 # column attribute, then treat this as equivalent to deleting 615 # that attribute 616 del self._header[keyword] 617 else: 618 self._header[keyword] = new_value 619 else: 620 keyword_idx = KEYWORD_NAMES.index(base_keyword) 621 # Determine the appropriate keyword to insert this one before/after 622 # if it did not already exist in the header 623 for before_keyword in reversed(KEYWORD_NAMES[:keyword_idx]): 624 before_keyword += str(col_idx + 1) 625 if before_keyword in self._header: 626 self._header.insert(before_keyword, (keyword, new_value), 627 after=True) 628 break 629 else: 630 for after_keyword in KEYWORD_NAMES[keyword_idx + 1:]: 631 after_keyword += str(col_idx + 1) 632 if after_keyword in self._header: 633 self._header.insert(after_keyword, 634 (keyword, new_value)) 635 break 636 else: 637 # Just append 638 self._header[keyword] = new_value 639 640 def _clear_table_keywords(self, index=None): 641 """ 642 Wipe out any existing table definition keywords from the header. 643 644 If specified, only clear keywords for the given table index (shifting 645 up keywords for any other columns). The index is zero-based. 646 Otherwise keywords for all columns. 647 """ 648 649 # First collect all the table structure related keyword in the header 650 # into a single list so we can then sort them by index, which will be 651 # useful later for updating the header in a sensible order (since the 652 # header *might* not already be written in a reasonable order) 653 table_keywords = [] 654 655 for idx, keyword in enumerate(self._header.keys()): 656 match = TDEF_RE.match(keyword) 657 try: 658 base_keyword = match.group('label') 659 except Exception: 660 continue # skip if there is no match 661 662 if base_keyword in KEYWORD_TO_ATTRIBUTE: 663 664 # TEMP: For Astropy 3.0 we don't clear away the following keywords 665 # as we are first raising a deprecation warning that these will be 666 # dropped automatically if they were specified in the header. 
We 667 # can remove this once we are happy to break backward-compatibility 668 if base_keyword in {'TCTYP', 'TCUNI', 'TCRPX', 'TCRVL', 'TCDLT', 'TRPOS'}: 669 continue 670 671 num = int(match.group('num')) - 1 # convert to zero-base 672 table_keywords.append((idx, match.group(0), base_keyword, 673 num)) 674 675 # First delete 676 rev_sorted_idx_0 = sorted(table_keywords, key=operator.itemgetter(0), 677 reverse=True) 678 for idx, keyword, _, num in rev_sorted_idx_0: 679 if index is None or index == num: 680 del self._header[idx] 681 682 # Now shift up remaining column keywords if only one column was cleared 683 if index is not None: 684 sorted_idx_3 = sorted(table_keywords, key=operator.itemgetter(3)) 685 for _, keyword, base_keyword, num in sorted_idx_3: 686 if num <= index: 687 continue 688 689 old_card = self._header.cards[keyword] 690 new_card = (base_keyword + str(num), old_card.value, 691 old_card.comment) 692 self._header.insert(keyword, new_card) 693 del self._header[keyword] 694 695 # Also decrement TFIELDS 696 if 'TFIELDS' in self._header: 697 self._header['TFIELDS'] -= 1 698 699 def _populate_table_keywords(self): 700 """Populate the new table definition keywords from the header.""" 701 702 for idx, column in enumerate(self.columns): 703 for keyword, attr in KEYWORD_TO_ATTRIBUTE.items(): 704 val = getattr(column, attr) 705 if val is not None: 706 keyword = keyword + str(idx + 1) 707 self._header[keyword] = val 708 709 710class TableHDU(_TableBaseHDU): 711 """ 712 FITS ASCII table extension HDU class. 713 714 Parameters 715 ---------- 716 data : array or `FITS_rec` 717 Data to be used. 718 header : `Header` 719 Header to be used. 720 name : str 721 Name to be populated in ``EXTNAME`` keyword. 722 ver : int > 0 or None, optional 723 The ver of the HDU, will be the value of the keyword ``EXTVER``. 724 If not given or None, it defaults to the value of the ``EXTVER`` 725 card of the ``header`` or 1. 
        (default: None)
    character_as_bytes : bool
        Whether to return bytes for string columns. By default this is `False`
        and (unicode) strings are returned, but this does not respect memory
        mapping and loads the whole column in memory when accessed.

    """

    _extension = 'TABLE'
    _ext_comment = 'ASCII table extension'

    _padding_byte = ' '
    _columns_type = _AsciiColDefs

    __format_RE = re.compile(
        r'(?P<code>[ADEFIJ])(?P<width>\d+)(?:\.(?P<prec>\d+))?')

    def __init__(self, data=None, header=None, name=None, ver=None, character_as_bytes=False):
        super().__init__(data, header, name=name, ver=ver, character_as_bytes=character_as_bytes)

    @classmethod
    def match_header(cls, header):
        # An ASCII table is identified by XTENSION = 'TABLE' in the first card
        card = header.cards[0]
        xtension = card.value
        if isinstance(xtension, str):
            xtension = xtension.rstrip()
        return card.keyword == 'XTENSION' and xtension == cls._extension

    def _get_tbdata(self):
        columns = self.columns
        names = [n for idx, n in enumerate(columns.names)]

        # determine if there are duplicate field names and if there
        # are throw an exception
        dup = np.rec.find_duplicate(names)

        if dup:
            raise ValueError(f"Duplicate field names: {dup}")

        # TODO: Determine if this extra logic is necessary--I feel like the
        # _AsciiColDefs class should be responsible for telling the table what
        # its dtype should be...
        itemsize = columns.spans[-1] + columns.starts[-1] - 1
        dtype = {}

        for idx in range(len(columns)):
            data_type = 'S' + str(columns.spans[idx])

            if idx == len(columns) - 1:
                # The last column is padded out to the value of NAXIS1
                if self._header['NAXIS1'] > itemsize:
                    data_type = 'S' + str(columns.spans[idx] +
                                          self._header['NAXIS1'] - itemsize)
            dtype[columns.names[idx]] = (data_type, columns.starts[idx] - 1)

        raw_data = self._get_raw_data(self._nrows, dtype, self._data_offset)
        data = raw_data.view(np.rec.recarray)
        self._init_tbdata(data)
        return data.view(self._data_type)

    def _calculate_datasum(self):
        """
        Calculate the value for the ``DATASUM`` card in the HDU.
        """

        if self._has_data:
            # We have the data to be used.
            # We need to pad the data to a block length before calculating
            # the datasum.
            bytes_array = self.data.view(type=np.ndarray, dtype=np.ubyte)
            padding = np.frombuffer(_pad_length(self.size) * b' ',
                                    dtype=np.ubyte)

            d = np.append(bytes_array, padding)

            cs = self._compute_checksum(d)
            return cs
        else:
            # This is the case where the data has not been read from the file
            # yet. We can handle that in a generic manner so we do it in the
            # base class. The other possibility is that there is no data at
            # all. This can also be handled in a generic manner.
            return super()._calculate_datasum()

    def _verify(self, option='warn'):
        """
        `TableHDU` verify method.
        """

        errs = super()._verify(option=option)
        self.req_cards('PCOUNT', None, lambda v: (v == 0), 0, option, errs)
        tfields = self._header['TFIELDS']
        for idx in range(tfields):
            self.req_cards('TBCOL' + str(idx + 1), None, _is_int, None, option,
                           errs)
        return errs


class BinTableHDU(_TableBaseHDU):
    """
    Binary table HDU class.
827 828 Parameters 829 ---------- 830 data : array, `FITS_rec`, or `~astropy.table.Table` 831 Data to be used. 832 header : `Header` 833 Header to be used. 834 name : str 835 Name to be populated in ``EXTNAME`` keyword. 836 uint : bool, optional 837 Set to `True` if the table contains unsigned integer columns. 838 ver : int > 0 or None, optional 839 The ver of the HDU, will be the value of the keyword ``EXTVER``. 840 If not given or None, it defaults to the value of the ``EXTVER`` 841 card of the ``header`` or 1. 842 (default: None) 843 character_as_bytes : bool 844 Whether to return bytes for string columns. By default this is `False` 845 and (unicode) strings are returned, but this does not respect memory 846 mapping and loads the whole column in memory when accessed. 847 848 """ 849 850 _extension = 'BINTABLE' 851 _ext_comment = 'binary table extension' 852 853 def __init__(self, data=None, header=None, name=None, uint=False, ver=None, 854 character_as_bytes=False): 855 from astropy.table import Table 856 if isinstance(data, Table): 857 from astropy.io.fits.convenience import table_to_hdu 858 hdu = table_to_hdu(data) 859 if header is not None: 860 hdu.header.update(header) 861 data = hdu.data 862 header = hdu.header 863 864 super().__init__(data, header, name=name, uint=uint, ver=ver, 865 character_as_bytes=character_as_bytes) 866 867 @classmethod 868 def match_header(cls, header): 869 card = header.cards[0] 870 xtension = card.value 871 if isinstance(xtension, str): 872 xtension = xtension.rstrip() 873 return (card.keyword == 'XTENSION' and 874 xtension in (cls._extension, 'A3DTABLE')) 875 876 def _calculate_datasum_with_heap(self): 877 """ 878 Calculate the value for the ``DATASUM`` card given the input data 879 """ 880 881 with _binary_table_byte_swap(self.data) as data: 882 dout = data.view(type=np.ndarray, dtype=np.ubyte) 883 csum = self._compute_checksum(dout) 884 885 # Now add in the heap data to the checksum (we can skip any gap 886 # between the table 
            # and the heap since it's all zeros and doesn't
            # contribute to the checksum
            if data._get_raw_data() is None:
                # This block is still needed because
                # test_variable_length_table_data leads to ._get_raw_data
                # returning None which means _get_heap_data doesn't work.
                # Which happens when the data is loaded in memory rather than
                # being unloaded on disk
                for idx in range(data._nfields):
                    if isinstance(data.columns._recformats[idx], _FormatP):
                        for coldata in data.field(idx):
                            # coldata should already be byteswapped from the call
                            # to _binary_table_byte_swap
                            if not len(coldata):
                                continue

                            csum = self._compute_checksum(coldata, csum)
            else:
                csum = self._compute_checksum(data._get_heap_data(), csum)

        return csum

    def _calculate_datasum(self):
        """
        Calculate the value for the ``DATASUM`` card in the HDU.
        """

        if self._has_data:
            # This method calculates the datasum while incorporating any
            # heap data, which is obviously not handled from the base
            # _calculate_datasum
            return self._calculate_datasum_with_heap()
        else:
            # This is the case where the data has not been read from the file
            # yet. We can handle that in a generic manner so we do it in the
            # base class. The other possibility is that there is no data at
            # all. This can also be handled in a generic manner.
            return super()._calculate_datasum()

    def _writedata_internal(self, fileobj):
        """
        Write the binary table's main data, the gap, and the heap of any
        variable-length array columns to ``fileobj``.

        Returns the total number of bytes written (fixed-size table records
        plus the gap and heap bytes).
        """

        size = 0

        # Nothing to write for a data-less HDU.
        if self.data is None:
            return size

        # All columns must be written in big-endian (FITS) byte order; the
        # context manager swaps them in place and restores them on exit.
        with _binary_table_byte_swap(self.data) as data:
            if _has_unicode_fields(data):
                # If the raw data was a user-supplied recarray, we can't write
                # unicode columns directly to the file, so we have to switch
                # to a slower row-by-row write
                self._writedata_by_row(fileobj)
            else:
                fileobj.writearray(data)
            # write out the heap of variable length array columns this has
            # to be done after the "regular" data is written (above)
            # to avoid a bug in the lustre filesystem client, don't
            # write 0-byte objects
            if data._gap > 0:
                fileobj.write((data._gap * '\0').encode('ascii'))

            nbytes = data._gap

            if not self._manages_own_heap:
                # Write the heap data one column at a time, in the order
                # that the data pointers appear in the column (regardless
                # if that data pointer has a different, previous heap
                # offset listed)
                for idx in range(data._nfields):
                    if not isinstance(data.columns._recformats[idx],
                                      _FormatP):
                        continue

                    field = self.data.field(idx)
                    for row in field:
                        # Skip zero-length array rows; they occupy no heap
                        # space.
                        if len(row) > 0:
                            nbytes += row.nbytes
                            fileobj.writearray(row)
            else:
                # The FITS_rec object is trusted to have laid out its own
                # heap; write it as one contiguous block.
                heap_data = data._get_heap_data()
                if len(heap_data) > 0:
                    nbytes += len(heap_data)
                    fileobj.writearray(heap_data)

            # Record the actual heap size written (excluding the gap) back
            # on the data object.
            data._heapsize = nbytes - data._gap
            size += nbytes

        size += self.data.size * self.data._raw_itemsize

        return size

    def _writedata_by_row(self, fileobj):
        """
        Write the table one field value at a time, row by row.

        Used by _writedata_internal when the data contains unicode columns,
        which must be encoded to ASCII (and padded) before writing.
        """

        fields = [self.data.field(idx)
                  for idx in range(len(self.data.columns))]

        # Creating Record objects is expensive (as in
        # `for row in self.data:` so instead we just iterate over the row
        # indices and get one field at a time:
        for idx in range(len(self.data)):
            for field in fields:
                item = field[idx]
                field_width = None

                if field.dtype.kind == 'U':
                    # Read the field *width* by reading past the field kind.
                    i = field.dtype.str.index(field.dtype.kind)
                    field_width = int(field.dtype.str[i+1:])
                    item = np.char.encode(item, 'ascii')

                fileobj.writearray(item)
                if field_width is not None:
                    # The encoded item may be shorter than the declared field
                    # width; pad with NULs up to the full width.
                    j = item.dtype.str.index(item.dtype.kind)
                    item_length = int(item.dtype.str[j+1:])
                    # Fix padding problem (see #5296).
                    padding = '\x00'*(field_width - item_length)
                    fileobj.write(padding.encode('ascii'))

    # Format description appended to the dump()/load() docstrings below.
    _tdump_file_format = textwrap.dedent("""

        - **datafile:** Each line of the data file represents one row of table
          data. The data is output one column at a time in column order. If
          a column contains an array, each element of the column array in the
          current row is output before moving on to the next column. Each row
          ends with a new line.

          Integer data is output right-justified in a 21-character field
          followed by a blank. Floating point data is output right justified
          using 'g' format in a 21-character field with 15 digits of
          precision, followed by a blank. String data that does not contain
          whitespace is output left-justified in a field whose width matches
          the width specified in the ``TFORM`` header parameter for the
          column, followed by a blank. When the string data contains
          whitespace characters, the string is enclosed in quotation marks
          (``""``). For the last data element in a row, the trailing blank in
          the field is replaced by a new line character.

          For column data containing variable length arrays ('P' format), the
          array data is preceded by the string ``'VLA_Length= '`` and the
          integer length of the array for that row, left-justified in a
          21-character field, followed by a blank.

          .. note::

              This format does *not* support variable length arrays using the
              ('Q' format) due to difficult to overcome ambiguities.
              What this means is that this file format cannot support VLA
              columns in tables stored in files that are over 2 GB in size.

          For column data representing a bit field ('X' format), each bit
          value in the field is output right-justified in a 21-character field
          as 1 (for true) or 0 (for false).

        - **cdfile:** Each line of the column definitions file provides the
          definitions for one column in the table. The line is broken up into
          8, sixteen-character fields. The first field provides the column
          name (``TTYPEn``). The second field provides the column format
          (``TFORMn``). The third field provides the display format
          (``TDISPn``). The fourth field provides the physical units
          (``TUNITn``). The fifth field provides the dimensions for a
          multidimensional array (``TDIMn``). The sixth field provides the
          value that signifies an undefined value (``TNULLn``). The seventh
          field provides the scale factor (``TSCALn``). The eighth field
          provides the offset value (``TZEROn``). A field value of ``""`` is
          used to represent the case where no value is provided.

        - **hfile:** Each line of the header parameters file provides the
          definition of a single HDU header card as represented by the card
          image.
        """)

    @deprecated_renamed_argument('clobber', 'overwrite', '2.0',
                                 message='"clobber" was deprecated in version '
                                         '2.0 and will be removed in version '
                                         '5.1. Use argument "overwrite" '
                                         'instead.')
    def dump(self, datafile=None, cdfile=None, hfile=None, overwrite=False):
        """
        Dump the table HDU to a file in ASCII format. The table may be dumped
        in three separate files, one containing column definitions, one
        containing header parameters, and one for table data.

        Parameters
        ----------
        datafile : path-like or file-like, optional
            Output data file. The default is the root name of the
            fits file associated with this HDU appended with the
            extension ``.txt``.

        cdfile : path-like or file-like, optional
            Output column definitions file. The default is `None`, no
            column definitions output is produced.

        hfile : path-like or file-like, optional
            Output header parameters file. The default is `None`,
            no header parameters output is produced.

        overwrite : bool, optional
            If ``True``, overwrite the output file if it exists. Raises an
            ``OSError`` if ``False`` and the output file exists. Default is
            ``False``.

            .. versionchanged:: 1.3
               ``overwrite`` replaces the deprecated ``clobber`` argument.

        Notes
        -----
        The primary use for the `dump` method is to allow viewing and editing
        the table data and parameters in a standard text editor.
        The `load` method can be used to create a new table from the three
        plain text (ASCII) files.
        """

        # check if the output files already exist
        # (only string paths are checked; open file objects are used as-is)
        exist = []
        files = [datafile, cdfile, hfile]

        for f in files:
            if isinstance(f, str):
                # An existing but empty file is not treated as a conflict.
                if os.path.exists(f) and os.path.getsize(f) != 0:
                    if overwrite:
                        os.remove(f)
                    else:
                        exist.append(f)

        if exist:
            raise OSError(' '.join([f"File '{f}' already exists."
                                    for f in exist])+" If you mean to "
                          "replace the file(s) "
                          "then use the argument "
                          "'overwrite=True'.")

        # Process the data
        self._dump_data(datafile)

        # Process the column definitions
        if cdfile:
            self._dump_coldefs(cdfile)

        # Process the header parameters
        if hfile:
            self._header.tofile(hfile, sep='\n', endcard=False, padding=False)

    # Append the file-format description to the docstring; __doc__ is None
    # when Python is run with -OO, hence the isinstance guard.
    if isinstance(dump.__doc__, str):
        dump.__doc__ += _tdump_file_format.replace('\n', '\n        ')

    def load(cls, datafile, cdfile=None, hfile=None, replace=False,
             header=None):
        """
        Create a table from the input ASCII files. The input is from up to
        three separate files, one containing column definitions, one containing
        header parameters, and one containing column data.

        The column definition and header parameters files are not required.
        When absent the column definitions and/or header parameters are taken
        from the header object given in the header argument; otherwise sensible
        defaults are inferred (though this mode is not recommended).

        Parameters
        ----------
        datafile : path-like or file-like
            Input data file containing the table data in ASCII format.

        cdfile : path-like or file-like, optional
            Input column definition file containing the names,
            formats, display formats, physical units, multidimensional
            array dimensions, undefined values, scale factors, and
            offsets associated with the columns in the table. If
            `None`, the column definitions are taken from the current
            values in this object.

        hfile : path-like or file-like, optional
            Input parameter definition file containing the header
            parameter definitions to be associated with the table. If
            `None`, the header parameter definitions are taken from
            the current values in this objects header.

        replace : bool, optional
            When `True`, indicates that the entire header should be
            replaced with the contents of the ASCII file instead of
            just updating the current header.

        header : `~astropy.io.fits.Header`, optional
            When the cdfile and hfile are missing, use this Header object in
            the creation of the new table and HDU. Otherwise this Header
            supersedes the keywords from hfile, which is only used to update
            values not present in this Header, unless ``replace=True`` in which
            this Header's values are completely replaced with the values from
            hfile.

        Notes
        -----
        The primary use for the `load` method is to allow the input of ASCII
        data that was edited in a standard text editor of the table data and
        parameters. The `dump` method can be used to create the initial ASCII
        files.
        """

        # Process the parameter file
        if header is None:
            header = Header()

        if hfile:
            if replace:
                # The text file completely replaces the supplied header.
                header = Header.fromtextfile(hfile)
            else:
                # The text file only fills in keywords missing from the
                # supplied header.
                header.extend(Header.fromtextfile(hfile), update=True,
                              update_first=True)

        coldefs = None
        # Process the column definitions file
        if cdfile:
            coldefs = cls._load_coldefs(cdfile)

        # Process the data file
        data = cls._load_data(datafile, coldefs)
        if coldefs is None:
            # No cdfile given; infer the column definitions from the data.
            coldefs = ColDefs(data)

        # Create a new HDU using the supplied header and data
        hdu = cls(data=data, header=header)
        hdu.columns = coldefs
        return hdu

    # Same -OO guard as for dump above.
    if isinstance(load.__doc__, str):
        load.__doc__ += _tdump_file_format.replace('\n', '\n        ')

    load = classmethod(load)
    # Have to create a classmethod from this here instead of as a decorator;
    # otherwise we can't update __doc__

    def _dump_data(self, fileobj):
        """
        Write the table data in the ASCII format read by BinTableHDU.load()
        to fileobj.
        """

        # Default output filename: the HDU's file root with a .txt extension.
        if not fileobj and self._file:
            root = os.path.splitext(self._file.name)[0]
            fileobj = root + '.txt'

        close_file = False

        if isinstance(fileobj, str):
            fileobj = open(fileobj, 'w')
            close_file = True

        linewriter = csv.writer(fileobj, dialect=FITSTableDumpDialect)

        # Process each row of the table and output one row at a time
        def format_value(val, format):
            # NOTE: falls through (implicitly returns None) for format codes
            # not handled below.
            if format[0] == 'S':
                itemsize = int(format[1:])
                return '{:{size}}'.format(val, size=itemsize)
            elif format in np.typecodes['AllInteger']:
                # output integer
                return f'{val:21d}'
            elif format in np.typecodes['Complex']:
                return f'{val.real:21.15g}+{val.imag:.15g}j'
            elif format in np.typecodes['Float']:
                # output floating point
                return f'{val:#21.15g}'

        for row in self.data:
            line = []   # the line for this row of the table

            # Process each column of the row.
            for column in self.columns:
                # format of data in a variable length array
                # where None means it is not a VLA:
                vla_format = None
                format = _convert_format(column.format)

                if isinstance(format, _FormatP):
                    # P format means this is a variable length array so output
                    # the length of the array for this row and set the format
                    # for the VLA data
                    line.append('VLA_Length=')
                    line.append(f'{len(row[column.name]):21d}')
                    _, dtype, option = _parse_tformat(column.format)
                    vla_format = FITS2NUMPY[option[0]][0]

                if vla_format:
                    # Output the data for each element in the array
                    for val in row[column.name].flat:
                        line.append(format_value(val, vla_format))
                else:
                    # The column data is a single element
                    dtype = self.data.dtype.fields[column.name][0]
                    array_format = dtype.char
                    if array_format == 'V':
                        # Multi-element (sub-array) column; use the base
                        # element type for formatting.
                        array_format = dtype.base.char
                    if array_format == 'S':
                        array_format += str(dtype.itemsize)

                    if dtype.char == 'V':
                        # Sub-array cell: dump each element in flat order.
                        for value in row[column.name].flat:
                            line.append(format_value(value, array_format))
                    else:
                        line.append(format_value(row[column.name],
                                                 array_format))
            linewriter.writerow(line)
        if close_file:
            fileobj.close()

    def _dump_coldefs(self, fileobj):
        """
        Write the column definition parameters in the ASCII format read by
        BinTableHDU.load() to fileobj.
        """

        close_file = False

        if isinstance(fileobj, str):
            fileobj = open(fileobj, 'w')
            close_file = True

        # Process each column of the table and output the result to the
        # file one at a time
        for column in self.columns:
            line = [column.name, column.format]
            attrs = ['disp', 'unit', 'dim', 'null', 'bscale', 'bzero']
            # Missing attribute values are dumped as the literal placeholder
            # '""', in 16-character fields.
            line += ['{:16s}'.format(value if value else '""')
                     for value in (getattr(column, attr) for attr in attrs)]
            fileobj.write(' '.join(line))
            fileobj.write('\n')

        if close_file:
            fileobj.close()

    @classmethod
    def _load_data(cls, fileobj, coldefs=None):
        """
        Read the table data from the ASCII file output by BinTableHDU.dump().
        """

        close_file = False

        if isinstance(fileobj, str):
            fileobj = open(fileobj, 'r')
            close_file = True

        initialpos = fileobj.tell()  # We'll be returning here later
        linereader = csv.reader(fileobj, dialect=FITSTableDumpDialect)

        # First we need to do some preprocessing on the file to find out how
        # much memory we'll need to reserve for the table. This is necessary
        # even if we already have the coldefs in order to determine how many
        # rows to reserve memory for
        # vla_lengths holds one entry per column: the VLA length for 'P'
        # columns, or None for scalar/fixed-size columns.
        vla_lengths = []
        recformats = []
        names = []
        nrows = 0
        if coldefs is not None:
            recformats = coldefs._recformats
            names = coldefs.names

        def update_recformats(value, idx):
            # Infer a format for this value and widen the column's recformat
            # if the inferred format is "larger" than the current one.
            fitsformat = _scalar_to_format(value)
            recformat = _convert_format(fitsformat)
            if idx >= len(recformats):
                recformats.append(recformat)
            else:
                if _cmp_recformats(recformats[idx], recformat) < 0:
                    recformats[idx] = recformat

        # TODO: The handling of VLAs could probably be simplified a bit
        for row in linereader:
            nrows += 1
            # With explicit coldefs only the row count is needed from this
            # first pass.
            if coldefs is not None:
                continue
            col = 0
            idx = 0
            while idx < len(row):
                if row[idx] == 'VLA_Length=':
                    if col < len(vla_lengths):
                        vla_length = vla_lengths[col]
                    else:
                        vla_length = int(row[idx + 1])
                        vla_lengths.append(vla_length)
                    # Skip the 'VLA_Length=' marker and the length field.
                    idx += 2
                    while vla_length:
                        update_recformats(row[idx], col)
                        vla_length -= 1
                        idx += 1
                    col += 1
                else:
                    if col >= len(vla_lengths):
                        vla_lengths.append(None)
                    update_recformats(row[idx], col)
                    col += 1
                    idx += 1

        # Update the recformats for any VLAs
        for idx, length in enumerate(vla_lengths):
            if length is not None:
                recformats[idx] = str(length) + recformats[idx]

        dtype = np.rec.format_parser(recformats, names, None).dtype

        # TODO: In the future maybe enable loading a bit at a time so that we
        # can convert from this format to an actual FITS file on disk without
        # needing enough physical memory to hold the entire thing at once
        hdu = BinTableHDU.from_columns(np.recarray(shape=1, dtype=dtype),
                                       nrows=nrows, fill=True)

        # TODO: It seems to me a lot of this could/should be handled from
        # within the FITS_rec class rather than here.
        data = hdu.data
        # Convert the fixed-size placeholder arrays created above into true
        # variable-length array columns.
        for idx, length in enumerate(vla_lengths):
            if length is not None:
                arr = data.columns._arrays[idx]
                # Strip the length prefix added earlier to recover the
                # element format.
                dt = recformats[idx][len(str(length)):]

                # NOTE: FormatQ not supported here; it's hard to determine
                # whether or not it will be necessary to use a wider descriptor
                # type. The function documentation will have to serve as a
                # warning that this is not supported.
                recformats[idx] = _FormatP(dt, max=length)
                data.columns._recformats[idx] = recformats[idx]
                name = data.columns.names[idx]
                data._cache_field(name, _makep(arr, arr, recformats[idx]))

        def format_value(col, val):
            # Special formatting for a couple particular data types
            if recformats[col] == FITS2NUMPY['L']:
                return bool(int(val))
            elif recformats[col] == FITS2NUMPY['M']:
                # For some reason, in arrays/fields where numpy expects a
                # complex it's not happy to take a string representation
                # (though it's happy to do that in other contexts), so we have
                # to convert the string representation for it:
                return complex(val)
            else:
                return val

        # Jump back to the start of the data and create a new line reader
        fileobj.seek(initialpos)
        linereader = csv.reader(fileobj, dialect=FITSTableDumpDialect)
        for row, line in enumerate(linereader):
            col = 0
            idx = 0
            while idx < len(line):
                # slice_ is None for a scalar cell; otherwise it selects the
                # flattened array values for this cell from the parsed line.
                if line[idx] == 'VLA_Length=':
                    vla_len = vla_lengths[col]
                    idx += 2
                    slice_ = slice(idx, idx + vla_len)
                    data[row][col][:] = line[idx:idx + vla_len]
                    idx += vla_len
                elif dtype[col].shape:
                    # This is an array column
                    array_size = int(np.multiply.reduce(dtype[col].shape))
                    slice_ = slice(idx, idx + array_size)
                    idx += array_size
                else:
                    slice_ = None

                if slice_ is None:
                    # This is a scalar row element
                    data[row][col] = format_value(col, line[idx])
                    idx += 1
                else:
                    data[row][col].flat[:] = [format_value(col, val)
                                              for val in line[slice_]]

                col += 1

        if close_file:
            fileobj.close()

        return data

    @classmethod
    def _load_coldefs(cls, fileobj):
        """
        Read the table column definitions from the ASCII file output by
        BinTableHDU.dump().
        """

        close_file = False

        if isinstance(fileobj, str):
            fileobj = open(fileobj, 'r')
            close_file = True

        columns = []

        for line in fileobj:
            # Each line has 8 whitespace-separated fields (see the cdfile
            # format description); strip the trailing newline before split.
            words = line[:-1].split()
            kwargs = {}
            # First five fields are kept as strings ('""' means no value).
            for key in ['name', 'format', 'disp', 'unit', 'dim']:
                kwargs[key] = words.pop(0).replace('""', '')

            # Last three fields are numeric when present.
            for key in ['null', 'bscale', 'bzero']:
                word = words.pop(0).replace('""', '')
                if word:
                    word = _str_to_num(word)
                kwargs[key] = word
            columns.append(Column(**kwargs))

        if close_file:
            fileobj.close()

        return ColDefs(columns)


@contextlib.contextmanager
def _binary_table_byte_swap(data):
    """
    Ensures that all the data of a binary FITS table (represented as a FITS_rec
    object) is in a big-endian byte order. Columns are swapped in-place one
    at a time, and then returned to their previous byte order when this context
    manager exits.

    Because a new dtype is needed to represent the byte-swapped columns, the
    new dtype is temporarily applied as well.
    """

    orig_dtype = data.dtype

    names = []
    formats = []
    offsets = []

    to_swap = []

    # On a little-endian machine, native order ('=') is little-endian, so
    # both native and explicitly little-endian ('<') fields need swapping to
    # reach big-endian; on a big-endian machine only '<' fields do.
    if sys.byteorder == 'little':
        swap_types = ('<', '=')
    else:
        swap_types = ('<',)

    for idx, name in enumerate(orig_dtype.names):
        field = _get_recarray_field(data, idx)

        field_dtype, field_offset = orig_dtype.fields[name]
        names.append(name)
        formats.append(field_dtype)
        offsets.append(field_offset)

        # Character arrays have no byte order to swap.
        if isinstance(field, chararray.chararray):
            continue

        # only swap unswapped
        # must use field_dtype.base here since for multi-element dtypes,
        # the .str with be '|V<N>' where <N> is the total bytes per element
        if field.itemsize > 1 and field_dtype.base.str[0] in swap_types:
            to_swap.append(field)
            # Override the dtype for this field in the new record dtype with
            # the byteswapped version
            formats[-1] = field_dtype.newbyteorder()

        # deal with var length table
        recformat = data.columns._recformats[idx]
        if isinstance(recformat, _FormatP):
            coldata = data.field(idx)
            for c in coldata:
                if (not isinstance(c, chararray.chararray) and
                        c.itemsize > 1 and c.dtype.str[0] in swap_types):
                    to_swap.append(c)

    # Swap in reverse order so restoring (below, in forward order) undoes
    # the swaps symmetrically.
    for arr in reversed(to_swap):
        arr.byteswap(True)

    data.dtype = np.dtype({'names': names,
                           'formats': formats,
                           'offsets': offsets})

    yield data

    # Restore the original byte order and dtype on exit.
    for arr in to_swap:
        arr.byteswap(True)

    data.dtype = orig_dtype