1# Licensed under a 3-clause BSD style license - see PYFITS.rst 2 3import copy 4import operator 5import warnings 6import weakref 7 8from contextlib import suppress 9from functools import reduce 10 11import numpy as np 12 13from numpy import char as chararray 14 15from .column import (ASCIITNULL, FITS2NUMPY, ASCII2NUMPY, ASCII2STR, ColDefs, 16 _AsciiColDefs, _FormatX, _FormatP, _VLF, _get_index, 17 _wrapx, _unwrapx, _makep, Delayed) 18from .util import decode_ascii, encode_ascii, _rstrip_inplace 19from astropy.utils import lazyproperty 20 21 22class FITS_record: 23 """ 24 FITS record class. 25 26 `FITS_record` is used to access records of the `FITS_rec` object. 27 This will allow us to deal with scaled columns. It also handles 28 conversion/scaling of columns in ASCII tables. The `FITS_record` 29 class expects a `FITS_rec` object as input. 30 """ 31 32 def __init__(self, input, row=0, start=None, end=None, step=None, 33 base=None, **kwargs): 34 """ 35 Parameters 36 ---------- 37 input : array 38 The array to wrap. 39 40 row : int, optional 41 The starting logical row of the array. 42 43 start : int, optional 44 The starting column in the row associated with this object. 45 Used for subsetting the columns of the `FITS_rec` object. 46 47 end : int, optional 48 The ending column in the row associated with this object. 49 Used for subsetting the columns of the `FITS_rec` object. 50 """ 51 52 self.array = input 53 self.row = row 54 if base: 55 width = len(base) 56 else: 57 width = self.array._nfields 58 59 s = slice(start, end, step).indices(width) 60 self.start, self.end, self.step = s 61 self.base = base 62 63 def __getitem__(self, key): 64 if isinstance(key, str): 65 indx = _get_index(self.array.names, key) 66 67 if indx < self.start or indx > self.end - 1: 68 raise KeyError(f"Key '{key}' does not exist.") 69 elif isinstance(key, slice): 70 return type(self)(self.array, self.row, key.start, key.stop, 71 key.step, self) 72 else: 73 indx = self._get_index(key) 74 75 if indx > self.array._nfields - 1: 76 raise IndexError('Index out of bounds') 77 78 return self.array.field(indx)[self.row] 79 80 def __setitem__(self, key, value): 81 if isinstance(key, str): 82 indx = _get_index(self.array.names, key) 83 84 if indx < self.start or indx > self.end - 1: 85 raise KeyError(f"Key '{key}' does not exist.") 86 elif isinstance(key, slice): 87 for indx in range(slice.start, slice.stop, slice.step): 88 indx = self._get_indx(indx) 89 self.array.field(indx)[self.row] = value 90 else: 91 indx = self._get_index(key) 92 if indx > self.array._nfields - 1: 93 raise IndexError('Index out of bounds') 94 95 self.array.field(indx)[self.row] = value 96 97 def __len__(self): 98 return len(range(self.start, self.end, self.step)) 99 100 def __repr__(self): 101 """ 102 Display a single row. 103 """ 104 105 outlist = [] 106 for idx in range(len(self)): 107 outlist.append(repr(self[idx])) 108 return f"({', '.join(outlist)})" 109 110 def field(self, field): 111 """ 112 Get the field data of the record. 113 """ 114 115 return self.__getitem__(field) 116 117 def setfield(self, field, value): 118 """ 119 Set the field data of the record. 120 """ 121 122 self.__setitem__(field, value) 123 124 @lazyproperty 125 def _bases(self): 126 bases = [weakref.proxy(self)] 127 base = self.base 128 while base: 129 bases.append(base) 130 base = base.base 131 return bases 132 133 def _get_index(self, indx): 134 indices = np.ogrid[:self.array._nfields] 135 for base in reversed(self._bases): 136 if base.step < 1: 137 s = slice(base.start, None, base.step) 138 else: 139 s = slice(base.start, base.end, base.step) 140 indices = indices[s] 141 return indices[indx] 142 143 144class FITS_rec(np.recarray): 145 """ 146 FITS record array class. 147 148 `FITS_rec` is the data part of a table HDU's data part. This is a layer 149 over the `~numpy.recarray`, so we can deal with scaled columns. 150 151 It inherits all of the standard methods from `numpy.ndarray`. 152 """ 153 154 _record_type = FITS_record 155 _character_as_bytes = False 156 157 def __new__(subtype, input): 158 """ 159 Construct a FITS record array from a recarray. 160 """ 161 162 # input should be a record array 163 if input.dtype.subdtype is None: 164 self = np.recarray.__new__(subtype, input.shape, input.dtype, 165 buf=input.data) 166 else: 167 self = np.recarray.__new__(subtype, input.shape, input.dtype, 168 buf=input.data, strides=input.strides) 169 170 self._init() 171 if self.dtype.fields: 172 self._nfields = len(self.dtype.fields) 173 174 return self 175 176 def __setstate__(self, state): 177 meta = state[-1] 178 column_state = state[-2] 179 state = state[:-2] 180 181 super().__setstate__(state) 182 183 self._col_weakrefs = weakref.WeakSet() 184 185 for attr, value in zip(meta, column_state): 186 setattr(self, attr, value) 187 188 def __reduce__(self): 189 """ 190 Return a 3-tuple for pickling a FITS_rec. Use the super-class 191 functionality but then add in a tuple of FITS_rec-specific 192 values that get used in __setstate__. 193 """ 194 195 reconst_func, reconst_func_args, state = super().__reduce__() 196 197 # Define FITS_rec-specific attrs that get added to state 198 column_state = [] 199 meta = [] 200 201 for attrs in ['_converted', '_heapoffset', '_heapsize', '_nfields', 202 '_gap', '_uint', 'parnames', '_coldefs']: 203 204 with suppress(AttributeError): 205 # _coldefs can be Delayed, and file objects cannot be 206 # picked, it needs to be deepcopied first 207 if attrs == '_coldefs': 208 column_state.append(self._coldefs.__deepcopy__(None)) 209 else: 210 column_state.append(getattr(self, attrs)) 211 meta.append(attrs) 212 213 state = state + (column_state, meta) 214 215 return reconst_func, reconst_func_args, state 216 217 def __array_finalize__(self, obj): 218 if obj is None: 219 return 220 221 if isinstance(obj, FITS_rec): 222 self._character_as_bytes = obj._character_as_bytes 223 224 if isinstance(obj, FITS_rec) and obj.dtype == self.dtype: 225 self._converted = obj._converted 226 self._heapoffset = obj._heapoffset 227 self._heapsize = obj._heapsize 228 self._col_weakrefs = obj._col_weakrefs 229 self._coldefs = obj._coldefs 230 self._nfields = obj._nfields 231 self._gap = obj._gap 232 self._uint = obj._uint 233 elif self.dtype.fields is not None: 234 # This will allow regular ndarrays with fields, rather than 235 # just other FITS_rec objects 236 self._nfields = len(self.dtype.fields) 237 self._converted = {} 238 239 self._heapoffset = getattr(obj, '_heapoffset', 0) 240 self._heapsize = getattr(obj, '_heapsize', 0) 241 242 self._gap = getattr(obj, '_gap', 0) 243 self._uint = getattr(obj, '_uint', False) 244 self._col_weakrefs = weakref.WeakSet() 245 self._coldefs = ColDefs(self) 246 247 # Work around chicken-egg problem. Column.array relies on the 248 # _coldefs attribute to set up ref back to parent FITS_rec; however 249 # in the above line the self._coldefs has not been assigned yet so 250 # this fails. This patches that up... 251 for col in self._coldefs: 252 del col.array 253 col._parent_fits_rec = weakref.ref(self) 254 else: 255 self._init() 256 257 def _init(self): 258 """Initializes internal attributes specific to FITS-isms.""" 259 260 self._nfields = 0 261 self._converted = {} 262 self._heapoffset = 0 263 self._heapsize = 0 264 self._col_weakrefs = weakref.WeakSet() 265 self._coldefs = None 266 self._gap = 0 267 self._uint = False 268 269 @classmethod 270 def from_columns(cls, columns, nrows=0, fill=False, character_as_bytes=False): 271 """ 272 Given a `ColDefs` object of unknown origin, initialize a new `FITS_rec` 273 object. 274 275 .. note:: 276 277 This was originally part of the ``new_table`` function in the table 278 module but was moved into a class method since most of its 279 functionality always had more to do with initializing a `FITS_rec` 280 object than anything else, and much of it also overlapped with 281 ``FITS_rec._scale_back``. 282 283 Parameters 284 ---------- 285 columns : sequence of `Column` or a `ColDefs` 286 The columns from which to create the table data. If these 287 columns have data arrays attached that data may be used in 288 initializing the new table. Otherwise the input columns 289 will be used as a template for a new table with the requested 290 number of rows. 291 292 nrows : int 293 Number of rows in the new table. If the input columns have data 294 associated with them, the size of the largest input column is used. 295 Otherwise the default is 0. 296 297 fill : bool 298 If `True`, will fill all cells with zeros or blanks. If 299 `False`, copy the data from input, undefined cells will still 300 be filled with zeros/blanks. 301 """ 302 303 if not isinstance(columns, ColDefs): 304 columns = ColDefs(columns) 305 306 # read the delayed data 307 for column in columns: 308 arr = column.array 309 if isinstance(arr, Delayed): 310 if arr.hdu.data is None: 311 column.array = None 312 else: 313 column.array = _get_recarray_field(arr.hdu.data, 314 arr.field) 315 # Reset columns._arrays (which we may want to just do away with 316 # altogether 317 del columns._arrays 318 319 # use the largest column shape as the shape of the record 320 if nrows == 0: 321 for arr in columns._arrays: 322 if arr is not None: 323 dim = arr.shape[0] 324 else: 325 dim = 0 326 if dim > nrows: 327 nrows = dim 328 329 raw_data = np.empty(columns.dtype.itemsize * nrows, dtype=np.uint8) 330 raw_data.fill(ord(columns._padding_byte)) 331 data = np.recarray(nrows, dtype=columns.dtype, buf=raw_data).view(cls) 332 data._character_as_bytes = character_as_bytes 333 334 # Previously this assignment was made from hdu.columns, but that's a 335 # bug since if a _TableBaseHDU has a FITS_rec in its .data attribute 336 # the _TableBaseHDU.columns property is actually returned from 337 # .data._coldefs, so this assignment was circular! Don't make that 338 # mistake again. 339 # All of this is an artifact of the fragility of the FITS_rec class, 340 # and that it can't just be initialized by columns... 341 data._coldefs = columns 342 343 # If fill is True we don't copy anything from the column arrays. We're 344 # just using them as a template, and returning a table filled with 345 # zeros/blanks 346 if fill: 347 return data 348 349 # Otherwise we have to fill the recarray with data from the input 350 # columns 351 for idx, column in enumerate(columns): 352 # For each column in the ColDef object, determine the number of 353 # rows in that column. This will be either the number of rows in 354 # the ndarray associated with the column, or the number of rows 355 # given in the call to this function, which ever is smaller. If 356 # the input FILL argument is true, the number of rows is set to 357 # zero so that no data is copied from the original input data. 358 arr = column.array 359 360 if arr is None: 361 array_size = 0 362 else: 363 array_size = len(arr) 364 365 n = min(array_size, nrows) 366 367 # TODO: At least *some* of this logic is mostly redundant with the 368 # _convert_foo methods in this class; see if we can eliminate some 369 # of that duplication. 370 371 if not n: 372 # The input column had an empty array, so just use the fill 373 # value 374 continue 375 376 field = _get_recarray_field(data, idx) 377 name = column.name 378 fitsformat = column.format 379 recformat = fitsformat.recformat 380 381 outarr = field[:n] 382 inarr = arr[:n] 383 384 if isinstance(recformat, _FormatX): 385 # Data is a bit array 386 if inarr.shape[-1] == recformat.repeat: 387 _wrapx(inarr, outarr, recformat.repeat) 388 continue 389 elif isinstance(recformat, _FormatP): 390 data._cache_field(name, _makep(inarr, field, recformat, 391 nrows=nrows)) 392 continue 393 # TODO: Find a better way of determining that the column is meant 394 # to be FITS L formatted 395 elif recformat[-2:] == FITS2NUMPY['L'] and inarr.dtype == bool: 396 # column is boolean 397 # The raw data field should be filled with either 'T' or 'F' 398 # (not 0). Use 'F' as a default 399 field[:] = ord('F') 400 # Also save the original boolean array in data._converted so 401 # that it doesn't have to be re-converted 402 converted = np.zeros(field.shape, dtype=bool) 403 converted[:n] = inarr 404 data._cache_field(name, converted) 405 # TODO: Maybe this step isn't necessary at all if _scale_back 406 # will handle it? 407 inarr = np.where(inarr == np.False_, ord('F'), ord('T')) 408 elif (columns[idx]._physical_values and 409 columns[idx]._pseudo_unsigned_ints): 410 # Temporary hack... 411 bzero = column.bzero 412 converted = np.zeros(field.shape, dtype=inarr.dtype) 413 converted[:n] = inarr 414 data._cache_field(name, converted) 415 if n < nrows: 416 # Pre-scale rows below the input data 417 field[n:] = -bzero 418 419 inarr = inarr - bzero 420 elif isinstance(columns, _AsciiColDefs): 421 # Regardless whether the format is character or numeric, if the 422 # input array contains characters then it's already in the raw 423 # format for ASCII tables 424 if fitsformat._pseudo_logical: 425 # Hack to support converting from 8-bit T/F characters 426 # Normally the column array is a chararray of 1 character 427 # strings, but we need to view it as a normal ndarray of 428 # 8-bit ints to fill it with ASCII codes for 'T' and 'F' 429 outarr = field.view(np.uint8, np.ndarray)[:n] 430 elif arr.dtype.kind not in ('S', 'U'): 431 # Set up views of numeric columns with the appropriate 432 # numeric dtype 433 # Fill with the appropriate blanks for the column format 434 data._cache_field(name, np.zeros(nrows, dtype=arr.dtype)) 435 outarr = data._converted[name][:n] 436 437 outarr[:] = inarr 438 continue 439 440 if inarr.shape != outarr.shape: 441 if (inarr.dtype.kind == outarr.dtype.kind and 442 inarr.dtype.kind in ('U', 'S') and 443 inarr.dtype != outarr.dtype): 444 445 inarr_rowsize = inarr[0].size 446 inarr = inarr.flatten().view(outarr.dtype) 447 448 # This is a special case to handle input arrays with 449 # non-trivial TDIMn. 450 # By design each row of the outarray is 1-D, while each row of 451 # the input array may be n-D 452 if outarr.ndim > 1: 453 # The normal case where the first dimension is the rows 454 inarr_rowsize = inarr[0].size 455 inarr = inarr.reshape(n, inarr_rowsize) 456 outarr[:, :inarr_rowsize] = inarr 457 else: 458 # Special case for strings where the out array only has one 459 # dimension (the second dimension is rolled up into the 460 # strings 461 outarr[:n] = inarr.ravel() 462 else: 463 outarr[:] = inarr 464 465 # Now replace the original column array references with the new 466 # fields 467 # This is required to prevent the issue reported in 468 # https://github.com/spacetelescope/PyFITS/issues/99 469 for idx in range(len(columns)): 470 columns._arrays[idx] = data.field(idx) 471 472 return data 473 474 def __repr__(self): 475 # Force use of the normal ndarray repr (rather than the new 476 # one added for recarray in Numpy 1.10) for backwards compat 477 return np.ndarray.__repr__(self) 478 479 def __getattribute__(self, attr): 480 # First, see if ndarray has this attr, and return it if so. Note that 481 # this means a field with the same name as an ndarray attr cannot be 482 # accessed by attribute, this is Numpy's default behavior. 483 # We avoid using np.recarray.__getattribute__ here because after doing 484 # this check it would access the columns without doing the conversions 485 # that we need (with .field, see below). 486 try: 487 return object.__getattribute__(self, attr) 488 except AttributeError: 489 pass 490 491 # attr might still be a fieldname. If we have column definitions, 492 # we should access this via .field, as the data may have to be scaled. 493 if self._coldefs is not None and attr in self.columns.names: 494 return self.field(attr) 495 496 # If not, just let the usual np.recarray override deal with it. 497 return super().__getattribute__(attr) 498 499 def __getitem__(self, key): 500 if self._coldefs is None: 501 return super().__getitem__(key) 502 503 if isinstance(key, str): 504 return self.field(key) 505 506 # Have to view as a recarray then back as a FITS_rec, otherwise the 507 # circular reference fix/hack in FITS_rec.field() won't preserve 508 # the slice. 509 out = self.view(np.recarray)[key] 510 if type(out) is not np.recarray: 511 # Oops, we got a single element rather than a view. In that case, 512 # return a Record, which has no __getstate__ and is more efficient. 513 return self._record_type(self, key) 514 515 # We got a view; change it back to our class, and add stuff 516 out = out.view(type(self)) 517 out._uint = self._uint 518 out._coldefs = ColDefs(self._coldefs) 519 arrays = [] 520 out._converted = {} 521 for idx, name in enumerate(self._coldefs.names): 522 # 523 # Store the new arrays for the _coldefs object 524 # 525 arrays.append(self._coldefs._arrays[idx][key]) 526 527 # Ensure that the sliced FITS_rec will view the same scaled 528 # columns as the original; this is one of the few cases where 529 # it is not necessary to use _cache_field() 530 if name in self._converted: 531 dummy = self._converted[name] 532 field = np.ndarray.__getitem__(dummy, key) 533 out._converted[name] = field 534 535 out._coldefs._arrays = arrays 536 return out 537 538 def __setitem__(self, key, value): 539 if self._coldefs is None: 540 return super().__setitem__(key, value) 541 542 if isinstance(key, str): 543 self[key][:] = value 544 return 545 546 if isinstance(key, slice): 547 end = min(len(self), key.stop or len(self)) 548 end = max(0, end) 549 start = max(0, key.start or 0) 550 end = min(end, start + len(value)) 551 552 for idx in range(start, end): 553 self.__setitem__(idx, value[idx - start]) 554 return 555 556 if isinstance(value, FITS_record): 557 for idx in range(self._nfields): 558 self.field(self.names[idx])[key] = value.field(self.names[idx]) 559 elif isinstance(value, (tuple, list, np.void)): 560 if self._nfields == len(value): 561 for idx in range(self._nfields): 562 self.field(idx)[key] = value[idx] 563 else: 564 raise ValueError('Input tuple or list required to have {} ' 565 'elements.'.format(self._nfields)) 566 else: 567 raise TypeError('Assignment requires a FITS_record, tuple, or ' 568 'list as input.') 569 570 def _ipython_key_completions_(self): 571 return self.names 572 573 def copy(self, order='C'): 574 """ 575 The Numpy documentation lies; `numpy.ndarray.copy` is not equivalent to 576 `numpy.copy`. Differences include that it re-views the copied array as 577 self's ndarray subclass, as though it were taking a slice; this means 578 ``__array_finalize__`` is called and the copy shares all the array 579 attributes (including ``._converted``!). So we need to make a deep 580 copy of all those attributes so that the two arrays truly do not share 581 any data. 582 """ 583 584 new = super().copy(order=order) 585 586 new.__dict__ = copy.deepcopy(self.__dict__) 587 return new 588 589 @property 590 def columns(self): 591 """A user-visible accessor for the coldefs.""" 592 593 return self._coldefs 594 595 @property 596 def _coldefs(self): 597 # This used to be a normal internal attribute, but it was changed to a 598 # property as a quick and transparent way to work around the reference 599 # leak bug fixed in https://github.com/astropy/astropy/pull/4539 600 # 601 # See the long comment in the Column.array property for more details 602 # on this. But in short, FITS_rec now has a ._col_weakrefs attribute 603 # which is a WeakSet of weakrefs to each Column in _coldefs. 604 # 605 # So whenever ._coldefs is set we also add each Column in the ColDefs 606 # to the weakrefs set. This is an easy way to find out if a Column has 607 # any references to it external to the FITS_rec (i.e. a user assigned a 608 # column to a variable). If the column is still in _col_weakrefs then 609 # there are other references to it external to this FITS_rec. We use 610 # that information in __del__ to save off copies of the array data 611 # for those columns to their Column.array property before our memory 612 # is freed. 613 return self.__dict__.get('_coldefs') 614 615 @_coldefs.setter 616 def _coldefs(self, cols): 617 self.__dict__['_coldefs'] = cols 618 if isinstance(cols, ColDefs): 619 for col in cols.columns: 620 self._col_weakrefs.add(col) 621 622 @_coldefs.deleter 623 def _coldefs(self): 624 try: 625 del self.__dict__['_coldefs'] 626 except KeyError as exc: 627 raise AttributeError(exc.args[0]) 628 629 def __del__(self): 630 try: 631 del self._coldefs 632 if self.dtype.fields is not None: 633 for col in self._col_weakrefs: 634 635 if col.array is not None: 636 col.array = col.array.copy() 637 638 # See issues #4690 and #4912 639 except (AttributeError, TypeError): # pragma: no cover 640 pass 641 642 @property 643 def names(self): 644 """List of column names.""" 645 646 if self.dtype.fields: 647 return list(self.dtype.names) 648 elif getattr(self, '_coldefs', None) is not None: 649 return self._coldefs.names 650 else: 651 return None 652 653 @property 654 def formats(self): 655 """List of column FITS formats.""" 656 657 if getattr(self, '_coldefs', None) is not None: 658 return self._coldefs.formats 659 660 return None 661 662 @property 663 def _raw_itemsize(self): 664 """ 665 Returns the size of row items that would be written to the raw FITS 666 file, taking into account the possibility of unicode columns being 667 compactified. 668 669 Currently for internal use only. 670 """ 671 672 if _has_unicode_fields(self): 673 total_itemsize = 0 674 for field in self.dtype.fields.values(): 675 itemsize = field[0].itemsize 676 if field[0].kind == 'U': 677 itemsize = itemsize // 4 678 total_itemsize += itemsize 679 return total_itemsize 680 else: 681 # Just return the normal itemsize 682 return self.itemsize 683 684 def field(self, key): 685 """ 686 A view of a `Column`'s data as an array. 687 """ 688 689 # NOTE: The *column* index may not be the same as the field index in 690 # the recarray, if the column is a phantom column 691 column = self.columns[key] 692 name = column.name 693 format = column.format 694 695 if format.dtype.itemsize == 0: 696 warnings.warn( 697 'Field {!r} has a repeat count of 0 in its format code, ' 698 'indicating an empty field.'.format(key)) 699 return np.array([], dtype=format.dtype) 700 701 # If field's base is a FITS_rec, we can run into trouble because it 702 # contains a reference to the ._coldefs object of the original data; 703 # this can lead to a circular reference; see ticket #49 704 base = self 705 while (isinstance(base, FITS_rec) and 706 isinstance(base.base, np.recarray)): 707 base = base.base 708 # base could still be a FITS_rec in some cases, so take care to 709 # use rec.recarray.field to avoid a potential infinite 710 # recursion 711 field = _get_recarray_field(base, name) 712 713 if name not in self._converted: 714 recformat = format.recformat 715 # TODO: If we're now passing the column to these subroutines, do we 716 # really need to pass them the recformat? 717 if isinstance(recformat, _FormatP): 718 # for P format 719 converted = self._convert_p(column, field, recformat) 720 else: 721 # Handle all other column data types which are fixed-width 722 # fields 723 converted = self._convert_other(column, field, recformat) 724 725 # Note: Never assign values directly into the self._converted dict; 726 # always go through self._cache_field; this way self._converted is 727 # only used to store arrays that are not already direct views of 728 # our own data. 729 self._cache_field(name, converted) 730 return converted 731 732 return self._converted[name] 733 734 def _cache_field(self, name, field): 735 """ 736 Do not store fields in _converted if one of its bases is self, 737 or if it has a common base with self. 738 739 This results in a reference cycle that cannot be broken since 740 ndarrays do not participate in cyclic garbage collection. 741 """ 742 743 base = field 744 while True: 745 self_base = self 746 while True: 747 if self_base is base: 748 return 749 750 if getattr(self_base, 'base', None) is not None: 751 self_base = self_base.base 752 else: 753 break 754 755 if getattr(base, 'base', None) is not None: 756 base = base.base 757 else: 758 break 759 760 self._converted[name] = field 761 762 def _update_column_attribute_changed(self, column, idx, attr, old_value, 763 new_value): 764 """ 765 Update how the data is formatted depending on changes to column 766 attributes initiated by the user through the `Column` interface. 767 768 Dispatches column attribute change notifications to individual methods 769 for each attribute ``_update_column_<attr>`` 770 """ 771 772 method_name = f'_update_column_{attr}' 773 if hasattr(self, method_name): 774 # Right now this is so we can be lazy and not implement updaters 775 # for every attribute yet--some we may not need at all, TBD 776 getattr(self, method_name)(column, idx, old_value, new_value) 777 778 def _update_column_name(self, column, idx, old_name, name): 779 """Update the dtype field names when a column name is changed.""" 780 781 dtype = self.dtype 782 # Updating the names on the dtype should suffice 783 dtype.names = dtype.names[:idx] + (name,) + dtype.names[idx + 1:] 784 785 def _convert_x(self, field, recformat): 786 """Convert a raw table column to a bit array as specified by the 787 FITS X format. 788 """ 789 790 dummy = np.zeros(self.shape + (recformat.repeat,), dtype=np.bool_) 791 _unwrapx(field, dummy, recformat.repeat) 792 return dummy 793 794 def _convert_p(self, column, field, recformat): 795 """Convert a raw table column of FITS P or Q format descriptors 796 to a VLA column with the array data returned from the heap. 797 """ 798 799 dummy = _VLF([None] * len(self), dtype=recformat.dtype) 800 raw_data = self._get_raw_data() 801 802 if raw_data is None: 803 raise OSError( 804 "Could not find heap data for the {!r} variable-length " 805 "array column.".format(column.name)) 806 807 for idx in range(len(self)): 808 offset = field[idx, 1] + self._heapoffset 809 count = field[idx, 0] 810 811 if recformat.dtype == 'a': 812 dt = np.dtype(recformat.dtype + str(1)) 813 arr_len = count * dt.itemsize 814 da = raw_data[offset:offset + arr_len].view(dt) 815 da = np.char.array(da.view(dtype=dt), itemsize=count) 816 dummy[idx] = decode_ascii(da) 817 else: 818 dt = np.dtype(recformat.dtype) 819 arr_len = count * dt.itemsize 820 dummy[idx] = raw_data[offset:offset + arr_len].view(dt) 821 dummy[idx].dtype = dummy[idx].dtype.newbyteorder('>') 822 # Each array in the field may now require additional 823 # scaling depending on the other scaling parameters 824 # TODO: The same scaling parameters apply to every 825 # array in the column so this is currently very slow; we 826 # really only need to check once whether any scaling will 827 # be necessary and skip this step if not 828 # TODO: Test that this works for X format; I don't think 829 # that it does--the recformat variable only applies to the P 830 # format not the X format 831 dummy[idx] = self._convert_other(column, dummy[idx], 832 recformat) 833 834 return dummy 835 836 def _convert_ascii(self, column, field): 837 """ 838 Special handling for ASCII table columns to convert columns containing 839 numeric types to actual numeric arrays from the string representation. 840 """ 841 842 format = column.format 843 recformat = getattr(format, 'recformat', ASCII2NUMPY[format[0]]) 844 # if the string = TNULL, return ASCIITNULL 845 nullval = str(column.null).strip().encode('ascii') 846 if len(nullval) > format.width: 847 nullval = nullval[:format.width] 848 849 # Before using .replace make sure that any trailing bytes in each 850 # column are filled with spaces, and *not*, say, nulls; this causes 851 # functions like replace to potentially leave gibberish bytes in the 852 # array buffer. 853 dummy = np.char.ljust(field, format.width) 854 dummy = np.char.replace(dummy, encode_ascii('D'), encode_ascii('E')) 855 null_fill = encode_ascii(str(ASCIITNULL).rjust(format.width)) 856 857 # Convert all fields equal to the TNULL value (nullval) to empty fields. 858 # TODO: These fields really should be converted to NaN or something else undefined. 859 # Currently they are converted to empty fields, which are then set to zero. 860 dummy = np.where(np.char.strip(dummy) == nullval, null_fill, dummy) 861 862 # always replace empty fields, see https://github.com/astropy/astropy/pull/5394 863 if nullval != b'': 864 dummy = np.where(np.char.strip(dummy) == b'', null_fill, dummy) 865 866 try: 867 dummy = np.array(dummy, dtype=recformat) 868 except ValueError as exc: 869 indx = self.names.index(column.name) 870 raise ValueError( 871 '{}; the header may be missing the necessary TNULL{} ' 872 'keyword or the table contains invalid data'.format( 873 exc, indx + 1)) 874 875 return dummy 876 877 def _convert_other(self, column, field, recformat): 878 """Perform conversions on any other fixed-width column data types. 879 880 This may not perform any conversion at all if it's not necessary, in 881 which case the original column array is returned. 882 """ 883 884 if isinstance(recformat, _FormatX): 885 # special handling for the X format 886 return self._convert_x(field, recformat) 887 888 (_str, _bool, _number, _scale, _zero, bscale, bzero, dim) = \ 889 self._get_scale_factors(column) 890 891 indx = self.names.index(column.name) 892 893 # ASCII table, convert strings to numbers 894 # TODO: 895 # For now, check that these are ASCII columns by checking the coldefs 896 # type; in the future all columns (for binary tables, ASCII tables, or 897 # otherwise) should "know" what type they are already and how to handle 898 # converting their data from FITS format to native format and vice 899 # versa... 900 if not _str and isinstance(self._coldefs, _AsciiColDefs): 901 field = self._convert_ascii(column, field) 902 903 # Test that the dimensions given in dim are sensible; otherwise 904 # display a warning and ignore them 905 if dim: 906 # See if the dimensions already match, if not, make sure the 907 # number items will fit in the specified dimensions 908 if field.ndim > 1: 909 actual_shape = field.shape[1:] 910 if _str: 911 actual_shape = actual_shape + (field.itemsize,) 912 else: 913 actual_shape = field.shape[0] 914 915 if dim == actual_shape: 916 # The array already has the correct dimensions, so we 917 # ignore dim and don't convert 918 dim = None 919 else: 920 nitems = reduce(operator.mul, dim) 921 if _str: 922 actual_nitems = field.itemsize 923 elif len(field.shape) == 1: # No repeat count in TFORMn, equivalent to 1 924 actual_nitems = 1 925 else: 926 actual_nitems = field.shape[1] 927 if nitems > actual_nitems: 928 warnings.warn( 929 'TDIM{} value {:d} does not fit with the size of ' 930 'the array items ({:d}). TDIM{:d} will be ignored.' 931 .format(indx + 1, self._coldefs[indx].dims, 932 actual_nitems, indx + 1)) 933 dim = None 934 935 # further conversion for both ASCII and binary tables 936 # For now we've made columns responsible for *knowing* whether their 937 # data has been scaled, but we make the FITS_rec class responsible for 938 # actually doing the scaling 939 # TODO: This also needs to be fixed in the effort to make Columns 940 # responsible for scaling their arrays to/from FITS native values 941 if not column.ascii and column.format.p_format: 942 format_code = column.format.p_format 943 else: 944 # TODO: Rather than having this if/else it might be nice if the 945 # ColumnFormat class had an attribute guaranteed to give the format 946 # of actual values in a column regardless of whether the true 947 # format is something like P or Q 948 format_code = column.format.format 949 950 if (_number and (_scale or _zero) and not column._physical_values): 951 # This is to handle pseudo unsigned ints in table columns 952 # TODO: For now this only really works correctly for binary tables 953 # Should it work for ASCII tables as well? 954 if self._uint: 955 if bzero == 2**15 and format_code == 'I': 956 field = np.array(field, dtype=np.uint16) 957 elif bzero == 2**31 and format_code == 'J': 958 field = np.array(field, dtype=np.uint32) 959 elif bzero == 2**63 and format_code == 'K': 960 field = np.array(field, dtype=np.uint64) 961 bzero64 = np.uint64(2 ** 63) 962 else: 963 field = np.array(field, dtype=np.float64) 964 else: 965 field = np.array(field, dtype=np.float64) 966 967 if _scale: 968 np.multiply(field, bscale, field) 969 if _zero: 970 if self._uint and format_code == 'K': 971 # There is a chance of overflow, so be careful 972 test_overflow = field.copy() 973 try: 974 test_overflow += bzero64 975 except OverflowError: 976 warnings.warn( 977 "Overflow detected while applying TZERO{:d}. " 978 "Returning unscaled data.".format(indx + 1)) 979 else: 980 field = test_overflow 981 else: 982 field += bzero 983 984 # mark the column as scaled 985 column._physical_values = True 986 987 elif _bool and field.dtype != bool: 988 field = np.equal(field, ord('T')) 989 elif _str: 990 if not self._character_as_bytes: 991 with suppress(UnicodeDecodeError): 992 field = decode_ascii(field) 993 994 if dim: 995 # Apply the new field item dimensions 996 nitems = reduce(operator.mul, dim) 997 if field.ndim > 1: 998 field = field[:, :nitems] 999 if _str: 1000 fmt = field.dtype.char 1001 dtype = (f'|{fmt}{dim[-1]}', dim[:-1]) 1002 field.dtype = dtype 1003 else: 1004 field.shape = (field.shape[0],) + dim 1005 1006 return field 1007 1008 def _get_heap_data(self): 1009 """ 1010 Returns a pointer into the table's raw data to its heap (if present). 1011 1012 This is returned as a numpy byte array. 1013 """ 1014 1015 if self._heapsize: 1016 raw_data = self._get_raw_data().view(np.ubyte) 1017 heap_end = self._heapoffset + self._heapsize 1018 return raw_data[self._heapoffset:heap_end] 1019 else: 1020 return np.array([], dtype=np.ubyte) 1021 1022 def _get_raw_data(self): 1023 """ 1024 Returns the base array of self that "raw data array" that is the 1025 array in the format that it was first read from a file before it was 1026 sliced or viewed as a different type in any way. 1027 1028 This is determined by walking through the bases until finding one that 1029 has at least the same number of bytes as self, plus the heapsize. This 1030 may be the immediate .base but is not always. This is used primarily 1031 for variable-length array support which needs to be able to find the 1032 heap (the raw data *may* be larger than nbytes + heapsize if it 1033 contains a gap or padding). 1034 1035 May return ``None`` if no array resembling the "raw data" according to 1036 the stated criteria can be found. 1037 """ 1038 1039 raw_data_bytes = self.nbytes + self._heapsize 1040 base = self 1041 while hasattr(base, 'base') and base.base is not None: 1042 base = base.base 1043 if hasattr(base, 'nbytes') and base.nbytes >= raw_data_bytes: 1044 return base 1045 1046 def _get_scale_factors(self, column): 1047 """Get all the scaling flags and factors for one column.""" 1048 1049 # TODO: Maybe this should be a method/property on Column? Or maybe 1050 # it's not really needed at all... 1051 _str = column.format.format == 'A' 1052 _bool = column.format.format == 'L' 1053 1054 _number = not (_bool or _str) 1055 bscale = column.bscale 1056 bzero = column.bzero 1057 1058 _scale = bscale not in ('', None, 1) 1059 _zero = bzero not in ('', None, 0) 1060 1061 # ensure bscale/bzero are numbers 1062 if not _scale: 1063 bscale = 1 1064 if not _zero: 1065 bzero = 0 1066 1067 # column._dims gives a tuple, rather than column.dim which returns the 1068 # original string format code from the FITS header... 1069 dim = column._dims 1070 1071 return (_str, _bool, _number, _scale, _zero, bscale, bzero, dim) 1072 1073 def _scale_back(self, update_heap_pointers=True): 1074 """ 1075 Update the parent array, using the (latest) scaled array. 1076 1077 If ``update_heap_pointers`` is `False`, this will leave all the heap 1078 pointers in P/Q columns as they are verbatim--it only makes sense to do 1079 this if there is already data on the heap and it can be guaranteed that 1080 that data has not been modified, and there is not new data to add to 1081 the heap. Currently this is only used as an optimization for 1082 CompImageHDU that does its own handling of the heap. 1083 """ 1084 1085 # Running total for the new heap size 1086 heapsize = 0 1087 1088 for indx, name in enumerate(self.dtype.names): 1089 column = self._coldefs[indx] 1090 recformat = column.format.recformat 1091 raw_field = _get_recarray_field(self, indx) 1092 1093 # add the location offset of the heap area for each 1094 # variable length column 1095 if isinstance(recformat, _FormatP): 1096 # Irritatingly, this can return a different dtype than just 1097 # doing np.dtype(recformat.dtype); but this returns the results 1098 # that we want. For example if recformat.dtype is 'a' we want 1099 # an array of characters. 1100 dtype = np.array([], dtype=recformat.dtype).dtype 1101 1102 if update_heap_pointers and name in self._converted: 1103 # The VLA has potentially been updated, so we need to 1104 # update the array descriptors 1105 raw_field[:] = 0 # reset 1106 npts = [len(arr) for arr in self._converted[name]] 1107 1108 raw_field[:len(npts), 0] = npts 1109 raw_field[1:, 1] = (np.add.accumulate(raw_field[:-1, 0]) * 1110 dtype.itemsize) 1111 raw_field[:, 1][:] += heapsize 1112 1113 heapsize += raw_field[:, 0].sum() * dtype.itemsize 1114 # Even if this VLA has not been read or updated, we need to 1115 # include the size of its constituent arrays in the heap size 1116 # total 1117 1118 if isinstance(recformat, _FormatX) and name in self._converted: 1119 _wrapx(self._converted[name], raw_field, recformat.repeat) 1120 continue 1121 1122 _str, _bool, _number, _scale, _zero, bscale, bzero, _ = \ 1123 self._get_scale_factors(column) 1124 1125 field = self._converted.get(name, raw_field) 1126 1127 # conversion for both ASCII and binary tables 1128 if _number or _str: 1129 if _number and (_scale or _zero) and column._physical_values: 1130 dummy = field.copy() 1131 if _zero: 1132 dummy -= bzero 1133 if _scale: 1134 dummy /= bscale 1135 # This will set the raw values in the recarray back to 1136 # their non-physical storage values, so the column should 1137 # be mark is not scaled 1138 column._physical_values = False 1139 elif _str or isinstance(self._coldefs, _AsciiColDefs): 1140 dummy = field 1141 else: 1142 continue 1143 1144 # ASCII table, convert numbers to strings 1145 if isinstance(self._coldefs, _AsciiColDefs): 1146 self._scale_back_ascii(indx, dummy, raw_field) 1147 # binary table string column 1148 elif isinstance(raw_field, chararray.chararray): 1149 self._scale_back_strings(indx, dummy, raw_field) 1150 # all other binary table columns 1151 else: 1152 if len(raw_field) and isinstance(raw_field[0], 1153 np.integer): 1154 dummy = np.around(dummy) 1155 1156 if raw_field.shape == dummy.shape: 1157 raw_field[:] = dummy 1158 else: 1159 # Reshaping the data is necessary in cases where the 1160 # TDIMn keyword was used to shape a column's entries 1161 # into arrays 1162 raw_field[:] = dummy.ravel().view(raw_field.dtype) 1163 1164 del dummy 1165 1166 # ASCII table does not have Boolean type 1167 elif _bool and name in self._converted: 1168 choices = (np.array([ord('F')], dtype=np.int8)[0], 1169 np.array([ord('T')], dtype=np.int8)[0]) 1170 raw_field[:] = np.choose(field, choices) 1171 1172 # Store the updated heapsize 1173 self._heapsize = heapsize 1174 1175 def _scale_back_strings(self, col_idx, input_field, output_field): 1176 # There are a few possibilities this has to be able to handle properly 1177 # The input_field, which comes from the _converted column is of dtype 1178 # 'Un' so that elements read out of the array are normal str 1179 # objects (i.e. unicode strings) 1180 # 1181 # At the other end the *output_field* may also be of type 'S' or of 1182 # type 'U'. It will *usually* be of type 'S' because when reading 1183 # an existing FITS table the raw data is just ASCII strings, and 1184 # represented in Numpy as an S array. However, when a user creates 1185 # a new table from scratch, they *might* pass in a column containing 1186 # unicode strings (dtype 'U'). Therefore the output_field of the 1187 # raw array is actually a unicode array. But we still want to make 1188 # sure the data is encodable as ASCII. Later when we write out the 1189 # array we use, in the dtype 'U' case, a different write routine 1190 # that writes row by row and encodes any 'U' columns to ASCII. 1191 1192 # If the output_field is non-ASCII we will worry about ASCII encoding 1193 # later when writing; otherwise we can do it right here 1194 if input_field.dtype.kind == 'U' and output_field.dtype.kind == 'S': 1195 try: 1196 _ascii_encode(input_field, out=output_field) 1197 except _UnicodeArrayEncodeError as exc: 1198 raise ValueError( 1199 "Could not save column '{}': Contains characters that " 1200 "cannot be encoded as ASCII as required by FITS, starting " 1201 "at the index {!r} of the column, and the index {} of " 1202 "the string at that location.".format( 1203 self._coldefs[col_idx].name, 1204 exc.index[0] if len(exc.index) == 1 else exc.index, 1205 exc.start)) 1206 else: 1207 # Otherwise go ahead and do a direct copy into--if both are type 1208 # 'U' we'll handle encoding later 1209 input_field = input_field.flatten().view(output_field.dtype) 1210 output_field.flat[:] = input_field 1211 1212 # Ensure that blanks at the end of each string are 1213 # converted to nulls instead of spaces, see Trac #15 1214 # and #111 1215 _rstrip_inplace(output_field) 1216 1217 def _scale_back_ascii(self, col_idx, input_field, output_field): 1218 """ 1219 Convert internal array values back to ASCII table representation. 1220 1221 The ``input_field`` is the internal representation of the values, and 1222 the ``output_field`` is the character array representing the ASCII 1223 output that will be written. 1224 """ 1225 1226 starts = self._coldefs.starts[:] 1227 spans = self._coldefs.spans 1228 format = self._coldefs[col_idx].format 1229 1230 # The the index of the "end" column of the record, beyond 1231 # which we can't write 1232 end = super().field(-1).itemsize 1233 starts.append(end + starts[-1]) 1234 1235 if col_idx > 0: 1236 lead = starts[col_idx] - starts[col_idx - 1] - spans[col_idx - 1] 1237 else: 1238 lead = 0 1239 1240 if lead < 0: 1241 warnings.warn('Column {!r} starting point overlaps the previous ' 1242 'column.'.format(col_idx + 1)) 1243 1244 trail = starts[col_idx + 1] - starts[col_idx] - spans[col_idx] 1245 1246 if trail < 0: 1247 warnings.warn('Column {!r} ending point overlaps the next ' 1248 'column.'.format(col_idx + 1)) 1249 1250 # TODO: It would be nice if these string column formatting 1251 # details were left to a specialized class, as is the case 1252 # with FormatX and FormatP 1253 if 'A' in format: 1254 _pc = '{:' 1255 else: 1256 _pc = '{:>' 1257 1258 fmt = ''.join([_pc, format[1:], ASCII2STR[format[0]], '}', 1259 (' ' * trail)]) 1260 1261 # Even if the format precision is 0, we should output a decimal point 1262 # as long as there is space to do so--not including a decimal point in 1263 # a float value is discouraged by the FITS Standard 1264 trailing_decimal = (format.precision == 0 and 1265 format.format in ('F', 'E', 'D')) 1266 1267 # not using numarray.strings's num2char because the 1268 # result is not allowed to expand (as C/Python does). 1269 for jdx, value in enumerate(input_field): 1270 value = fmt.format(value) 1271 if len(value) > starts[col_idx + 1] - starts[col_idx]: 1272 raise ValueError( 1273 "Value {!r} does not fit into the output's itemsize of " 1274 "{}.".format(value, spans[col_idx])) 1275 1276 if trailing_decimal and value[0] == ' ': 1277 # We have some extra space in the field for the trailing 1278 # decimal point 1279 value = value[1:] + '.' 1280 1281 output_field[jdx] = value 1282 1283 # Replace exponent separator in floating point numbers 1284 if 'D' in format: 1285 output_field[:] = output_field.replace(b'E', b'D') 1286 1287 def tolist(self): 1288 # Override .tolist to take care of special case of VLF 1289 1290 column_lists = [self[name].tolist() for name in self.columns.names] 1291 1292 return [list(row) for row in zip(*column_lists)] 1293 1294 1295def _get_recarray_field(array, key): 1296 """ 1297 Compatibility function for using the recarray base class's field method. 1298 This incorporates the legacy functionality of returning string arrays as 1299 Numeric-style chararray objects. 1300 """ 1301 1302 # Numpy >= 1.10.dev recarray no longer returns chararrays for strings 1303 # This is currently needed for backwards-compatibility and for 1304 # automatic truncation of trailing whitespace 1305 field = np.recarray.field(array, key) 1306 if (field.dtype.char in ('S', 'U') and 1307 not isinstance(field, chararray.chararray)): 1308 field = field.view(chararray.chararray) 1309 return field 1310 1311 1312class _UnicodeArrayEncodeError(UnicodeEncodeError): 1313 def __init__(self, encoding, object_, start, end, reason, index): 1314 super().__init__(encoding, object_, start, end, reason) 1315 self.index = index 1316 1317 1318def _ascii_encode(inarray, out=None): 1319 """ 1320 Takes a unicode array and fills the output string array with the ASCII 1321 encodings (if possible) of the elements of the input array. The two arrays 1322 must be the same size (though not necessarily the same shape). 1323 1324 This is like an inplace version of `np.char.encode` though simpler since 1325 it's only limited to ASCII, and hence the size of each character is 1326 guaranteed to be 1 byte. 1327 1328 If any strings are non-ASCII an UnicodeArrayEncodeError is raised--this is 1329 just a `UnicodeEncodeError` with an additional attribute for the index of 1330 the item that couldn't be encoded. 1331 """ 1332 1333 out_dtype = np.dtype((f'S{inarray.dtype.itemsize // 4}', 1334 inarray.dtype.shape)) 1335 if out is not None: 1336 out = out.view(out_dtype) 1337 1338 op_dtypes = [inarray.dtype, out_dtype] 1339 op_flags = [['readonly'], ['writeonly', 'allocate']] 1340 it = np.nditer([inarray, out], op_dtypes=op_dtypes, 1341 op_flags=op_flags, flags=['zerosize_ok']) 1342 1343 try: 1344 for initem, outitem in it: 1345 outitem[...] = initem.item().encode('ascii') 1346 except UnicodeEncodeError as exc: 1347 index = np.unravel_index(it.iterindex, inarray.shape) 1348 raise _UnicodeArrayEncodeError(*(exc.args + (index,))) 1349 1350 return it.operands[1] 1351 1352 1353def _has_unicode_fields(array): 1354 """ 1355 Returns True if any fields in a structured array have Unicode dtype. 1356 """ 1357 1358 dtypes = (d[0] for d in array.dtype.fields.values()) 1359 return any(d.kind == 'U' for d in dtypes) 1360