1# Licensed under a 3-clause BSD style license - see LICENSE.rst 2 3import itertools 4import warnings 5import weakref 6 7from copy import deepcopy 8 9import numpy as np 10from numpy import ma 11 12from astropy.units import Unit, Quantity 13from astropy.utils.console import color_print 14from astropy.utils.metadata import MetaData 15from astropy.utils.data_info import BaseColumnInfo, dtype_info_name 16from astropy.utils.misc import dtype_bytes_or_chars 17from . import groups 18from . import pprint 19from .np_utils import fix_column_name 20 21# These "shims" provide __getitem__ implementations for Column and MaskedColumn 22from ._column_mixins import _ColumnGetitemShim, _MaskedColumnGetitemShim 23 24# Create a generic TableFormatter object for use by bare columns with no 25# parent table. 26FORMATTER = pprint.TableFormatter() 27 28 29class StringTruncateWarning(UserWarning): 30 """ 31 Warning class for when a string column is assigned a value 32 that gets truncated because the base (numpy) string length 33 is too short. 34 35 This does not inherit from AstropyWarning because we want to use 36 stacklevel=2 to show the user where the issue occurred in their code. 37 """ 38 pass 39 40 41# Always emit this warning, not just the first instance 42warnings.simplefilter('always', StringTruncateWarning) 43 44 45def _auto_names(n_cols): 46 from . import conf 47 return [str(conf.auto_colname).format(i) for i in range(n_cols)] 48 49 50# list of one and two-dimensional comparison functions, which sometimes return 51# a Column class and sometimes a plain array. Used in __array_wrap__ to ensure 52# they only return plain (masked) arrays (see #1446 and #1685) 53_comparison_functions = set( 54 [np.greater, np.greater_equal, np.less, np.less_equal, 55 np.not_equal, np.equal, 56 np.isfinite, np.isinf, np.isnan, np.sign, np.signbit]) 57 58 59def col_copy(col, copy_indices=True): 60 """ 61 Mixin-safe version of Column.copy() (with copy_data=True). 62 63 Parameters 64 ---------- 65 col : Column or mixin column 66 Input column 67 copy_indices : bool 68 Copy the column ``indices`` attribute 69 70 Returns 71 ------- 72 col : Copy of input column 73 """ 74 if isinstance(col, BaseColumn): 75 return col.copy() 76 77 newcol = col.copy() if hasattr(col, 'copy') else deepcopy(col) 78 # If the column has info defined, we copy it and adjust any indices 79 # to point to the copied column. By guarding with the if statement, 80 # we avoid side effects (of creating the default info instance). 81 if 'info' in col.__dict__: 82 newcol.info = col.info 83 if copy_indices and col.info.indices: 84 newcol.info.indices = deepcopy(col.info.indices) 85 for index in newcol.info.indices: 86 index.replace_col(col, newcol) 87 88 return newcol 89 90 91class FalseArray(np.ndarray): 92 """ 93 Boolean mask array that is always False. 94 95 This is used to create a stub ``mask`` property which is a boolean array of 96 ``False`` used by default for mixin columns and corresponding to the mixin 97 column data shape. The ``mask`` looks like a normal numpy array but an 98 exception will be raised if ``True`` is assigned to any element. The 99 consequences of the limitation are most obvious in the high-level table 100 operations. 101 102 Parameters 103 ---------- 104 shape : tuple 105 Data shape 106 """ 107 def __new__(cls, shape): 108 obj = np.zeros(shape, dtype=bool).view(cls) 109 return obj 110 111 def __setitem__(self, item, val): 112 val = np.asarray(val) 113 if np.any(val): 114 raise ValueError('Cannot set any element of {} class to True' 115 .format(self.__class__.__name__)) 116 117 118def _expand_string_array_for_values(arr, values): 119 """ 120 For string-dtype return a version of ``arr`` that is wide enough for ``values``. 121 If ``arr`` is not string-dtype or does not need expansion then return ``arr``. 122 123 Parameters 124 ---------- 125 arr : np.ndarray 126 Input array 127 values : scalar or array-like 128 Values for width comparison for string arrays 129 130 Returns 131 ------- 132 arr_expanded : np.ndarray 133 134 """ 135 if arr.dtype.kind in ('U', 'S') and values is not np.ma.masked: 136 # Find the length of the longest string in the new values. 137 values_str_len = np.char.str_len(values).max() 138 139 # Determine character repeat count of arr.dtype. Returns a positive 140 # int or None (something like 'U0' is not possible in numpy). If new values 141 # are longer than current then make a new (wider) version of arr. 142 arr_str_len = dtype_bytes_or_chars(arr.dtype) 143 if arr_str_len and values_str_len > arr_str_len: 144 arr_dtype = arr.dtype.byteorder + arr.dtype.kind + str(values_str_len) 145 arr = arr.astype(arr_dtype) 146 147 return arr 148 149 150def _convert_sequence_data_to_array(data, dtype=None): 151 """Convert N-d sequence-like data to ndarray or MaskedArray. 152 153 This is the core function for converting Python lists or list of lists to a 154 numpy array. This handles embedded np.ma.masked constants in ``data`` along 155 with the special case of an homogeneous list of MaskedArray elements. 156 157 Considerations: 158 159 - np.ma.array is about 50 times slower than np.array for list input. This 160 function avoids using np.ma.array on list input. 161 - np.array emits a UserWarning for embedded np.ma.masked, but only for int 162 or float inputs. For those it converts to np.nan and forces float dtype. 163 For other types np.array is inconsistent, for instance converting 164 np.ma.masked to "0.0" for str types. 165 - Searching in pure Python for np.ma.masked in ``data`` is comparable in 166 speed to calling ``np.array(data)``. 167 - This function may end up making two additional copies of input ``data``. 168 169 Parameters 170 ---------- 171 data : N-d sequence 172 Input data, typically list or list of lists 173 dtype : None or dtype-like 174 Output datatype (None lets np.array choose) 175 176 Returns 177 ------- 178 np_data : np.ndarray or np.ma.MaskedArray 179 180 """ 181 np_ma_masked = np.ma.masked # Avoid repeated lookups of this object 182 183 # Special case of an homogeneous list of MaskedArray elements (see #8977). 184 # np.ma.masked is an instance of MaskedArray, so exclude those values. 185 if (hasattr(data, '__len__') 186 and len(data) > 0 187 and all(isinstance(val, np.ma.MaskedArray) 188 and val is not np_ma_masked for val in data)): 189 np_data = np.ma.array(data, dtype=dtype) 190 return np_data 191 192 # First convert data to a plain ndarray. If there are instances of np.ma.masked 193 # in the data this will issue a warning for int and float. 194 with warnings.catch_warnings(record=True) as warns: 195 # Ensure this warning from numpy is always enabled and that it is not 196 # converted to an error (which can happen during pytest). 197 warnings.filterwarnings('always', category=UserWarning, 198 message='.*converting a masked element.*') 199 # FutureWarning in numpy 1.21. See https://github.com/astropy/astropy/issues/11291 200 # and https://github.com/numpy/numpy/issues/18425. 201 warnings.filterwarnings('always', category=FutureWarning, 202 message='.*Promotion of numbers and bools to strings.*') 203 try: 204 np_data = np.array(data, dtype=dtype) 205 except np.ma.MaskError: 206 # Catches case of dtype=int with masked values, instead let it 207 # convert to float 208 np_data = np.array(data) 209 except Exception: 210 # Conversion failed for some reason, e.g. [2, 1*u.m] gives TypeError in Quantity. 211 # First try to interpret the data as Quantity. If that still fails then fall 212 # through to object 213 try: 214 np_data = Quantity(data, dtype) 215 except Exception: 216 dtype = object 217 np_data = np.array(data, dtype=dtype) 218 219 if np_data.ndim == 0 or (np_data.ndim > 0 and len(np_data) == 0): 220 # Implies input was a scalar or an empty list (e.g. initializing an 221 # empty table with pre-declared names and dtypes but no data). Here we 222 # need to fall through to initializing with the original data=[]. 223 return data 224 225 # If there were no warnings and the data are int or float, then we are done. 226 # Other dtypes like string or complex can have masked values and the 227 # np.array() conversion gives the wrong answer (e.g. converting np.ma.masked 228 # to the string "0.0"). 229 if len(warns) == 0 and np_data.dtype.kind in ('i', 'f'): 230 return np_data 231 232 # Now we need to determine if there is an np.ma.masked anywhere in input data. 233 234 # Make a statement like below to look for np.ma.masked in a nested sequence. 235 # Because np.array(data) succeeded we know that `data` has a regular N-d 236 # structure. Find ma_masked: 237 # any(any(any(d2 is ma_masked for d2 in d1) for d1 in d0) for d0 in data) 238 # Using this eval avoids creating a copy of `data` in the more-usual case of 239 # no masked elements. 240 any_statement = 'd0 is ma_masked' 241 for ii in reversed(range(np_data.ndim)): 242 if ii == 0: 243 any_statement = f'any({any_statement} for d0 in data)' 244 elif ii == np_data.ndim - 1: 245 any_statement = f'any(d{ii} is ma_masked for d{ii} in d{ii-1})' 246 else: 247 any_statement = f'any({any_statement} for d{ii} in d{ii-1})' 248 context = {'ma_masked': np.ma.masked, 'data': data} 249 has_masked = eval(any_statement, context) 250 251 # If there are any masks then explicitly change each one to a fill value and 252 # set a mask boolean array. If not has_masked then we're done. 253 if has_masked: 254 mask = np.zeros(np_data.shape, dtype=bool) 255 data_filled = np.array(data, dtype=object) 256 257 # Make type-appropriate fill value based on initial conversion. 258 if np_data.dtype.kind == 'U': 259 fill = '' 260 elif np_data.dtype.kind == 'S': 261 fill = b'' 262 else: 263 # Zero works for every numeric type. 264 fill = 0 265 266 ranges = [range(dim) for dim in np_data.shape] 267 for idxs in itertools.product(*ranges): 268 val = data_filled[idxs] 269 if val is np_ma_masked: 270 data_filled[idxs] = fill 271 mask[idxs] = True 272 elif isinstance(val, bool) and dtype is None: 273 # If we see a bool and dtype not specified then assume bool for 274 # the entire array. Not perfect but in most practical cases OK. 275 # Unfortunately numpy types [False, 0] as int, not bool (and 276 # [False, np.ma.masked] => array([0.0, np.nan])). 277 dtype = bool 278 279 # If no dtype is provided then need to convert back to list so np.array 280 # does type autodetection. 281 if dtype is None: 282 data_filled = data_filled.tolist() 283 284 # Use np.array first to convert `data` to ndarray (fast) and then make 285 # masked array from an ndarray with mask (fast) instead of from `data`. 286 np_data = np.ma.array(np.array(data_filled, dtype=dtype), mask=mask) 287 288 return np_data 289 290 291def _make_compare(oper): 292 """ 293 Make Column comparison methods which encode the ``other`` object to utf-8 294 in the case of a bytestring dtype for Py3+. 295 296 Parameters 297 ---------- 298 oper : str 299 Operator name 300 """ 301 swapped_oper = {'__eq__': '__eq__', 302 '__ne__': '__ne__', 303 '__gt__': '__lt__', 304 '__lt__': '__gt__', 305 '__ge__': '__le__', 306 '__le__': '__ge__'}[oper] 307 308 def _compare(self, other): 309 op = oper # copy enclosed ref to allow swap below 310 311 # Special case to work around #6838. Other combinations work OK, 312 # see tests.test_column.test_unicode_sandwich_compare(). In this 313 # case just swap self and other. 314 # 315 # This is related to an issue in numpy that was addressed in np 1.13. 316 # However that fix does not make this problem go away, but maybe 317 # future numpy versions will do so. NUMPY_LT_1_13 to get the 318 # attention of future maintainers to check (by deleting or versioning 319 # the if block below). See #6899 discussion. 320 # 2019-06-21: still needed with numpy 1.16. 321 if (isinstance(self, MaskedColumn) and self.dtype.kind == 'U' 322 and isinstance(other, MaskedColumn) and other.dtype.kind == 'S'): 323 self, other = other, self 324 op = swapped_oper 325 326 if self.dtype.char == 'S': 327 other = self._encode_str(other) 328 329 # Now just let the regular ndarray.__eq__, etc., take over. 330 result = getattr(super(Column, self), op)(other) 331 # But we should not return Column instances for this case. 332 return result.data if isinstance(result, Column) else result 333 334 return _compare 335 336 337class ColumnInfo(BaseColumnInfo): 338 """ 339 Container for meta information like name, description, format. 340 341 This is required when the object is used as a mixin column within a table, 342 but can be used as a general way to store meta information. 343 """ 344 attrs_from_parent = BaseColumnInfo.attr_names 345 _supports_indexing = True 346 347 def new_like(self, cols, length, metadata_conflicts='warn', name=None): 348 """ 349 Return a new Column instance which is consistent with the 350 input ``cols`` and has ``length`` rows. 351 352 This is intended for creating an empty column object whose elements can 353 be set in-place for table operations like join or vstack. 354 355 Parameters 356 ---------- 357 cols : list 358 List of input columns 359 length : int 360 Length of the output column object 361 metadata_conflicts : str ('warn'|'error'|'silent') 362 How to handle metadata conflicts 363 name : str 364 Output column name 365 366 Returns 367 ------- 368 col : Column (or subclass) 369 New instance of this class consistent with ``cols`` 370 371 """ 372 attrs = self.merge_cols_attributes(cols, metadata_conflicts, name, 373 ('meta', 'unit', 'format', 'description')) 374 375 return self._parent_cls(length=length, **attrs) 376 377 def get_sortable_arrays(self): 378 """ 379 Return a list of arrays which can be lexically sorted to represent 380 the order of the parent column. 381 382 For Column this is just the column itself. 383 384 Returns 385 ------- 386 arrays : list of ndarray 387 """ 388 return [self._parent] 389 390 391class BaseColumn(_ColumnGetitemShim, np.ndarray): 392 393 meta = MetaData() 394 395 def __new__(cls, data=None, name=None, 396 dtype=None, shape=(), length=0, 397 description=None, unit=None, format=None, meta=None, 398 copy=False, copy_indices=True): 399 if data is None: 400 self_data = np.zeros((length,)+shape, dtype=dtype) 401 elif isinstance(data, BaseColumn) and hasattr(data, '_name'): 402 # When unpickling a MaskedColumn, ``data`` will be a bare 403 # BaseColumn with none of the expected attributes. In this case 404 # do NOT execute this block which initializes from ``data`` 405 # attributes. 406 self_data = np.array(data.data, dtype=dtype, copy=copy) 407 if description is None: 408 description = data.description 409 if unit is None: 410 unit = unit or data.unit 411 if format is None: 412 format = data.format 413 if meta is None: 414 meta = data.meta 415 if name is None: 416 name = data.name 417 elif isinstance(data, Quantity): 418 if unit is None: 419 self_data = np.array(data, dtype=dtype, copy=copy) 420 unit = data.unit 421 else: 422 self_data = Quantity(data, unit, dtype=dtype, copy=copy).value 423 # If 'info' has been defined, copy basic properties (if needed). 424 if 'info' in data.__dict__: 425 if description is None: 426 description = data.info.description 427 if format is None: 428 format = data.info.format 429 if meta is None: 430 meta = data.info.meta 431 432 else: 433 if np.dtype(dtype).char == 'S': 434 data = cls._encode_str(data) 435 self_data = np.array(data, dtype=dtype, copy=copy) 436 437 self = self_data.view(cls) 438 self._name = fix_column_name(name) 439 self._parent_table = None 440 self.unit = unit 441 self._format = format 442 self.description = description 443 self.meta = meta 444 self.indices = deepcopy(getattr(data, 'indices', [])) if copy_indices else [] 445 for index in self.indices: 446 index.replace_col(data, self) 447 448 return self 449 450 @property 451 def data(self): 452 return self.view(np.ndarray) 453 454 @property 455 def value(self): 456 return self.data 457 458 @property 459 def parent_table(self): 460 # Note: It seems there are some cases where _parent_table is not set, 461 # such after restoring from a pickled Column. Perhaps that should be 462 # fixed, but this is also okay for now. 463 if getattr(self, '_parent_table', None) is None: 464 return None 465 else: 466 return self._parent_table() 467 468 @parent_table.setter 469 def parent_table(self, table): 470 if table is None: 471 self._parent_table = None 472 else: 473 self._parent_table = weakref.ref(table) 474 475 info = ColumnInfo() 476 477 def copy(self, order='C', data=None, copy_data=True): 478 """ 479 Return a copy of the current instance. 480 481 If ``data`` is supplied then a view (reference) of ``data`` is used, 482 and ``copy_data`` is ignored. 483 484 Parameters 485 ---------- 486 order : {'C', 'F', 'A', 'K'}, optional 487 Controls the memory layout of the copy. 'C' means C-order, 488 'F' means F-order, 'A' means 'F' if ``a`` is Fortran contiguous, 489 'C' otherwise. 'K' means match the layout of ``a`` as closely 490 as possible. (Note that this function and :func:numpy.copy are very 491 similar, but have different default values for their order= 492 arguments.) Default is 'C'. 493 data : array, optional 494 If supplied then use a view of ``data`` instead of the instance 495 data. This allows copying the instance attributes and meta. 496 copy_data : bool, optional 497 Make a copy of the internal numpy array instead of using a 498 reference. Default is True. 499 500 Returns 501 ------- 502 col : Column or MaskedColumn 503 Copy of the current column (same type as original) 504 """ 505 if data is None: 506 data = self.data 507 if copy_data: 508 data = data.copy(order) 509 510 out = data.view(self.__class__) 511 out.__array_finalize__(self) 512 513 # If there is meta on the original column then deepcopy (since "copy" of column 514 # implies complete independence from original). __array_finalize__ will have already 515 # made a light copy. I'm not sure how to avoid that initial light copy. 516 if self.meta is not None: 517 out.meta = self.meta # MetaData descriptor does a deepcopy here 518 519 # for MaskedColumn, MaskedArray.__array_finalize__ also copies mask 520 # from self, which is not the idea here, so undo 521 if isinstance(self, MaskedColumn): 522 out._mask = data._mask 523 524 self._copy_groups(out) 525 526 return out 527 528 def __setstate__(self, state): 529 """ 530 Restore the internal state of the Column/MaskedColumn for pickling 531 purposes. This requires that the last element of ``state`` is a 532 5-tuple that has Column-specific state values. 533 """ 534 # Get the Column attributes 535 names = ('_name', '_unit', '_format', 'description', 'meta', 'indices') 536 attrs = {name: val for name, val in zip(names, state[-1])} 537 538 state = state[:-1] 539 540 # Using super().__setstate__(state) gives 541 # "TypeError 'int' object is not iterable", raised in 542 # astropy.table._column_mixins._ColumnGetitemShim.__setstate_cython__() 543 # Previously, it seems to have given an infinite recursion. 544 # Hence, manually call the right super class to actually set up 545 # the array object. 546 super_class = ma.MaskedArray if isinstance(self, ma.MaskedArray) else np.ndarray 547 super_class.__setstate__(self, state) 548 549 # Set the Column attributes 550 for name, val in attrs.items(): 551 setattr(self, name, val) 552 self._parent_table = None 553 554 def __reduce__(self): 555 """ 556 Return a 3-tuple for pickling a Column. Use the super-class 557 functionality but then add in a 5-tuple of Column-specific values 558 that get used in __setstate__. 559 """ 560 super_class = ma.MaskedArray if isinstance(self, ma.MaskedArray) else np.ndarray 561 reconstruct_func, reconstruct_func_args, state = super_class.__reduce__(self) 562 563 # Define Column-specific attrs and meta that gets added to state. 564 column_state = (self.name, self.unit, self.format, self.description, 565 self.meta, self.indices) 566 state = state + (column_state,) 567 568 return reconstruct_func, reconstruct_func_args, state 569 570 def __array_finalize__(self, obj): 571 # Obj will be none for direct call to Column() creator 572 if obj is None: 573 return 574 575 if callable(super().__array_finalize__): 576 super().__array_finalize__(obj) 577 578 # Self was created from template (e.g. obj[slice] or (obj * 2)) 579 # or viewcast e.g. obj.view(Column). In either case we want to 580 # init Column attributes for self from obj if possible. 581 self.parent_table = None 582 if not hasattr(self, 'indices'): # may have been copied in __new__ 583 self.indices = [] 584 self._copy_attrs(obj) 585 if 'info' in getattr(obj, '__dict__', {}): 586 self.info = obj.info 587 588 def __array_wrap__(self, out_arr, context=None): 589 """ 590 __array_wrap__ is called at the end of every ufunc. 591 592 Normally, we want a Column object back and do not have to do anything 593 special. But there are two exceptions: 594 595 1) If the output shape is different (e.g. for reduction ufuncs 596 like sum() or mean()), a Column still linking to a parent_table 597 makes little sense, so we return the output viewed as the 598 column content (ndarray or MaskedArray). 599 For this case, we use "[()]" to select everything, and to ensure we 600 convert a zero rank array to a scalar. (For some reason np.sum() 601 returns a zero rank scalar array while np.mean() returns a scalar; 602 So the [()] is needed for this case. 603 604 2) When the output is created by any function that returns a boolean 605 we also want to consistently return an array rather than a column 606 (see #1446 and #1685) 607 """ 608 out_arr = super().__array_wrap__(out_arr, context) 609 if (self.shape != out_arr.shape 610 or (isinstance(out_arr, BaseColumn) 611 and (context is not None 612 and context[0] in _comparison_functions))): 613 return out_arr.data[()] 614 else: 615 return out_arr 616 617 @property 618 def name(self): 619 """ 620 The name of this column. 621 """ 622 return self._name 623 624 @name.setter 625 def name(self, val): 626 val = fix_column_name(val) 627 628 if self.parent_table is not None: 629 table = self.parent_table 630 table.columns._rename_column(self.name, val) 631 632 self._name = val 633 634 @property 635 def format(self): 636 """ 637 Format string for displaying values in this column. 638 """ 639 640 return self._format 641 642 @format.setter 643 def format(self, format_string): 644 645 prev_format = getattr(self, '_format', None) 646 647 self._format = format_string # set new format string 648 649 try: 650 # test whether it formats without error exemplarily 651 self.pformat(max_lines=1) 652 except Exception as err: 653 # revert to restore previous format if there was one 654 self._format = prev_format 655 raise ValueError( 656 "Invalid format for column '{}': could not display " 657 "values in this column using this format".format( 658 self.name)) from err 659 660 @property 661 def descr(self): 662 """Array-interface compliant full description of the column. 663 664 This returns a 3-tuple (name, type, shape) that can always be 665 used in a structured array dtype definition. 666 """ 667 return (self.name, self.dtype.str, self.shape[1:]) 668 669 def iter_str_vals(self): 670 """ 671 Return an iterator that yields the string-formatted values of this 672 column. 673 674 Returns 675 ------- 676 str_vals : iterator 677 Column values formatted as strings 678 """ 679 # Iterate over formatted values with no max number of lines, no column 680 # name, no unit, and ignoring the returned header info in outs. 681 _pformat_col_iter = self._formatter._pformat_col_iter 682 for str_val in _pformat_col_iter(self, -1, show_name=False, show_unit=False, 683 show_dtype=False, outs={}): 684 yield str_val 685 686 def attrs_equal(self, col): 687 """Compare the column attributes of ``col`` to this object. 688 689 The comparison attributes are: ``name``, ``unit``, ``dtype``, 690 ``format``, ``description``, and ``meta``. 691 692 Parameters 693 ---------- 694 col : Column 695 Comparison column 696 697 Returns 698 ------- 699 equal : bool 700 True if all attributes are equal 701 """ 702 if not isinstance(col, BaseColumn): 703 raise ValueError('Comparison `col` must be a Column or ' 704 'MaskedColumn object') 705 706 attrs = ('name', 'unit', 'dtype', 'format', 'description', 'meta') 707 equal = all(getattr(self, x) == getattr(col, x) for x in attrs) 708 709 return equal 710 711 @property 712 def _formatter(self): 713 return FORMATTER if (self.parent_table is None) else self.parent_table.formatter 714 715 def pformat(self, max_lines=None, show_name=True, show_unit=False, show_dtype=False, 716 html=False): 717 """Return a list of formatted string representation of column values. 718 719 If no value of ``max_lines`` is supplied then the height of the 720 screen terminal is used to set ``max_lines``. If the terminal 721 height cannot be determined then the default will be 722 determined using the ``astropy.conf.max_lines`` configuration 723 item. If a negative value of ``max_lines`` is supplied then 724 there is no line limit applied. 725 726 Parameters 727 ---------- 728 max_lines : int 729 Maximum lines of output (header + data rows) 730 731 show_name : bool 732 Include column name. Default is True. 733 734 show_unit : bool 735 Include a header row for unit. Default is False. 736 737 show_dtype : bool 738 Include column dtype. Default is False. 739 740 html : bool 741 Format the output as an HTML table. Default is False. 742 743 Returns 744 ------- 745 lines : list 746 List of lines with header and formatted column values 747 748 """ 749 _pformat_col = self._formatter._pformat_col 750 lines, outs = _pformat_col(self, max_lines, show_name=show_name, 751 show_unit=show_unit, show_dtype=show_dtype, 752 html=html) 753 return lines 754 755 def pprint(self, max_lines=None, show_name=True, show_unit=False, show_dtype=False): 756 """Print a formatted string representation of column values. 757 758 If no value of ``max_lines`` is supplied then the height of the 759 screen terminal is used to set ``max_lines``. If the terminal 760 height cannot be determined then the default will be 761 determined using the ``astropy.conf.max_lines`` configuration 762 item. If a negative value of ``max_lines`` is supplied then 763 there is no line limit applied. 764 765 Parameters 766 ---------- 767 max_lines : int 768 Maximum number of values in output 769 770 show_name : bool 771 Include column name. Default is True. 772 773 show_unit : bool 774 Include a header row for unit. Default is False. 775 776 show_dtype : bool 777 Include column dtype. Default is True. 778 """ 779 _pformat_col = self._formatter._pformat_col 780 lines, outs = _pformat_col(self, max_lines, show_name=show_name, show_unit=show_unit, 781 show_dtype=show_dtype) 782 783 n_header = outs['n_header'] 784 for i, line in enumerate(lines): 785 if i < n_header: 786 color_print(line, 'red') 787 else: 788 print(line) 789 790 def more(self, max_lines=None, show_name=True, show_unit=False): 791 """Interactively browse column with a paging interface. 792 793 Supported keys:: 794 795 f, <space> : forward one page 796 b : back one page 797 r : refresh same page 798 n : next row 799 p : previous row 800 < : go to beginning 801 > : go to end 802 q : quit browsing 803 h : print this help 804 805 Parameters 806 ---------- 807 max_lines : int 808 Maximum number of lines in table output. 809 810 show_name : bool 811 Include a header row for column names. Default is True. 812 813 show_unit : bool 814 Include a header row for unit. Default is False. 815 816 """ 817 _more_tabcol = self._formatter._more_tabcol 818 _more_tabcol(self, max_lines=max_lines, show_name=show_name, 819 show_unit=show_unit) 820 821 @property 822 def unit(self): 823 """ 824 The unit associated with this column. May be a string or a 825 `astropy.units.UnitBase` instance. 826 827 Setting the ``unit`` property does not change the values of the 828 data. To perform a unit conversion, use ``convert_unit_to``. 829 """ 830 return self._unit 831 832 @unit.setter 833 def unit(self, unit): 834 if unit is None: 835 self._unit = None 836 else: 837 self._unit = Unit(unit, parse_strict='silent') 838 839 @unit.deleter 840 def unit(self): 841 self._unit = None 842 843 def convert_unit_to(self, new_unit, equivalencies=[]): 844 """ 845 Converts the values of the column in-place from the current 846 unit to the given unit. 847 848 To change the unit associated with this column without 849 actually changing the data values, simply set the ``unit`` 850 property. 851 852 Parameters 853 ---------- 854 new_unit : str or `astropy.units.UnitBase` instance 855 The unit to convert to. 856 857 equivalencies : list of tuple 858 A list of equivalence pairs to try if the unit are not 859 directly convertible. See :ref:`astropy:unit_equivalencies`. 860 861 Raises 862 ------ 863 astropy.units.UnitsError 864 If units are inconsistent 865 """ 866 if self.unit is None: 867 raise ValueError("No unit set on column") 868 self.data[:] = self.unit.to( 869 new_unit, self.data, equivalencies=equivalencies) 870 self.unit = new_unit 871 872 @property 873 def groups(self): 874 if not hasattr(self, '_groups'): 875 self._groups = groups.ColumnGroups(self) 876 return self._groups 877 878 def group_by(self, keys): 879 """ 880 Group this column by the specified ``keys`` 881 882 This effectively splits the column into groups which correspond to 883 unique values of the ``keys`` grouping object. The output is a new 884 `Column` or `MaskedColumn` which contains a copy of this column but 885 sorted by row according to ``keys``. 886 887 The ``keys`` input to ``group_by`` must be a numpy array with the 888 same length as this column. 889 890 Parameters 891 ---------- 892 keys : numpy array 893 Key grouping object 894 895 Returns 896 ------- 897 out : Column 898 New column with groups attribute set accordingly 899 """ 900 return groups.column_group_by(self, keys) 901 902 def _copy_groups(self, out): 903 """ 904 Copy current groups into a copy of self ``out`` 905 """ 906 if self.parent_table: 907 if hasattr(self.parent_table, '_groups'): 908 out._groups = groups.ColumnGroups(out, indices=self.parent_table._groups._indices) 909 elif hasattr(self, '_groups'): 910 out._groups = groups.ColumnGroups(out, indices=self._groups._indices) 911 912 # Strip off the BaseColumn-ness for repr and str so that 913 # MaskedColumn.data __repr__ does not include masked_BaseColumn(data = 914 # [1 2], ...). 915 def __repr__(self): 916 return np.asarray(self).__repr__() 917 918 @property 919 def quantity(self): 920 """ 921 A view of this table column as a `~astropy.units.Quantity` object with 922 units given by the Column's `unit` parameter. 923 """ 924 # the Quantity initializer is used here because it correctly fails 925 # if the column's values are non-numeric (like strings), while .view 926 # will happily return a quantity with gibberish for numerical values 927 return Quantity(self, self.unit, copy=False, dtype=self.dtype, order='A', subok=True) 928 929 def to(self, unit, equivalencies=[], **kwargs): 930 """ 931 Converts this table column to a `~astropy.units.Quantity` object with 932 the requested units. 933 934 Parameters 935 ---------- 936 unit : unit-like 937 The unit to convert to (i.e., a valid argument to the 938 :meth:`astropy.units.Quantity.to` method). 939 equivalencies : list of tuple 940 Equivalencies to use for this conversion. See 941 :meth:`astropy.units.Quantity.to` for more details. 942 943 Returns 944 ------- 945 quantity : `~astropy.units.Quantity` 946 A quantity object with the contents of this column in the units 947 ``unit``. 948 """ 949 return self.quantity.to(unit, equivalencies) 950 951 def _copy_attrs(self, obj): 952 """ 953 Copy key column attributes from ``obj`` to self 954 """ 955 for attr in ('name', 'unit', '_format', 'description'): 956 val = getattr(obj, attr, None) 957 setattr(self, attr, val) 958 959 # Light copy of meta if it is not empty 960 obj_meta = getattr(obj, 'meta', None) 961 if obj_meta: 962 self.meta = obj_meta.copy() 963 964 @staticmethod 965 def _encode_str(value): 966 """ 967 Encode anything that is unicode-ish as utf-8. This method is only 968 called for Py3+. 969 """ 970 if isinstance(value, str): 971 value = value.encode('utf-8') 972 elif isinstance(value, bytes) or value is np.ma.masked: 973 pass 974 else: 975 arr = np.asarray(value) 976 if arr.dtype.char == 'U': 977 arr = np.char.encode(arr, encoding='utf-8') 978 if isinstance(value, np.ma.MaskedArray): 979 arr = np.ma.array(arr, mask=value.mask, copy=False) 980 value = arr 981 982 return value 983 984 def tolist(self): 985 if self.dtype.kind == 'S': 986 return np.chararray.decode(self, encoding='utf-8').tolist() 987 else: 988 return super().tolist() 989 990 991class Column(BaseColumn): 992 """Define a data column for use in a Table object. 993 994 Parameters 995 ---------- 996 data : list, ndarray, or None 997 Column data values 998 name : str 999 Column name and key for reference within Table 1000 dtype : `~numpy.dtype`-like 1001 Data type for column 1002 shape : tuple or () 1003 Dimensions of a single row element in the column data 1004 length : int or 0 1005 Number of row elements in column data 1006 description : str or None 1007 Full description of column 1008 unit : str or None 1009 Physical unit 1010 format : str, None, or callable 1011 Format string for outputting column values. This can be an 1012 "old-style" (``format % value``) or "new-style" (`str.format`) 1013 format specification string or a function or any callable object that 1014 accepts a single value and returns a string. 1015 meta : dict-like or None 1016 Meta-data associated with the column 1017 1018 Examples 1019 -------- 1020 A Column can be created in two different ways: 1021 1022 - Provide a ``data`` value but not ``shape`` or ``length`` (which are 1023 inferred from the data). 1024 1025 Examples:: 1026 1027 col = Column(data=[1, 2], name='name') # shape=(2,) 1028 col = Column(data=[[1, 2], [3, 4]], name='name') # shape=(2, 2) 1029 col = Column(data=[1, 2], name='name', dtype=float) 1030 col = Column(data=np.array([1, 2]), name='name') 1031 col = Column(data=['hello', 'world'], name='name') 1032 1033 The ``dtype`` argument can be any value which is an acceptable 1034 fixed-size data-type initializer for the numpy.dtype() method. See 1035 `<https://numpy.org/doc/stable/reference/arrays.dtypes.html>`_. 1036 Examples include: 1037 1038 - Python non-string type (float, int, bool) 1039 - Numpy non-string type (e.g. np.float32, np.int64, np.bool\\_) 1040 - Numpy.dtype array-protocol type strings (e.g. 'i4', 'f8', 'S15') 1041 1042 If no ``dtype`` value is provide then the type is inferred using 1043 ``np.array(data)``. 1044 1045 - Provide ``length`` and optionally ``shape``, but not ``data`` 1046 1047 Examples:: 1048 1049 col = Column(name='name', length=5) 1050 col = Column(name='name', dtype=int, length=10, shape=(3,4)) 1051 1052 The default ``dtype`` is ``np.float64``. The ``shape`` argument is the 1053 array shape of a single cell in the column. 1054 """ 1055 1056 def __new__(cls, data=None, name=None, 1057 dtype=None, shape=(), length=0, 1058 description=None, unit=None, format=None, meta=None, 1059 copy=False, copy_indices=True): 1060 1061 if isinstance(data, MaskedColumn) and np.any(data.mask): 1062 raise TypeError("Cannot convert a MaskedColumn with masked value to a Column") 1063 1064 self = super().__new__( 1065 cls, data=data, name=name, dtype=dtype, shape=shape, length=length, 1066 description=description, unit=unit, format=format, meta=meta, 1067 copy=copy, copy_indices=copy_indices) 1068 return self 1069 1070 def __setattr__(self, item, value): 1071 if not isinstance(self, MaskedColumn) and item == "mask": 1072 raise AttributeError("cannot set mask value to a column in non-masked Table") 1073 super().__setattr__(item, value) 1074 1075 if item == 'unit' and issubclass(self.dtype.type, np.number): 1076 try: 1077 converted = self.parent_table._convert_col_for_table(self) 1078 except AttributeError: # Either no parent table or parent table is None 1079 pass 1080 else: 1081 if converted is not self: 1082 self.parent_table.replace_column(self.name, converted) 1083 1084 def _base_repr_(self, html=False): 1085 # If scalar then just convert to correct numpy type and use numpy repr 1086 if self.ndim == 0: 1087 return repr(self.item()) 1088 1089 descr_vals = [self.__class__.__name__] 1090 unit = None if self.unit is None else str(self.unit) 1091 shape = None if self.ndim <= 1 else self.shape[1:] 1092 for attr, val in (('name', self.name), 1093 ('dtype', dtype_info_name(self.dtype)), 1094 ('shape', shape), 1095 ('unit', unit), 1096 ('format', self.format), 1097 ('description', self.description), 1098 ('length', len(self))): 1099 1100 if val is not None: 1101 descr_vals.append(f'{attr}={val!r}') 1102 1103 descr = '<' + ' '.join(descr_vals) + '>\n' 1104 1105 if html: 1106 from astropy.utils.xml.writer import xml_escape 1107 descr = xml_escape(descr) 1108 1109 data_lines, outs = self._formatter._pformat_col( 1110 self, show_name=False, show_unit=False, show_length=False, html=html) 1111 1112 out = descr + '\n'.join(data_lines) 1113 1114 return out 1115 1116 def _repr_html_(self): 1117 return self._base_repr_(html=True) 1118 1119 def __repr__(self): 1120 return self._base_repr_(html=False) 1121 1122 def __str__(self): 1123 # If scalar then just convert to correct numpy type and use numpy repr 1124 if self.ndim == 0: 1125 return str(self.item()) 1126 1127 lines, outs = self._formatter._pformat_col(self) 1128 return '\n'.join(lines) 1129 1130 def __bytes__(self): 1131 return str(self).encode('utf-8') 1132 1133 def _check_string_truncate(self, value): 1134 """ 1135 Emit a warning if any elements of ``value`` will be truncated when 1136 ``value`` is assigned to self. 1137 """ 1138 # Convert input ``value`` to the string dtype of this column and 1139 # find the length of the longest string in the array. 1140 value = np.asanyarray(value, dtype=self.dtype.type) 1141 if value.size == 0: 1142 return 1143 value_str_len = np.char.str_len(value).max() 1144 1145 # Parse the array-protocol typestring (e.g. '|U15') of self.dtype which 1146 # has the character repeat count on the right side. 1147 self_str_len = dtype_bytes_or_chars(self.dtype) 1148 1149 if value_str_len > self_str_len: 1150 warnings.warn('truncated right side string(s) longer than {} ' 1151 'character(s) during assignment' 1152 .format(self_str_len), 1153 StringTruncateWarning, 1154 stacklevel=3) 1155 1156 def __setitem__(self, index, value): 1157 if self.dtype.char == 'S': 1158 value = self._encode_str(value) 1159 1160 # Issue warning for string assignment that truncates ``value`` 1161 if issubclass(self.dtype.type, np.character): 1162 self._check_string_truncate(value) 1163 1164 # update indices 1165 self.info.adjust_indices(index, value, len(self)) 1166 1167 # Set items using a view of the underlying data, as it gives an 1168 # order-of-magnitude speed-up. [#2994] 1169 self.data[index] = value 1170 1171 __eq__ = _make_compare('__eq__') 1172 __ne__ = _make_compare('__ne__') 1173 __gt__ = _make_compare('__gt__') 1174 __lt__ = _make_compare('__lt__') 1175 __ge__ = _make_compare('__ge__') 1176 __le__ = _make_compare('__le__') 1177 1178 def insert(self, obj, values, axis=0): 1179 """ 1180 Insert values before the given indices in the column and return 1181 a new `~astropy.table.Column` object. 1182 1183 Parameters 1184 ---------- 1185 obj : int, slice or sequence of int 1186 Object that defines the index or indices before which ``values`` is 1187 inserted. 1188 values : array-like 1189 Value(s) to insert. If the type of ``values`` is different from 1190 that of the column, ``values`` is converted to the matching type. 1191 ``values`` should be shaped so that it can be broadcast appropriately. 1192 axis : int, optional 1193 Axis along which to insert ``values``. If ``axis`` is None then 1194 the column array is flattened before insertion. Default is 0, 1195 which will insert a row. 1196 1197 Returns 1198 ------- 1199 out : `~astropy.table.Column` 1200 A copy of column with ``values`` and ``mask`` inserted. Note that the 1201 insertion does not occur in-place: a new column is returned. 1202 """ 1203 if self.dtype.kind == 'O': 1204 # Even if values is array-like (e.g. [1,2,3]), insert as a single 1205 # object. Numpy.insert instead inserts each element in an array-like 1206 # input individually. 1207 data = np.insert(self, obj, None, axis=axis) 1208 data[obj] = values 1209 else: 1210 self_for_insert = _expand_string_array_for_values(self, values) 1211 data = np.insert(self_for_insert, obj, values, axis=axis) 1212 1213 out = data.view(self.__class__) 1214 out.__array_finalize__(self) 1215 return out 1216 1217 # We do this to make the methods show up in the API docs 1218 name = BaseColumn.name 1219 unit = BaseColumn.unit 1220 copy = BaseColumn.copy 1221 more = BaseColumn.more 1222 pprint = BaseColumn.pprint 1223 pformat = BaseColumn.pformat 1224 convert_unit_to = BaseColumn.convert_unit_to 1225 quantity = BaseColumn.quantity 1226 to = BaseColumn.to 1227 1228 1229class MaskedColumnInfo(ColumnInfo): 1230 """ 1231 Container for meta information like name, description, format. 1232 1233 This is required when the object is used as a mixin column within a table, 1234 but can be used as a general way to store meta information. In this case 1235 it just adds the ``mask_val`` attribute. 1236 """ 1237 # Add `serialize_method` attribute to the attrs that MaskedColumnInfo knows 1238 # about. This allows customization of the way that MaskedColumn objects 1239 # get written to file depending on format. The default is to use whatever 1240 # the writer would normally do, which in the case of FITS or ECSV is to use 1241 # a NULL value within the data itself. If serialize_method is 'data_mask' 1242 # then the mask is explicitly written out as a separate column if there 1243 # are any masked values. See also code below. 1244 attr_names = ColumnInfo.attr_names | {'serialize_method'} 1245 1246 # When `serialize_method` is 'data_mask', and data and mask are being written 1247 # as separate columns, use column names <name> and <name>.mask (instead 1248 # of default encoding as <name>.data and <name>.mask). 1249 _represent_as_dict_primary_data = 'data' 1250 1251 mask_val = np.ma.masked 1252 1253 def __init__(self, bound=False): 1254 super().__init__(bound) 1255 1256 # If bound to a data object instance then create the dict of attributes 1257 # which stores the info attribute values. 1258 if bound: 1259 # Specify how to serialize this object depending on context. 1260 self.serialize_method = {'fits': 'null_value', 1261 'ecsv': 'null_value', 1262 'hdf5': 'data_mask', 1263 'parquet': 'data_mask', 1264 None: 'null_value'} 1265 1266 def _represent_as_dict(self): 1267 out = super()._represent_as_dict() 1268 1269 col = self._parent 1270 1271 # If the serialize method for this context (e.g. 'fits' or 'ecsv') is 1272 # 'data_mask', that means to serialize using an explicit mask column. 1273 method = self.serialize_method[self._serialize_context] 1274 1275 if method == 'data_mask': 1276 # Note: a driver here is a performance issue in #8443 where repr() of a 1277 # np.ma.MaskedArray value is up to 10 times slower than repr of a normal array 1278 # value. So regardless of whether there are masked elements it is useful to 1279 # explicitly define this as a serialized column and use col.data.data (ndarray) 1280 # instead of letting it fall through to the "standard" serialization machinery. 1281 out['data'] = col.data.data 1282 1283 if np.any(col.mask): 1284 # Only if there are actually masked elements do we add the ``mask`` column 1285 out['mask'] = col.mask 1286 1287 elif method == 'null_value': 1288 pass 1289 1290 else: 1291 raise ValueError('serialize method must be either "data_mask" or "null_value"') 1292 1293 return out 1294 1295 1296class MaskedColumn(Column, _MaskedColumnGetitemShim, ma.MaskedArray): 1297 """Define a masked data column for use in a Table object. 1298 1299 Parameters 1300 ---------- 1301 data : list, ndarray, or None 1302 Column data values 1303 name : str 1304 Column name and key for reference within Table 1305 mask : list, ndarray or None 1306 Boolean mask for which True indicates missing or invalid data 1307 fill_value : float, int, str, or None 1308 Value used when filling masked column elements 1309 dtype : `~numpy.dtype`-like 1310 Data type for column 1311 shape : tuple or () 1312 Dimensions of a single row element in the column data 1313 length : int or 0 1314 Number of row elements in column data 1315 description : str or None 1316 Full description of column 1317 unit : str or None 1318 Physical unit 1319 format : str, None, or callable 1320 Format string for outputting column values. This can be an 1321 "old-style" (``format % value``) or "new-style" (`str.format`) 1322 format specification string or a function or any callable object that 1323 accepts a single value and returns a string. 1324 meta : dict-like or None 1325 Meta-data associated with the column 1326 1327 Examples 1328 -------- 1329 A MaskedColumn is similar to a Column except that it includes ``mask`` and 1330 ``fill_value`` attributes. It can be created in two different ways: 1331 1332 - Provide a ``data`` value but not ``shape`` or ``length`` (which are 1333 inferred from the data). 1334 1335 Examples:: 1336 1337 col = MaskedColumn(data=[1, 2], name='name') 1338 col = MaskedColumn(data=[1, 2], name='name', mask=[True, False]) 1339 col = MaskedColumn(data=[1, 2], name='name', dtype=float, fill_value=99) 1340 1341 The ``mask`` argument will be cast as a boolean array and specifies 1342 which elements are considered to be missing or invalid. 1343 1344 The ``dtype`` argument can be any value which is an acceptable 1345 fixed-size data-type initializer for the numpy.dtype() method. See 1346 `<https://numpy.org/doc/stable/reference/arrays.dtypes.html>`_. 1347 Examples include: 1348 1349 - Python non-string type (float, int, bool) 1350 - Numpy non-string type (e.g. np.float32, np.int64, np.bool\\_) 1351 - Numpy.dtype array-protocol type strings (e.g. 'i4', 'f8', 'S15') 1352 1353 If no ``dtype`` value is provide then the type is inferred using 1354 ``np.array(data)``. When ``data`` is provided then the ``shape`` 1355 and ``length`` arguments are ignored. 1356 1357 - Provide ``length`` and optionally ``shape``, but not ``data`` 1358 1359 Examples:: 1360 1361 col = MaskedColumn(name='name', length=5) 1362 col = MaskedColumn(name='name', dtype=int, length=10, shape=(3,4)) 1363 1364 The default ``dtype`` is ``np.float64``. The ``shape`` argument is the 1365 array shape of a single cell in the column. 1366 """ 1367 info = MaskedColumnInfo() 1368 1369 def __new__(cls, data=None, name=None, mask=None, fill_value=None, 1370 dtype=None, shape=(), length=0, 1371 description=None, unit=None, format=None, meta=None, 1372 copy=False, copy_indices=True): 1373 1374 if mask is None: 1375 # If mask is None then we need to determine the mask (if any) from the data. 1376 # The naive method is looking for a mask attribute on data, but this can fail, 1377 # see #8816. Instead use ``MaskedArray`` to do the work. 1378 mask = ma.MaskedArray(data).mask 1379 if mask is np.ma.nomask: 1380 # Handle odd-ball issue with np.ma.nomask (numpy #13758), and see below. 1381 mask = False 1382 elif copy: 1383 mask = mask.copy() 1384 1385 elif mask is np.ma.nomask: 1386 # Force the creation of a full mask array as nomask is tricky to 1387 # use and will fail in an unexpected manner when setting a value 1388 # to the mask. 1389 mask = False 1390 else: 1391 mask = deepcopy(mask) 1392 1393 # Create self using MaskedArray as a wrapper class, following the example of 1394 # class MSubArray in 1395 # https://github.com/numpy/numpy/blob/maintenance/1.8.x/numpy/ma/tests/test_subclassing.py 1396 # This pattern makes it so that __array_finalize__ is called as expected (e.g. #1471 and 1397 # https://github.com/astropy/astropy/commit/ff6039e8) 1398 1399 # First just pass through all args and kwargs to BaseColumn, then wrap that object 1400 # with MaskedArray. 1401 self_data = BaseColumn(data, dtype=dtype, shape=shape, length=length, name=name, 1402 unit=unit, format=format, description=description, 1403 meta=meta, copy=copy, copy_indices=copy_indices) 1404 self = ma.MaskedArray.__new__(cls, data=self_data, mask=mask) 1405 # The above process preserves info relevant for Column, but this does 1406 # not include serialize_method (and possibly other future attributes) 1407 # relevant for MaskedColumn, so we set info explicitly. 1408 if 'info' in getattr(data, '__dict__', {}): 1409 self.info = data.info 1410 1411 # Note: do not set fill_value in the MaskedArray constructor because this does not 1412 # go through the fill_value workarounds. 1413 if fill_value is None and getattr(data, 'fill_value', None) is not None: 1414 # Coerce the fill_value to the correct type since `data` may be a 1415 # different dtype than self. 1416 fill_value = np.array(data.fill_value, self.dtype)[()] 1417 self.fill_value = fill_value 1418 1419 self.parent_table = None 1420 1421 # needs to be done here since self doesn't come from BaseColumn.__new__ 1422 for index in self.indices: 1423 index.replace_col(self_data, self) 1424 1425 return self 1426 1427 @property 1428 def fill_value(self): 1429 return self.get_fill_value() # defer to native ma.MaskedArray method 1430 1431 @fill_value.setter 1432 def fill_value(self, val): 1433 """Set fill value both in the masked column view and in the parent table 1434 if it exists. Setting one or the other alone doesn't work.""" 1435 1436 # another ma bug workaround: If the value of fill_value for a string array is 1437 # requested but not yet set then it gets created as 'N/A'. From this point onward 1438 # any new fill_values are truncated to 3 characters. Note that this does not 1439 # occur if the masked array is a structured array (as in the previous block that 1440 # deals with the parent table). 1441 # 1442 # >>> x = ma.array(['xxxx']) 1443 # >>> x.fill_value # fill_value now gets represented as an 'S3' array 1444 # 'N/A' 1445 # >>> x.fill_value='yyyy' 1446 # >>> x.fill_value 1447 # 'yyy' 1448 # 1449 # To handle this we are forced to reset a private variable first: 1450 self._fill_value = None 1451 1452 self.set_fill_value(val) # defer to native ma.MaskedArray method 1453 1454 @property 1455 def data(self): 1456 """The plain MaskedArray data held by this column.""" 1457 out = self.view(np.ma.MaskedArray) 1458 # By default, a MaskedArray view will set the _baseclass to be the 1459 # same as that of our own class, i.e., BaseColumn. Since we want 1460 # to return a plain MaskedArray, we reset the baseclass accordingly. 1461 out._baseclass = np.ndarray 1462 return out 1463 1464 def filled(self, fill_value=None): 1465 """Return a copy of self, with masked values filled with a given value. 1466 1467 Parameters 1468 ---------- 1469 fill_value : scalar; optional 1470 The value to use for invalid entries (`None` by default). If 1471 `None`, the ``fill_value`` attribute of the array is used 1472 instead. 1473 1474 Returns 1475 ------- 1476 filled_column : Column 1477 A copy of ``self`` with masked entries replaced by `fill_value` 1478 (be it the function argument or the attribute of ``self``). 1479 """ 1480 if fill_value is None: 1481 fill_value = self.fill_value 1482 1483 data = super().filled(fill_value) 1484 # Use parent table definition of Column if available 1485 column_cls = self.parent_table.Column if (self.parent_table is not None) else Column 1486 1487 out = column_cls(name=self.name, data=data, unit=self.unit, 1488 format=self.format, description=self.description, 1489 meta=deepcopy(self.meta)) 1490 return out 1491 1492 def insert(self, obj, values, mask=None, axis=0): 1493 """ 1494 Insert values along the given axis before the given indices and return 1495 a new `~astropy.table.MaskedColumn` object. 1496 1497 Parameters 1498 ---------- 1499 obj : int, slice or sequence of int 1500 Object that defines the index or indices before which ``values`` is 1501 inserted. 1502 values : array-like 1503 Value(s) to insert. If the type of ``values`` is different from 1504 that of the column, ``values`` is converted to the matching type. 1505 ``values`` should be shaped so that it can be broadcast appropriately. 1506 mask : bool or array-like 1507 Mask value(s) to insert. If not supplied, and values does not have 1508 a mask either, then False is used. 1509 axis : int, optional 1510 Axis along which to insert ``values``. If ``axis`` is None then 1511 the column array is flattened before insertion. Default is 0, 1512 which will insert a row. 1513 1514 Returns 1515 ------- 1516 out : `~astropy.table.MaskedColumn` 1517 A copy of column with ``values`` and ``mask`` inserted. Note that the 1518 insertion does not occur in-place: a new masked column is returned. 1519 """ 1520 self_ma = self.data # self viewed as MaskedArray 1521 1522 if self.dtype.kind == 'O': 1523 # Even if values is array-like (e.g. [1,2,3]), insert as a single 1524 # object. Numpy.insert instead inserts each element in an array-like 1525 # input individually. 1526 new_data = np.insert(self_ma.data, obj, None, axis=axis) 1527 new_data[obj] = values 1528 else: 1529 self_ma = _expand_string_array_for_values(self_ma, values) 1530 new_data = np.insert(self_ma.data, obj, values, axis=axis) 1531 1532 if mask is None: 1533 mask = getattr(values, 'mask', np.ma.nomask) 1534 if mask is np.ma.nomask: 1535 if self.dtype.kind == 'O': 1536 mask = False 1537 else: 1538 mask = np.zeros(np.shape(values), dtype=bool) 1539 1540 new_mask = np.insert(self_ma.mask, obj, mask, axis=axis) 1541 new_ma = np.ma.array(new_data, mask=new_mask, copy=False) 1542 1543 out = new_ma.view(self.__class__) 1544 out.parent_table = None 1545 out.indices = [] 1546 out._copy_attrs(self) 1547 out.fill_value = self.fill_value 1548 1549 return out 1550 1551 def _copy_attrs_slice(self, out): 1552 # Fixes issue #3023: when calling getitem with a MaskedArray subclass 1553 # the original object attributes are not copied. 1554 if out.__class__ is self.__class__: 1555 # TODO: this part is essentially the same as what is done in 1556 # __array_finalize__ and could probably be called directly in our 1557 # override of __getitem__ in _columns_mixins.pyx). Refactor? 1558 if 'info' in self.__dict__: 1559 out.info = self.info 1560 out.parent_table = None 1561 # we need this because __getitem__ does a shallow copy of indices 1562 if out.indices is self.indices: 1563 out.indices = [] 1564 out._copy_attrs(self) 1565 return out 1566 1567 def __setitem__(self, index, value): 1568 # Issue warning for string assignment that truncates ``value`` 1569 if self.dtype.char == 'S': 1570 value = self._encode_str(value) 1571 1572 if issubclass(self.dtype.type, np.character): 1573 # Account for a bug in np.ma.MaskedArray setitem. 1574 # https://github.com/numpy/numpy/issues/8624 1575 value = np.ma.asanyarray(value, dtype=self.dtype.type) 1576 1577 # Check for string truncation after filling masked items with 1578 # empty (zero-length) string. Note that filled() does not make 1579 # a copy if there are no masked items. 1580 self._check_string_truncate(value.filled('')) 1581 1582 # update indices 1583 self.info.adjust_indices(index, value, len(self)) 1584 1585 ma.MaskedArray.__setitem__(self, index, value) 1586 1587 # We do this to make the methods show up in the API docs 1588 name = BaseColumn.name 1589 copy = BaseColumn.copy 1590 more = BaseColumn.more 1591 pprint = BaseColumn.pprint 1592 pformat = BaseColumn.pformat 1593 convert_unit_to = BaseColumn.convert_unit_to 1594