1# Licensed under a 3-clause BSD style license - see LICENSE.rst 2from .index import SlicedIndex, TableIndices, TableLoc, TableILoc, TableLocIndices 3 4import sys 5from collections import OrderedDict, defaultdict 6from collections.abc import Mapping 7import warnings 8from copy import deepcopy 9import types 10import itertools 11import weakref 12 13import numpy as np 14from numpy import ma 15 16from astropy import log 17from astropy.units import Quantity, QuantityInfo 18from astropy.utils import isiterable, ShapedLikeNDArray 19from astropy.utils.console import color_print 20from astropy.utils.exceptions import AstropyUserWarning 21from astropy.utils.masked import Masked 22from astropy.utils.metadata import MetaData, MetaAttribute 23from astropy.utils.data_info import BaseColumnInfo, MixinInfo, DataInfo 24from astropy.utils.decorators import format_doc 25from astropy.io.registry import UnifiedReadWriteMethod 26 27from . import groups 28from .pprint import TableFormatter 29from .column import (BaseColumn, Column, MaskedColumn, _auto_names, FalseArray, 30 col_copy, _convert_sequence_data_to_array) 31from .row import Row 32from .np_utils import fix_column_name 33from .info import TableInfo 34from .index import Index, _IndexModeContext, get_index 35from .connect import TableRead, TableWrite 36from .ndarray_mixin import NdarrayMixin 37from .mixins.registry import get_mixin_handler 38from . import conf 39 40 41_implementation_notes = """ 42This string has informal notes concerning Table implementation for developers. 43 44Things to remember: 45 46- Table has customizable attributes ColumnClass, Column, MaskedColumn. 47 Table.Column is normally just column.Column (same w/ MaskedColumn) 48 but in theory they can be different. Table.ColumnClass is the default 49 class used to create new non-mixin columns, and this is a function of 50 the Table.masked attribute. Column creation / manipulation in a Table 51 needs to respect these. 
52 53- Column objects that get inserted into the Table.columns attribute must 54 have the info.parent_table attribute set correctly. Beware just dropping 55 an object into the columns dict since an existing column may 56 be part of another Table and have parent_table set to point at that 57 table. Dropping that column into `columns` of this Table will cause 58 a problem for the old one so the column object needs to be copied (but 59 not necessarily the data). 60 61 Currently replace_column is always making a copy of both object and 62 data if parent_table is set. This could be improved but requires a 63 generic way to copy a mixin object but not the data. 64 65- Be aware of column objects that have indices set. 66 67- `cls.ColumnClass` is a property that effectively uses the `masked` attribute 68 to choose either `cls.Column` or `cls.MaskedColumn`. 69""" 70 71__doctest_skip__ = ['Table.read', 'Table.write', 'Table._read', 72 'Table.convert_bytestring_to_unicode', 73 'Table.convert_unicode_to_bytestring', 74 ] 75 76__doctest_requires__ = {'*pandas': ['pandas>=1.1']} 77 78_pprint_docs = """ 79 {__doc__} 80 81 Parameters 82 ---------- 83 max_lines : int or None 84 Maximum number of lines in table output. 85 86 max_width : int or None 87 Maximum character width of output. 88 89 show_name : bool 90 Include a header row for column names. Default is True. 91 92 show_unit : bool 93 Include a header row for unit. Default is to show a row 94 for units only if one or more columns has a defined value 95 for the unit. 96 97 show_dtype : bool 98 Include a header row for column dtypes. Default is True. 99 100 align : str or list or tuple or None 101 Left/right alignment of columns. Default is right (None) for all 102 columns. Other allowed values are '>', '<', '^', and '0=' for 103 right, left, centered, and 0-padded, respectively. A list of 104 strings can be provided for alignment of tables with multiple 105 columns. 
106 """ 107 108_pformat_docs = """ 109 {__doc__} 110 111 Parameters 112 ---------- 113 max_lines : int or None 114 Maximum number of rows to output 115 116 max_width : int or None 117 Maximum character width of output 118 119 show_name : bool 120 Include a header row for column names. Default is True. 121 122 show_unit : bool 123 Include a header row for unit. Default is to show a row 124 for units only if one or more columns has a defined value 125 for the unit. 126 127 show_dtype : bool 128 Include a header row for column dtypes. Default is True. 129 130 html : bool 131 Format the output as an HTML table. Default is False. 132 133 tableid : str or None 134 An ID tag for the table; only used if html is set. Default is 135 "table{id}", where id is the unique integer id of the table object, 136 id(self) 137 138 align : str or list or tuple or None 139 Left/right alignment of columns. Default is right (None) for all 140 columns. Other allowed values are '>', '<', '^', and '0=' for 141 right, left, centered, and 0-padded, respectively. A list of 142 strings can be provided for alignment of tables with multiple 143 columns. 144 145 tableclass : str or list of str or None 146 CSS classes for the table; only used if html is set. Default is 147 None. 148 149 Returns 150 ------- 151 lines : list 152 Formatted table as a list of strings. 153 """ 154 155 156class TableReplaceWarning(UserWarning): 157 """ 158 Warning class for cases when a table column is replaced via the 159 Table.__setitem__ syntax e.g. t['a'] = val. 160 161 This does not inherit from AstropyWarning because we want to use 162 stacklevel=3 to show the user where the issue occurred in their code. 163 """ 164 pass 165 166 167def descr(col): 168 """Array-interface compliant full description of a column. 169 170 This returns a 3-tuple (name, type, shape) that can always be 171 used in a structured array dtype definition. 
172 """ 173 col_dtype = 'O' if (col.info.dtype is None) else col.info.dtype 174 col_shape = col.shape[1:] if hasattr(col, 'shape') else () 175 return (col.info.name, col_dtype, col_shape) 176 177 178def has_info_class(obj, cls): 179 """Check if the object's info is an instance of cls.""" 180 # We check info on the class of the instance, since on the instance 181 # itself accessing 'info' has side effects in that it sets 182 # obj.__dict__['info'] if it does not exist already. 183 return isinstance(getattr(obj.__class__, 'info', None), cls) 184 185 186def _get_names_from_list_of_dict(rows): 187 """Return list of column names if ``rows`` is a list of dict that 188 defines table data. 189 190 If rows is not a list of dict then return None. 191 """ 192 if rows is None: 193 return None 194 195 names = set() 196 for row in rows: 197 if not isinstance(row, Mapping): 198 return None 199 names.update(row) 200 return list(names) 201 202 203# Note to future maintainers: when transitioning this to dict 204# be sure to change the OrderedDict ref(s) in Row and in __len__(). 205 206class TableColumns(OrderedDict): 207 """OrderedDict subclass for a set of columns. 208 209 This class enhances item access to provide convenient access to columns 210 by name or index, including slice access. It also handles renaming 211 of columns. 212 213 The initialization argument ``cols`` can be a list of ``Column`` objects 214 or any structure that is valid for initializing a Python dict. This 215 includes a dict, list of (key, val) tuples or [key, val] lists, etc. 216 217 Parameters 218 ---------- 219 cols : dict, list, tuple; optional 220 Column objects as data structure that can init dict (see above) 221 """ 222 223 def __init__(self, cols={}): 224 if isinstance(cols, (list, tuple)): 225 # `cols` should be a list of two-tuples, but it is allowed to have 226 # columns (BaseColumn or mixins) in the list. 
class TableColumns(OrderedDict):
    """OrderedDict subclass for a set of columns.

    This class enhances item access to provide convenient access to columns
    by name or index, including slice access.  It also handles renaming
    of columns.

    The initialization argument ``cols`` can be a list of ``Column`` objects
    or any structure that is valid for initializing a Python dict.  This
    includes a dict, list of (key, val) tuples or [key, val] lists, etc.

    Parameters
    ----------
    cols : dict, list, tuple; optional
        Column objects as data structure that can init dict (see above)
    """

    def __init__(self, cols={}):
        # NOTE: the ``{}`` default is only read here, never mutated, so it
        # is not shared state between calls.
        if isinstance(cols, (list, tuple)):
            # `cols` should be a list of two-tuples, but it is allowed to have
            # columns (BaseColumn or mixins) in the list.
            newcols = []
            for col in cols:
                if has_info_class(col, BaseColumnInfo):
                    newcols.append((col.info.name, col))
                else:
                    newcols.append(col)
            cols = newcols
        super().__init__(cols)

    def __getitem__(self, item):
        """Get items from a TableColumns object.
        ::

          tc = TableColumns(cols=[Column(name='a'), Column(name='b'), Column(name='c')])
          tc['a']  # Column('a')
          tc[1] # Column('b')
          tc['a', 'b'] # <TableColumns names=('a','b')>
          tc[1:3] # <TableColumns names=('b','c')>

        Raises
        ------
        IndexError
            If ``item`` is not a str, integer, 0-d integer array, tuple
            or slice.
        """
        if isinstance(item, str):
            return OrderedDict.__getitem__(self, item)
        elif isinstance(item, (int, np.integer)):
            return list(self.values())[item]
        elif (isinstance(item, np.ndarray) and item.shape == ()
                and item.dtype.kind in 'iu'):
            # 0-d integer arrays, signed or unsigned, are treated like the
            # corresponding numpy integer scalars accepted just above.
            return list(self.values())[item.item()]
        elif isinstance(item, tuple):
            return self.__class__([self[x] for x in item])
        elif isinstance(item, slice):
            return self.__class__([self[x] for x in list(self)[item]])
        else:
            raise IndexError('Illegal key or index value for {} object'
                             .format(self.__class__.__name__))

    def __setitem__(self, item, value, validated=False):
        """
        Set item in this dict instance, but do not allow directly replacing an
        existing column unless it is already validated (and thus is certain to
        not corrupt the table).

        NOTE: it is easily possible to corrupt a table by directly *adding* a new
        key to the TableColumns attribute of a Table, e.g.
        ``t.columns['jane'] = 'doe'``.

        Raises
        ------
        ValueError
            If ``item`` already exists and ``validated`` is False.
        """
        if item in self and not validated:
            raise ValueError("Cannot replace column '{}'.  Use Table.replace_column() instead."
                             .format(item))
        super().__setitem__(item, value)

    def __repr__(self):
        names = (f"'{x}'" for x in self.keys())
        return f"<{self.__class__.__name__} names=({','.join(names)})>"

    def _rename_column(self, name, new_name):
        """Rename key ``name`` to ``new_name`` in place, keeping column order.

        Raises
        ------
        KeyError
            If ``new_name`` is already a key, or ``name`` is not a key.
        """
        if name == new_name:
            return

        if new_name in self:
            raise KeyError(f"Column {new_name} already exists")

        # Rename column names in pprint include/exclude attributes as needed
        parent_table = self[name].info.parent_table
        if parent_table is not None:
            parent_table.pprint_exclude_names._rename(name, new_name)
            parent_table.pprint_include_names._rename(name, new_name)

        # Rebuild the mapping with the single key swapped so the position of
        # the renamed column is preserved.
        new_names = [new_name if key == name else key for key in self]
        cols = list(self.values())
        self.clear()
        self.update(list(zip(new_names, cols)))

    def __delitem__(self, name):
        # Remove column names from pprint include/exclude attributes as needed.
        # __delitem__ also gets called for pop() and popitem().
        parent_table = self[name].info.parent_table
        if parent_table is not None:
            # _remove() method does not require that `name` is in the attribute
            parent_table.pprint_exclude_names._remove(name)
            parent_table.pprint_include_names._remove(name)
        return super().__delitem__(name)

    def isinstance(self, cls):
        """
        Return a list of columns which are instances of the specified classes.

        Parameters
        ----------
        cls : class or tuple thereof
            Column class (including mixin) or tuple of Column classes.

        Returns
        -------
        col_list : list of `Column`
            List of Column objects which are instances of given classes.
        """
        cols = [col for col in self.values() if isinstance(col, cls)]
        return cols

    def not_isinstance(self, cls):
        """
        Return a list of columns which are not instances of the specified classes.

        Parameters
        ----------
        cls : class or tuple thereof
            Column class (including mixin) or tuple of Column classes.

        Returns
        -------
        col_list : list of `Column`
            List of Column objects which are not instances of given classes.
        """
        cols = [col for col in self.values() if not isinstance(col, cls)]
        return cols
class PprintIncludeExclude(TableAttribute):
    """Maintain tuple that controls table column visibility for print output.

    This is a descriptor that inherits from MetaAttribute so that the attribute
    value is stored in the table meta['__attributes__'].

    This gets used for the ``pprint_include_names`` and ``pprint_exclude_names`` Table
    attributes.
    """
    def __get__(self, instance, owner_cls):
        """Get the attribute.

        This normally returns an instance of this class which is stored on the
        owner object.

        Parameters
        ----------
        instance : object or None
            Owner instance, or None when accessed on the class.
        owner_cls : type
            Owner class (unused here).

        Returns
        -------
        PprintIncludeExclude
            ``self`` for class access, otherwise the per-instance copy kept
            in ``instance.__dict__``.
        """
        # For getting from class not an instance
        if instance is None:
            return self

        # If not already stored on `instance`, make a copy of the class
        # descriptor object and put it onto the instance.
        value = instance.__dict__.get(self.name)
        if value is None:
            value = deepcopy(self)
            instance.__dict__[self.name] = value

        # We set _instance_ref on every call, since if one makes copies of
        # instances, this attribute will be copied as well, which will lose the
        # reference.
        value._instance_ref = weakref.ref(instance)
        return value

    def __set__(self, instance, names):
        """Set value of ``instance`` attribute to ``names``.

        Parameters
        ----------
        instance : object
            Instance that owns the attribute
        names : None, str, list, tuple
            Column name(s) to store, or None to clear
        """
        # A single string is treated as one name, not as an iterable of chars.
        if isinstance(names, str):
            names = [names]
        if names is None:
            # Remove attribute value from the meta['__attributes__'] dict.
            # Subsequent access will just return None.
            delattr(instance, self.name)
        else:
            # This stores names into instance.meta['__attributes__'] as tuple
            return super().__set__(instance, tuple(names))

    def __call__(self):
        """Get the value of the attribute.

        Returns
        -------
        names : None, tuple
            Include/exclude names
        """
        # Get the value from instance.meta['__attributes__']
        instance = self._instance_ref()
        return super().__get__(instance, instance.__class__)

    def __repr__(self):
        # ``_instance_ref`` only exists on copies that have been fetched
        # through ``__get__`` on an instance; otherwise fall back to the
        # parent repr.
        if hasattr(self, '_instance_ref'):
            out = f'<{self.__class__.__name__} name={self.name} value={self()}>'
        else:
            out = super().__repr__()
        return out

    def _add_remove_setup(self, names):
        """Common setup for add and remove.

        - Coerce attribute value to a list
        - Coerce names into a list
        - Get the parent table instance

        Returns
        -------
        tuple
            ``(instance, names, value)`` where ``names`` and ``value`` are
            both lists.
        """
        names = [names] if isinstance(names, str) else list(names)
        # Get the value. This is the same as self() but we need `instance` here.
        instance = self._instance_ref()
        value = super().__get__(instance, instance.__class__)
        value = [] if value is None else list(value)
        return instance, names, value

    def add(self, names):
        """Add ``names`` to the include/exclude attribute.

        Parameters
        ----------
        names : str, list, tuple
            Column name(s) to add
        """
        instance, names, value = self._add_remove_setup(names)
        # Only names not already present are appended.
        value.extend(name for name in names if name not in value)
        super().__set__(instance, tuple(value))

    def remove(self, names):
        """Remove ``names`` from the include/exclude attribute.

        Parameters
        ----------
        names : str, list, tuple
            Column name(s) to remove

        Raises
        ------
        ValueError
            If one of ``names`` is not currently in the attribute.
        """
        self._remove(names, raise_exc=True)

    def _remove(self, names, raise_exc=False):
        """Remove ``names`` with optional checking if they exist"""
        instance, names, value = self._add_remove_setup(names)

        # Return now if there are no attributes and thus no action to be taken.
        if not raise_exc and '__attributes__' not in instance.meta:
            return

        # Remove one by one, optionally raising an exception if name is missing.
        for name in names:
            if name in value:
                value.remove(name)  # Using the list.remove method
            elif raise_exc:
                raise ValueError(f'{name} not in {self.name}')

        # Change to either None or a tuple for storing back to attribute
        value = None if value == [] else tuple(value)
        self.__set__(instance, value)

    def _rename(self, name, new_name):
        """Rename ``name`` to ``new_name`` if ``name`` is in the list"""
        names = self() or ()
        if name in names:
            new_names = list(names)
            new_names[new_names.index(name)] = new_name
            self.set(new_names)

    def set(self, names):
        """Set value of include/exclude attribute to ``names``.

        Parameters
        ----------
        names : None, str, list, tuple
            Column name(s) to store, or None to clear

        Returns
        -------
        ctx : object
            Context manager object. The new value is applied immediately;
            leaving a ``with`` block on ``ctx`` sets the attribute back to
            the value it had before this call.
        """
        class _Context:
            def __init__(self, descriptor_self):
                self.descriptor_self = descriptor_self
                # Captured before the new value is applied below.
                self.names_orig = descriptor_self()

            def __enter__(self):
                pass

            def __exit__(self, type, value, tb):
                # Restore the value that was in place when ``set`` was called.
                descriptor_self = self.descriptor_self
                instance = descriptor_self._instance_ref()
                descriptor_self.__set__(instance, self.names_orig)

            def __repr__(self):
                return repr(self.descriptor_self)

        ctx = _Context(descriptor_self=self)

        instance = self._instance_ref()
        self.__set__(instance, names)

        return ctx
551 552 `~astropy.table.Table` differs from `~astropy.nddata.NDData` by the 553 assumption that the input data consists of columns of homogeneous data, 554 where each column has a unique identifier and may contain additional 555 metadata such as the data unit, format, and description. 556 557 See also: https://docs.astropy.org/en/stable/table/ 558 559 Parameters 560 ---------- 561 data : numpy ndarray, dict, list, table-like object, optional 562 Data to initialize table. 563 masked : bool, optional 564 Specify whether the table is masked. 565 names : list, optional 566 Specify column names. 567 dtype : list, optional 568 Specify column data types. 569 meta : dict, optional 570 Metadata associated with the table. 571 copy : bool, optional 572 Copy the input data. If the input is a Table the ``meta`` is always 573 copied regardless of the ``copy`` parameter. 574 Default is True. 575 rows : numpy ndarray, list of list, optional 576 Row-oriented data for table instead of ``data`` argument. 577 copy_indices : bool, optional 578 Copy any indices in the input data. Default is True. 579 units : list, dict, optional 580 List or dict of units to apply to columns. 581 descriptions : list, dict, optional 582 List or dict of descriptions to apply to columns. 583 **kwargs : dict, optional 584 Additional keyword args when converting table-like object. 585 """ 586 587 meta = MetaData(copy=False) 588 589 # Define class attributes for core container objects to allow for subclass 590 # customization. 
def as_array(self, keep_byteorder=False, names=None):
    """
    Return a new copy of the table in the form of a structured np.ndarray or
    np.ma.MaskedArray object (as appropriate).

    Parameters
    ----------
    keep_byteorder : bool, optional
        By default the returned array has all columns in native byte
        order.  However, if this option is `True` this preserves the
        byte order of all columns (if any are non-native).

    names : list, optional
        List of column names to include for returned structured array.
        Default is to include all table columns.

    Returns
    -------
    table_array : array or `~numpy.ma.MaskedArray`
        Copy of table as a numpy structured array.
        ndarray for unmasked or `~numpy.ma.MaskedArray` for masked.
    """
    use_mask = self.masked or self.has_masked_columns or self.has_masked_values
    allocate = ma.empty if use_mask else np.empty
    if not len(self.columns):
        return allocate(0, dtype=None)

    selected = self.columns.values()
    if names is not None:
        selected = [col for col in selected if col.info.name in names]

    # Build the structured dtype, one (name, dtype, shape) field per column.
    fields = []
    for col in selected:
        field_name, field_dtype, field_shape = descr(col)
        if not col.info.dtype.isnative and not keep_byteorder:
            field_dtype = np.dtype(field_dtype).newbyteorder('=')
        fields.append((field_name, field_dtype, field_shape))

    out = allocate(len(self), dtype=fields)
    for col in selected:
        # When assigning from one array into a field of a structured array,
        # Numpy will automatically swap those columns to their destination
        # byte order where applicable
        out[col.info.name] = col

        # For masked out, masked mixin columns need to set output mask attribute.
        if use_mask and has_info_class(col, MixinInfo) and hasattr(col, 'mask'):
            out[col.info.name].mask = col.mask

    return out
list of lists or list of tuples, list of 682 # dict, Row instance. Set data to something that the subsequent code 683 # will parse correctly. 684 if rows is not None: 685 if data is not None: 686 raise ValueError('Cannot supply both `data` and `rows` values') 687 if isinstance(rows, types.GeneratorType): 688 # Without this then the all(..) test below uses up the generator 689 rows = list(rows) 690 691 # Get column names if `rows` is a list of dict, otherwise this is None 692 names_from_list_of_dict = _get_names_from_list_of_dict(rows) 693 if names_from_list_of_dict: 694 data = rows 695 elif isinstance(rows, self.Row): 696 data = rows 697 else: 698 data = list(zip(*rows)) 699 700 # Infer the type of the input data and set up the initialization 701 # function, number of columns, and potentially the default col names 702 703 default_names = None 704 705 # Handle custom (subclass) table attributes that are stored in meta. 706 # These are defined as class attributes using the TableAttribute 707 # descriptor. Any such attributes get removed from kwargs here and 708 # stored for use after the table is otherwise initialized. Any values 709 # provided via kwargs will have precedence over existing values from 710 # meta (e.g. from data as a Table or meta via kwargs). 711 meta_table_attrs = {} 712 if kwargs: 713 for attr in list(kwargs): 714 descr = getattr(self.__class__, attr, None) 715 if isinstance(descr, TableAttribute): 716 meta_table_attrs[attr] = kwargs.pop(attr) 717 718 if hasattr(data, '__astropy_table__'): 719 # Data object implements the __astropy_table__ interface method. 720 # Calling that method returns an appropriate instance of 721 # self.__class__ and respects the `copy` arg. The returned 722 # Table object should NOT then be copied. 
723 data = data.__astropy_table__(self.__class__, copy, **kwargs) 724 copy = False 725 elif kwargs: 726 raise TypeError('__init__() got unexpected keyword argument {!r}' 727 .format(list(kwargs.keys())[0])) 728 729 if (isinstance(data, np.ndarray) 730 and data.shape == (0,) 731 and not data.dtype.names): 732 data = None 733 734 if isinstance(data, self.Row): 735 data = data._table[data._index:data._index + 1] 736 737 if isinstance(data, (list, tuple)): 738 # Get column names from `data` if it is a list of dict, otherwise this is None. 739 # This might be previously defined if `rows` was supplied as an init arg. 740 names_from_list_of_dict = (names_from_list_of_dict 741 or _get_names_from_list_of_dict(data)) 742 if names_from_list_of_dict: 743 init_func = self._init_from_list_of_dicts 744 n_cols = len(names_from_list_of_dict) 745 else: 746 init_func = self._init_from_list 747 n_cols = len(data) 748 749 elif isinstance(data, np.ndarray): 750 if data.dtype.names: 751 init_func = self._init_from_ndarray # _struct 752 n_cols = len(data.dtype.names) 753 default_names = data.dtype.names 754 else: 755 init_func = self._init_from_ndarray # _homog 756 if data.shape == (): 757 raise ValueError('Can not initialize a Table with a scalar') 758 elif len(data.shape) == 1: 759 data = data[np.newaxis, :] 760 n_cols = data.shape[1] 761 762 elif isinstance(data, Mapping): 763 init_func = self._init_from_dict 764 default_names = list(data) 765 n_cols = len(default_names) 766 767 elif isinstance(data, Table): 768 # If user-input meta is None then use data.meta (if non-trivial) 769 if meta is None and data.meta: 770 # At this point do NOT deepcopy data.meta as this will happen after 771 # table init_func() is called. But for table input the table meta 772 # gets a key copy here if copy=False because later a direct object ref 773 # is used. 774 meta = data.meta if copy else data.meta.copy() 775 776 # Handle indices on input table. 
Copy primary key and don't copy indices 777 # if the input Table is in non-copy mode. 778 self.primary_key = data.primary_key 779 self._init_indices = self._init_indices and data._copy_indices 780 781 # Extract default names, n_cols, and then overwrite ``data`` to be the 782 # table columns so we can use _init_from_list. 783 default_names = data.colnames 784 n_cols = len(default_names) 785 data = list(data.columns.values()) 786 787 init_func = self._init_from_list 788 789 elif data is None: 790 if names is None: 791 if dtype is None: 792 # Table was initialized as `t = Table()`. Set up for empty 793 # table with names=[], data=[], and n_cols=0. 794 # self._init_from_list() will simply return, giving the 795 # expected empty table. 796 names = [] 797 else: 798 try: 799 # No data nor names but dtype is available. This must be 800 # valid to initialize a structured array. 801 dtype = np.dtype(dtype) 802 names = dtype.names 803 dtype = [dtype[name] for name in names] 804 except Exception: 805 raise ValueError('dtype was specified but could not be ' 806 'parsed for column names') 807 # names is guaranteed to be set at this point 808 init_func = self._init_from_list 809 n_cols = len(names) 810 data = [[]] * n_cols 811 812 else: 813 raise ValueError(f'Data type {type(data)} not allowed to init Table') 814 815 # Set up defaults if names and/or dtype are not specified. 816 # A value of None means the actual value will be inferred 817 # within the appropriate initialization routine, either from 818 # existing specification or auto-generated. 819 820 if dtype is None: 821 dtype = [None] * n_cols 822 elif isinstance(dtype, np.dtype): 823 if default_names is None: 824 default_names = dtype.names 825 # Convert a numpy dtype input to a list of dtypes for later use. 826 dtype = [dtype[name] for name in dtype.names] 827 828 if names is None: 829 names = default_names or [None] * n_cols 830 831 # Numpy does not support bytes column names on Python 3, so fix them 832 # up now. 
def _set_column_attribute(self, attr, values):
    """Set ``attr`` for columns to ``values``, which can be either a sequence
    (one value per column, in column order), a Row, or a dict of
    name: value pairs. This is used for handling the ``units`` and
    ``descriptions`` kwargs to ``__init__``.

    Parameters
    ----------
    attr : str
        Name of the column ``info`` attribute to set.
    values : sequence, Row, dict or None
        Values to apply. Falsy input means nothing is set.

    Raises
    ------
    ValueError
        If a sequence does not match the number of columns, or a dict key
        is not a column name.
    """
    if not values:
        return

    if isinstance(values, Row):
        # For a Row object transform to an equivalent dict.
        values = {name: values[name] for name in values.colnames}

    if not isinstance(values, Mapping):
        # If not a dict map, assume iterable and map to dict if the right length
        if len(values) != len(self.columns):
            raise ValueError(f'sequence of {attr} values must match number of columns')
        values = dict(zip(self.colnames, values))

    for name, value in values.items():
        if name not in self.columns:
            raise ValueError(f'invalid column name {name} for setting {attr} attribute')

        # Special case: ignore unit if it is an empty or blank string
        if attr == 'unit' and isinstance(value, str):
            if value.strip() == '':
                value = None

        # Masked or None values leave the column attribute untouched.
        if value not in (np.ma.masked, None):
            setattr(self[name].info, attr, value)
@property
def indices(self):
    '''
    Return the indices associated with columns of the table
    as a TableIndices object.

    An index attached to more than one column is listed once, in
    first-seen order.
    '''
    lst = []
    for column in self.columns.values():
        for index in column.info.indices:
            # Uniqueness is by identity, not equality. any() stops at the
            # first match and avoids building a temporary list.
            if not any(index is x for x in lst):
                lst.append(index)
    return TableIndices(lst)
def add_index(self, colnames, engine=None, unique=False):
    '''
    Create an index over one or more columns and attach it to them.

    When the table has no index yet, the new one becomes the primary
    table index.

    Parameters
    ----------
    colnames : str or list
        Name of the column to index, or a list of column names
    engine : type or None
        Indexing engine class to use, from among SortedArray, BST,
        and SCEngine. If the supplied argument is None
        (by default), use SortedArray.
    unique : bool
        Whether the values of the index must be unique. Default is False.

    Raises
    ------
    ValueError
        If any selected column does not support indexing.
    '''
    if isinstance(colnames, str):
        colnames = (colnames,)
    columns = self.columns[tuple(colnames)].values()

    # Refuse up front if any requested column cannot be indexed.
    for col in columns:
        supports_indexing = getattr(col.info, '_supports_indexing', False)
        if not supports_indexing:
            raise ValueError('Cannot create an index on column "{}", of '
                             'type "{}"'.format(col.info.name, type(col)))

    # Must be evaluated before the new index is attached to any column.
    table_had_no_index = not self.indices

    sliced_index = SlicedIndex(Index(columns, engine=engine, unique=unique),
                               slice(0, 0, None), original=True)

    if table_had_no_index:
        self.primary_key = colnames

    for col in columns:
        col.info.indices.append(sliced_index)
def index_mode(self, mode):
    '''
    Build the context manager that applies an indexing mode to this table.

    Parameters
    ----------
    mode : str
        Either 'freeze', 'copy_on_getitem', or 'discard_on_copy'.
        In 'discard_on_copy' mode,
        indices are not copied whenever columns or tables are copied.
        In 'freeze' mode, indices are not modified whenever columns are
        modified; at the exit of the context, indices refresh themselves
        based on column values. This mode is intended for scenarios in
        which one intends to make many additions or modifications in an
        indexed column.
        In 'copy_on_getitem' mode, indices are copied when taking column
        slices as well as table slices, so col[i0:i1] will preserve
        indices.

    Returns
    -------
    context : _IndexModeContext
        Context manager constructed from this table and ``mode``.
    '''
    context = _IndexModeContext(self, mode)
    return context
def _init_from_list_of_dicts(self, data, names, dtype, n_cols, copy):
    """Initialize table from a list of dictionaries representing rows.

    Rows are pivoted into one list per key.  A key that is absent from some
    rows is temporarily filled with the first value present for that key,
    and those positions are masked once the columns exist.

    Parameters
    ----------
    data : list of dict
        Row dictionaries; ``data[0]`` is read, so the list must be non-empty.
    names : list
        Requested column names.  If every entry is None the names found in
        ``data`` are used instead.
    dtype, n_cols, copy
        Passed through unchanged to ``self._init_from_dict``.
    """
    # Unique sentinel object, compared by identity, that marks a missing cell.
    absent = object()

    # Union of the keys of every row.
    all_keys = set()
    for row in data:
        all_keys.update(row)

    # Keep the key order of the first row when it already names every
    # column; otherwise fall back to sorted order.
    if set(data[0].keys()) == all_keys:
        names_from_data = list(data[0].keys())
    else:
        names_from_data = sorted(all_keys)

    # Pivot rows into columns, recording the row numbers where a key is absent.
    cols = {}
    missing_indexes = defaultdict(list)
    for name in names_from_data:
        values = cols[name] = []
        for row_number, row in enumerate(data):
            try:
                cell = row[name]
            except KeyError:
                missing_indexes[name].append(row_number)
                cell = absent
            values.append(cell)

    # Overwrite each placeholder with the first real value of that column so
    # the column holds only real values when it is built.
    for name, row_numbers in missing_indexes.items():
        values = cols[name]
        stand_in = next(cell for cell in values if cell is not absent)
        for row_number in row_numbers:
            values[row_number] = stand_in

    if all(name is None for name in names):
        names = names_from_data

    self._init_from_dict(cols, names, dtype, n_cols, copy)

    # Mask the positions that were missing in the input, first replacing any
    # plain Column by a MaskedColumn so that it can hold masked entries.
    for name, row_numbers in missing_indexes.items():
        col = self[name]
        if isinstance(col, Column) and not isinstance(col, MaskedColumn):
            self[name] = self.MaskedColumn(col, copy=False)

        self[name][row_numbers] = np.ma.masked
    return
def _convert_data_to_col(self, data, copy=True, default_name=None, dtype=None, name=None):
    """
    Convert any allowed sequence data ``data`` to a column object that can be used
    directly in the self.columns dict.  This could be a Column, MaskedColumn,
    or mixin column.

    The final column name is determined by::

        name or data.info.name or default_name

    If ``data`` has no ``info`` then ``name = name or default_name``.

    The behavior of ``copy`` for Column objects is:
    - copy=True: new class instance with a copy of data and deep copy of meta
    - copy=False: new class instance with same data and a key-only copy of meta

    For mixin columns:
    - copy=True: new class instance with copy of data and deep copy of meta
    - copy=False: original instance (no copy at all)

    Parameters
    ----------
    data : object (column-like sequence)
        Input column data
    copy : bool
        Make a copy
    default_name : str
        Default name
    dtype : np.dtype or None
        Data dtype
    name : str or None
        Column name

    Returns
    -------
    col : Column, MaskedColumn, mixin-column type
        Object that can be used as a column in self

    Raises
    ------
    TypeError
        If a registered mixin handler returns an object that is not a valid
        mixin column for this table.
    ValueError
        If the selected column class cannot be instantiated from ``data``.
        The underlying exception is attached as ``__cause__``.
    """

    data_is_mixin = self._is_mixin_for_table(data)
    masked_col_cls = (self.ColumnClass
                      if issubclass(self.ColumnClass, self.MaskedColumn)
                      else self.MaskedColumn)

    try:
        data0_is_mixin = self._is_mixin_for_table(data[0])
    except Exception:
        # Need broad exception, cannot predict what data[0] raises for arbitrary data
        data0_is_mixin = False

    # If the data is not an instance of Column or a mixin class, we can
    # check the registry of mixin 'handlers' to see if the column can be
    # converted to a mixin class
    if (handler := get_mixin_handler(data)) is not None:
        original_data = data
        data = handler(data)
        if not (data_is_mixin := self._is_mixin_for_table(data)):
            fully_qualified_name = (original_data.__class__.__module__ + '.'
                                    + original_data.__class__.__name__)
            raise TypeError('Mixin handler for object of type '
                            f'{fully_qualified_name} '
                            'did not return a valid mixin column')

    # Structured ndarray gets viewed as a mixin unless already a valid
    # mixin class
    if (not isinstance(data, Column) and not data_is_mixin
            and isinstance(data, np.ndarray) and len(data.dtype) > 1):
        data = data.view(NdarrayMixin)
        data_is_mixin = True

    # Get the final column name using precedence.  Some objects may not
    # have an info attribute. Also avoid creating info as a side effect.
    if not name:
        if isinstance(data, Column):
            name = data.name or default_name
        elif 'info' in getattr(data, '__dict__', ()):
            name = data.info.name or default_name
        else:
            name = default_name

    if isinstance(data, Column):
        # If self.ColumnClass is a subclass of col, then "upgrade" to ColumnClass,
        # otherwise just use the original class.  The most common case is a
        # table with masked=True and ColumnClass=MaskedColumn.  Then a Column
        # gets upgraded to MaskedColumn, but the converse (pre-4.0) behavior
        # of downgrading from MaskedColumn to Column (for non-masked table)
        # does not happen.
        col_cls = self._get_col_cls_for_table(data)

    elif data_is_mixin:
        # Copy the mixin column attributes if they exist since the copy below
        # may not get this attribute.
        col = col_copy(data, copy_indices=self._init_indices) if copy else data
        col.info.name = name
        return col

    elif data0_is_mixin:
        # Handle case of a sequence of a mixin, e.g. [1*u.m, 2*u.m].
        try:
            col = data[0].__class__(data)
            col.info.name = name
            return col
        except Exception:
            # If that didn't work for some reason, just turn it into np.array of object
            data = np.array(data, dtype=object)
            col_cls = self.ColumnClass

    elif isinstance(data, (np.ma.MaskedArray, Masked)):
        # Require that col_cls be a subclass of MaskedColumn, remembering
        # that ColumnClass could be a user-defined subclass (though more-likely
        # could be MaskedColumn).
        col_cls = masked_col_cls

    elif data is None:
        # Special case for data passed as the None object (for broadcasting
        # to an object column). Need to turn data into numpy `None` scalar
        # object, otherwise `Column` interprets data=None as no data instead
        # of a object column of `None`.
        data = np.array(None)
        col_cls = self.ColumnClass

    elif not hasattr(data, 'dtype'):
        # `data` is none of the above, convert to numpy array or MaskedArray
        # assuming only that it is a scalar or sequence or N-d nested
        # sequence. This function is relatively intricate and tries to
        # maintain performance for common cases while handling things like
        # list input with embedded np.ma.masked entries. If `data` is a
        # scalar then it gets returned unchanged so the original object gets
        # passed to `Column` later.
        data = _convert_sequence_data_to_array(data, dtype)
        copy = False  # Already made a copy above
        col_cls = masked_col_cls if isinstance(data, np.ma.MaskedArray) else self.ColumnClass

    else:
        col_cls = self.ColumnClass

    try:
        col = col_cls(name=name, data=data, dtype=dtype,
                      copy=copy, copy_indices=self._init_indices)
    except Exception as err:
        # Broad exception class since we don't know what might go wrong.
        # Chain explicitly so the original failure is reported as the cause.
        raise ValueError('unable to convert data to Column for Table') from err

    col = self._convert_col_for_table(col)

    return col
def _convert_col_for_table(self, col):
    """
    Return ``col`` with the column class appropriate for this table.

    Objects that are not ``Column`` instances, and columns that are already
    instances of ``self.ColumnClass``, are returned untouched.  Otherwise
    the class chosen by ``self._get_col_cls_for_table`` is applied without
    copying, unless it is already the column's own class.  The original
    documentation notes that Table subclasses like QTable override this
    method.
    """
    if not isinstance(col, Column) or isinstance(col, self.ColumnClass):
        return col

    target_cls = self._get_col_cls_for_table(col)
    if target_cls is col.__class__:
        return col

    return target_cls(col, copy=False)
def _new_from_slice(self, slice_):
    """Create a new table as a referenced slice from self.

    Each column is indexed with ``slice_`` and the results are assembled
    into a new table of the same class.  ``meta`` is shallow-copied when
    non-empty and ``primary_key`` is carried over.
    """
    sliced_table = self.__class__(masked=self.masked)
    if self.meta:
        sliced_table.meta = self.meta.copy()  # Shallow copy for slice
    sliced_table.primary_key = self.primary_key

    sliced_cols = []
    for col in self.columns.values():
        piece = col[slice_]

        # For Column instances ``indices`` is read directly from the column
        # instead of through ``col.info``.
        indices_owner = col if isinstance(col, Column) else col.info
        if indices_owner.indices:
            info = col.info
            # Publish this table's setting on the column info before slicing
            # the indices (presumably read inside ``slice_indices``), then
            # set the attribute back to True on the original column.
            info._copy_indices = self._copy_indices
            piece = col.info.slice_indices(piece, slice_, len(col))
            col.info._copy_indices = True

        sliced_cols.append(piece)

    self._make_table_from_cols(sliced_table, sliced_cols, verify=False,
                               names=self.columns.keys())
    return sliced_table
def _set_col_parent_table_and_mask(self, col):
    """
    Record this table as the parent of ``col`` and, for a masked table,
    give ``col`` a ``mask`` attribute if it does not already have one.
    """
    # ``parent_table`` is set directly on Column instances and on ``info``
    # for anything else.
    if isinstance(col, Column):
        col.parent_table = self
    else:
        col.info.parent_table = self

    # Legacy behavior for masked table: every column must expose a mask.
    if not self.masked:
        return
    if hasattr(col, 'mask'):
        return
    col.mask = FalseArray(col.shape)
1503 """ 1504 for colname in self.columns: 1505 yield self[colname] 1506 1507 def _base_repr_(self, html=False, descr_vals=None, max_width=None, 1508 tableid=None, show_dtype=True, max_lines=None, 1509 tableclass=None): 1510 if descr_vals is None: 1511 descr_vals = [self.__class__.__name__] 1512 if self.masked: 1513 descr_vals.append('masked=True') 1514 descr_vals.append(f'length={len(self)}') 1515 1516 descr = ' '.join(descr_vals) 1517 if html: 1518 from astropy.utils.xml.writer import xml_escape 1519 descr = f'<i>{xml_escape(descr)}</i>\n' 1520 else: 1521 descr = f'<{descr}>\n' 1522 1523 if tableid is None: 1524 tableid = f'table{id(self)}' 1525 1526 data_lines, outs = self.formatter._pformat_table( 1527 self, tableid=tableid, html=html, max_width=max_width, 1528 show_name=True, show_unit=None, show_dtype=show_dtype, 1529 max_lines=max_lines, tableclass=tableclass) 1530 1531 out = descr + '\n'.join(data_lines) 1532 1533 return out 1534 1535 def _repr_html_(self): 1536 out = self._base_repr_(html=True, max_width=-1, 1537 tableclass=conf.default_notebook_table_class) 1538 # Wrap <table> in <div>. This follows the pattern in pandas and allows 1539 # table to be scrollable horizontally in VS Code notebook display. 1540 out = f'<div>{out}</div>' 1541 return out 1542 1543 def __repr__(self): 1544 return self._base_repr_(html=False, max_width=None) 1545 1546 def __str__(self): 1547 return '\n'.join(self.pformat()) 1548 1549 def __bytes__(self): 1550 return str(self).encode('utf-8') 1551 1552 @property 1553 def has_mixin_columns(self): 1554 """ 1555 True if table has any mixin columns (defined as columns that are not Column 1556 subclasses). 1557 """ 1558 return any(has_info_class(col, MixinInfo) for col in self.columns.values()) 1559 1560 @property 1561 def has_masked_columns(self): 1562 """True if table has any ``MaskedColumn`` columns. 1563 1564 This does not check for mixin columns that may have masked values, use the 1565 ``has_masked_values`` property in that case. 
@property
def has_masked_values(self):
    """True if any column in the table has at least one masked value.

    Every column that exposes a ``mask`` attribute is checked until a set
    mask element is found, which may be relatively slow for large tables.
    """
    return any(hasattr(col, 'mask') and np.any(col.mask)
               for col in self.itercols())
1609 1610 """ 1611 lines, outs = self.formatter._pformat_table(self, max_lines, max_width, 1612 show_name=show_name, show_unit=show_unit, 1613 show_dtype=show_dtype, align=align) 1614 if outs['show_length']: 1615 lines.append(f'Length = {len(self)} rows') 1616 1617 n_header = outs['n_header'] 1618 1619 for i, line in enumerate(lines): 1620 if i < n_header: 1621 color_print(line, 'red') 1622 else: 1623 print(line) 1624 1625 @format_doc(_pprint_docs) 1626 def pprint_all(self, max_lines=-1, max_width=-1, show_name=True, 1627 show_unit=None, show_dtype=False, align=None): 1628 """Print a formatted string representation of the entire table. 1629 1630 This method is the same as `astropy.table.Table.pprint` except that 1631 the default ``max_lines`` and ``max_width`` are both -1 so that by 1632 default the entire table is printed instead of restricting to the size 1633 of the screen terminal. 1634 1635 """ 1636 return self.pprint(max_lines, max_width, show_name, 1637 show_unit, show_dtype, align) 1638 1639 def _make_index_row_display_table(self, index_row_name): 1640 if index_row_name not in self.columns: 1641 idx_col = self.ColumnClass(name=index_row_name, data=np.arange(len(self))) 1642 return self.__class__([idx_col] + list(self.columns.values()), 1643 copy=False) 1644 else: 1645 return self 1646 1647 def show_in_notebook(self, tableid=None, css=None, display_length=50, 1648 table_class='astropy-default', show_row_index='idx'): 1649 """Render the table in HTML and show it in the IPython notebook. 1650 1651 Parameters 1652 ---------- 1653 tableid : str or None 1654 An html ID tag for the table. Default is ``table{id}-XXX``, where 1655 id is the unique integer id of the table object, id(self), and XXX 1656 is a random number to avoid conflicts when printing the same table 1657 multiple times. 1658 table_class : str or None 1659 A string with a list of HTML classes used to style the table. 
1660 The special default string ('astropy-default') means that the string 1661 will be retrieved from the configuration item 1662 ``astropy.table.default_notebook_table_class``. Note that these 1663 table classes may make use of bootstrap, as this is loaded with the 1664 notebook. See `this page <https://getbootstrap.com/css/#tables>`_ 1665 for the list of classes. 1666 css : str 1667 A valid CSS string declaring the formatting for the table. Defaults 1668 to ``astropy.table.jsviewer.DEFAULT_CSS_NB``. 1669 display_length : int, optional 1670 Number or rows to show. Defaults to 50. 1671 show_row_index : str or False 1672 If this does not evaluate to False, a column with the given name 1673 will be added to the version of the table that gets displayed. 1674 This new column shows the index of the row in the table itself, 1675 even when the displayed table is re-sorted by another column. Note 1676 that if a column with this name already exists, this option will be 1677 ignored. Defaults to "idx". 1678 1679 Notes 1680 ----- 1681 Currently, unlike `show_in_browser` (with ``jsviewer=True``), this 1682 method needs to access online javascript code repositories. This is due 1683 to modern browsers' limitations on accessing local files. Hence, if you 1684 call this method while offline (and don't have a cached version of 1685 jquery and jquery.dataTables), you will not get the jsviewer features. 
def show_in_browser(self, max_lines=5000, jsviewer=False,
                    browser='default', jskwargs=None,
                    tableid=None, table_class="display compact",
                    css=None, show_row_index='idx'):
    """Render the table in HTML and show it in a web browser.

    Parameters
    ----------
    max_lines : int
        Maximum number of rows to export to the table (set low by default
        to avoid memory issues, since the browser view requires duplicating
        the table in memory).  A negative value of ``max_lines`` indicates
        no row limit.
    jsviewer : bool
        If `True`, prepends some javascript headers so that the table is
        rendered as a `DataTables <https://datatables.net>`_ data table.
        This allows in-browser searching & sorting.
    browser : str
        Any legal browser name, e.g. ``'firefox'``, ``'chrome'``,
        ``'safari'`` (for mac, you may need to use ``'open -a
        "/Applications/Google Chrome.app" {}'`` for Chrome).  If
        ``'default'``, will use the system default browser.
    jskwargs : dict or None
        Passed to the `astropy.table.JSViewer` init. Defaults to
        ``{'use_local_files': True}`` which means that the JavaScript
        libraries will be served from local copies.
    tableid : str or None
        An html ID tag for the table.  Default is ``table{id}``, where id
        is the unique integer id of the table object, id(self).
    table_class : str or None
        A string with a list of HTML classes used to style the table.
        Default is "display compact", and other possible values can be
        found in https://www.datatables.net/manual/styling/classes
    css : str
        A valid CSS string declaring the formatting for the table. Defaults
        to ``astropy.table.jsviewer.DEFAULT_CSS``.
    show_row_index : str or False
        If this does not evaluate to False, a column with the given name
        will be added to the version of the table that gets displayed.
        This new column shows the index of the row in the table itself,
        even when the displayed table is re-sorted by another column. Note
        that if a column with this name already exists, this option will be
        ignored. Defaults to "idx".
    """

    import os
    import webbrowser
    import tempfile
    from .jsviewer import DEFAULT_CSS
    from urllib.parse import urljoin
    from urllib.request import pathname2url

    # A fresh dict per call, so the default can never be shared (and
    # accidentally mutated) across calls the way a dict literal in the
    # signature would be.
    if jskwargs is None:
        jskwargs = {'use_local_files': True}

    if css is None:
        css = DEFAULT_CSS

    # We can't use NamedTemporaryFile here because it gets deleted as
    # soon as it gets garbage collected.
    tmpdir = tempfile.mkdtemp()
    path = os.path.join(tmpdir, 'table.html')

    with open(path, 'w') as tmp:
        if jsviewer:
            if show_row_index:
                display_table = self._make_index_row_display_table(show_row_index)
            else:
                display_table = self
            display_table.write(tmp, format='jsviewer', css=css,
                                max_lines=max_lines, jskwargs=jskwargs,
                                table_id=tableid, table_class=table_class)
        else:
            self.write(tmp, format='html')

    try:
        br = webbrowser.get(None if browser == 'default' else browser)
    except webbrowser.Error:
        # An unknown browser is logged rather than raised.
        log.error(f"Browser '{browser}' not found.")
    else:
        br.open(urljoin('file:', pathname2url(path)))
@format_doc(_pformat_docs, id="{id}")
def pformat_all(self, max_lines=-1, max_width=-1, show_name=True,
                show_unit=None, show_dtype=False, html=False, tableid=None,
                align=None, tableclass=None):
    """Return a list of lines for the formatted string representation of
    the entire table.

    All arguments are forwarded unchanged to ``pformat``.  The only
    difference from that method is that ``max_lines`` and ``max_width``
    both default to -1 here; ``pformat`` documents a negative value as
    meaning that no limit is applied.

    """
    lines = self.pformat(max_lines, max_width, show_name,
                         show_unit, show_dtype, html, tableid,
                         align, tableclass)
    return lines
1868 1869 show_unit : bool 1870 Include a header row for unit. Default is to show a row 1871 for units only if one or more columns has a defined value 1872 for the unit. 1873 1874 show_dtype : bool 1875 Include a header row for column dtypes. Default is True. 1876 """ 1877 self.formatter._more_tabcol(self, max_lines, max_width, show_name=show_name, 1878 show_unit=show_unit, show_dtype=show_dtype) 1879 1880 def __getitem__(self, item): 1881 if isinstance(item, str): 1882 return self.columns[item] 1883 elif isinstance(item, (int, np.integer)): 1884 return self.Row(self, item) 1885 elif (isinstance(item, np.ndarray) and item.shape == () and item.dtype.kind == 'i'): 1886 return self.Row(self, item.item()) 1887 elif self._is_list_or_tuple_of_str(item): 1888 out = self.__class__([self[x] for x in item], 1889 copy_indices=self._copy_indices) 1890 out._groups = groups.TableGroups(out, indices=self.groups._indices, 1891 keys=self.groups._keys) 1892 out.meta = self.meta.copy() # Shallow copy for meta 1893 return out 1894 elif ((isinstance(item, np.ndarray) and item.size == 0) 1895 or (isinstance(item, (tuple, list)) and not item)): 1896 # If item is an empty array/list/tuple then return the table with no rows 1897 return self._new_from_slice([]) 1898 elif (isinstance(item, slice) 1899 or isinstance(item, np.ndarray) 1900 or isinstance(item, list) 1901 or isinstance(item, tuple) and all(isinstance(x, np.ndarray) 1902 for x in item)): 1903 # here for the many ways to give a slice; a tuple of ndarray 1904 # is produced by np.where, as in t[np.where(t['a'] > 2)] 1905 # For all, a new table is constructed with slice of all columns 1906 return self._new_from_slice(item) 1907 else: 1908 raise ValueError(f'Illegal type {type(item)} for table item access') 1909 1910 def __setitem__(self, item, value): 1911 # If the item is a string then it must be the name of a column. 1912 # If that column doesn't already exist then create it now. 
1913 if isinstance(item, str) and item not in self.colnames: 1914 self.add_column(value, name=item, copy=True) 1915 1916 else: 1917 n_cols = len(self.columns) 1918 1919 if isinstance(item, str): 1920 # Set an existing column by first trying to replace, and if 1921 # this fails do an in-place update. See definition of mask 1922 # property for discussion of the _setitem_inplace attribute. 1923 if (not getattr(self, '_setitem_inplace', False) 1924 and not conf.replace_inplace): 1925 try: 1926 self._replace_column_warnings(item, value) 1927 return 1928 except Exception: 1929 pass 1930 self.columns[item][:] = value 1931 1932 elif isinstance(item, (int, np.integer)): 1933 self._set_row(idx=item, colnames=self.colnames, vals=value) 1934 1935 elif (isinstance(item, slice) 1936 or isinstance(item, np.ndarray) 1937 or isinstance(item, list) 1938 or (isinstance(item, tuple) # output from np.where 1939 and all(isinstance(x, np.ndarray) for x in item))): 1940 1941 if isinstance(value, Table): 1942 vals = (col for col in value.columns.values()) 1943 1944 elif isinstance(value, np.ndarray) and value.dtype.names: 1945 vals = (value[name] for name in value.dtype.names) 1946 1947 elif np.isscalar(value): 1948 vals = itertools.repeat(value, n_cols) 1949 1950 else: # Assume this is an iterable that will work 1951 if len(value) != n_cols: 1952 raise ValueError('Right side value needs {} elements (one for each column)' 1953 .format(n_cols)) 1954 vals = value 1955 1956 for col, val in zip(self.columns.values(), vals): 1957 col[item] = val 1958 1959 else: 1960 raise ValueError(f'Illegal type {type(item)} for table item access') 1961 1962 def __delitem__(self, item): 1963 if isinstance(item, str): 1964 self.remove_column(item) 1965 elif isinstance(item, (int, np.integer)): 1966 self.remove_row(item) 1967 elif (isinstance(item, (list, tuple, np.ndarray)) 1968 and all(isinstance(x, str) for x in item)): 1969 self.remove_columns(item) 1970 elif (isinstance(item, (list, np.ndarray)) 1971 and 
np.asarray(item).dtype.kind == 'i'): 1972 self.remove_rows(item) 1973 elif isinstance(item, slice): 1974 self.remove_rows(item) 1975 else: 1976 raise IndexError('illegal key or index value') 1977 1978 def _ipython_key_completions_(self): 1979 return self.colnames 1980 1981 def field(self, item): 1982 """Return column[item] for recarray compatibility.""" 1983 return self.columns[item] 1984 1985 @property 1986 def masked(self): 1987 return self._masked 1988 1989 @masked.setter 1990 def masked(self, masked): 1991 raise Exception('Masked attribute is read-only (use t = Table(t, masked=True)' 1992 ' to convert to a masked table)') 1993 1994 def _set_masked(self, masked): 1995 """ 1996 Set the table masked property. 1997 1998 Parameters 1999 ---------- 2000 masked : bool 2001 State of table masking (`True` or `False`) 2002 """ 2003 if masked in [True, False, None]: 2004 self._masked = masked 2005 else: 2006 raise ValueError("masked should be one of True, False, None") 2007 2008 self._column_class = self.MaskedColumn if self._masked else self.Column 2009 2010 @property 2011 def ColumnClass(self): 2012 if self._column_class is None: 2013 return self.Column 2014 else: 2015 return self._column_class 2016 2017 @property 2018 def dtype(self): 2019 return np.dtype([descr(col) for col in self.columns.values()]) 2020 2021 @property 2022 def colnames(self): 2023 return list(self.columns.keys()) 2024 2025 @staticmethod 2026 def _is_list_or_tuple_of_str(names): 2027 """Check that ``names`` is a tuple or list of strings""" 2028 return (isinstance(names, (tuple, list)) and names 2029 and all(isinstance(x, str) for x in names)) 2030 2031 def keys(self): 2032 return list(self.columns.keys()) 2033 2034 def values(self): 2035 return self.columns.values() 2036 2037 def items(self): 2038 return self.columns.items() 2039 2040 def __len__(self): 2041 # For performance reasons (esp. in Row) cache the first column name 2042 # and use that subsequently for the table length. 
def index_column(self, name):
    """
    Return the positional index of column ``name``.

    Parameters
    ----------
    name : str
        column name

    Returns
    -------
    index : int
        Positional index of column ``name``.

    Raises
    ------
    ValueError
        If no column called ``name`` exists.

    Examples
    --------
    Create a table with three columns 'a', 'b' and 'c'::

        >>> t = Table([[1, 2, 3], [0.1, 0.2, 0.3], ['x', 'y', 'z']],
        ...           names=('a', 'b', 'c'))
        >>> print(t)
         a   b   c
        --- --- ---
          1 0.1   x
          2 0.2   y
          3 0.3   z

    Get index of column 'b' of the table::

        >>> t.index_column('b')
        1
    """
    current_names = self.colnames
    try:
        position = current_names.index(name)
    except ValueError:
        # Replace the generic list.index() message with a table-specific one
        raise ValueError(f"Column {name} does not exist")
    return position
2108 2109 Parameters 2110 ---------- 2111 col : object 2112 Data object for the new column 2113 index : int or None 2114 Insert column before this position or at end (default). 2115 name : str 2116 Column name 2117 rename_duplicate : bool 2118 Uniquify column name if it already exist. Default is False. 2119 copy : bool 2120 Make a copy of the new column. Default is True. 2121 default_name : str or None 2122 Name to use if both ``name`` and ``col.info.name`` are not available. 2123 Defaults to ``col{number_of_columns}``. 2124 2125 Examples 2126 -------- 2127 Create a table with two columns 'a' and 'b', then create a third column 'c' 2128 and append it to the end of the table:: 2129 2130 >>> t = Table([[1, 2], [0.1, 0.2]], names=('a', 'b')) 2131 >>> col_c = Column(name='c', data=['x', 'y']) 2132 >>> t.add_column(col_c) 2133 >>> print(t) 2134 a b c 2135 --- --- --- 2136 1 0.1 x 2137 2 0.2 y 2138 2139 Add column 'd' at position 1. Note that the column is inserted 2140 before the given index:: 2141 2142 >>> t.add_column(['a', 'b'], name='d', index=1) 2143 >>> print(t) 2144 a d b c 2145 --- --- --- --- 2146 1 a 0.1 x 2147 2 b 0.2 y 2148 2149 Add second column named 'b' with rename_duplicate:: 2150 2151 >>> t = Table([[1, 2], [0.1, 0.2]], names=('a', 'b')) 2152 >>> t.add_column(1.1, name='b', rename_duplicate=True) 2153 >>> print(t) 2154 a b b_1 2155 --- --- --- 2156 1 0.1 1.1 2157 2 0.2 1.1 2158 2159 Add an unnamed column or mixin object in the table using a default name 2160 or by specifying an explicit name with ``name``. Name can also be overridden:: 2161 2162 >>> t = Table([[1, 2], [0.1, 0.2]], names=('a', 'b')) 2163 >>> t.add_column(['a', 'b']) 2164 >>> t.add_column(col_c, name='d') 2165 >>> print(t) 2166 a b col2 d 2167 --- --- ---- --- 2168 1 0.1 a x 2169 2 0.2 b y 2170 """ 2171 if default_name is None: 2172 default_name = f'col{len(self.columns)}' 2173 2174 # Convert col data to acceptable object for insertion into self.columns. 
2175 # Note that along with the lines above and below, this allows broadcasting 2176 # of scalars to the correct shape for adding to table. 2177 col = self._convert_data_to_col(col, name=name, copy=copy, 2178 default_name=default_name) 2179 2180 # Assigning a scalar column to an empty table should result in an 2181 # exception (see #3811). 2182 if col.shape == () and len(self) == 0: 2183 raise TypeError('Empty table cannot have column set to scalar value') 2184 # Make col data shape correct for scalars. The second test is to allow 2185 # broadcasting an N-d element to a column, e.g. t['new'] = [[1, 2]]. 2186 elif (col.shape == () or col.shape[0] == 1) and len(self) > 0: 2187 new_shape = (len(self),) + getattr(col, 'shape', ())[1:] 2188 if isinstance(col, np.ndarray): 2189 col = np.broadcast_to(col, shape=new_shape, 2190 subok=True) 2191 elif isinstance(col, ShapedLikeNDArray): 2192 col = col._apply(np.broadcast_to, shape=new_shape, 2193 subok=True) 2194 2195 # broadcast_to() results in a read-only array. Apparently it only changes 2196 # the view to look like the broadcasted array. So copy. 
def add_columns(self, cols, indexes=None, names=None, copy=True, rename_duplicate=False):
    """
    Add a list of new columns to the table using ``cols`` data objects.  If a
    corresponding list of ``indexes`` is supplied then insert column
    before each ``index`` position in the *original* list of columns,
    otherwise append columns to the end of the list.

    The ``cols`` input can include any data objects which are acceptable as
    `~astropy.table.Table` column objects or can be converted.  This includes
    mixin columns and scalar or length=1 objects which get broadcast to match
    the table length.

    From a performance perspective there is little difference between calling
    this method once or looping over the new columns and calling ``add_column()``
    for each column.

    Parameters
    ----------
    cols : list of object
        List of data objects for the new columns
    indexes : list of int or None
        Insert column before this position or at end (default).
    names : list of str
        Column names
    copy : bool
        Make a copy of the new columns. Default is True.
    rename_duplicate : bool
        Uniquify new column names if they duplicate the existing ones.
        Default is False.

    Raises
    ------
    ValueError
        If the length of ``indexes`` or ``names`` does not match the
        length of ``cols``.

    See Also
    --------
    astropy.table.hstack, update, replace_column

    Examples
    --------
    Create a table with two columns 'a' and 'b', then create columns 'c' and 'd'
    and append them to the end of the table::

        >>> t = Table([[1, 2], [0.1, 0.2]], names=('a', 'b'))
        >>> col_c = Column(name='c', data=['x', 'y'])
        >>> col_d = Column(name='d', data=['u', 'v'])
        >>> t.add_columns([col_c, col_d])
        >>> print(t)
         a   b   c   d
        --- --- --- ---
          1 0.1   x   u
          2 0.2   y   v

    Add column 'c' at position 0 and column 'd' at position 1. Note that
    the columns are inserted before the given position::

        >>> t = Table([[1, 2], [0.1, 0.2]], names=('a', 'b'))
        >>> t.add_columns([['x', 'y'], ['u', 'v']], names=['c', 'd'],
        ...               indexes=[0, 1])
        >>> print(t)
         c   a   d   b
        --- --- --- ---
          x   1   u 0.1
          y   2   v 0.2

    Add second column 'b' and column 'c' with ``rename_duplicate``::

        >>> t = Table([[1, 2], [0.1, 0.2]], names=('a', 'b'))
        >>> t.add_columns([[1.1, 1.2], ['x', 'y']], names=('b', 'c'),
        ...               rename_duplicate=True)
        >>> print(t)
         a   b  b_1  c
        --- --- --- ---
          1 0.1 1.1   x
          2 0.2 1.2   y

    Add unnamed columns or mixin objects in the table using default names
    or by specifying explicit names with ``names``. Names can also be overridden::

        >>> t = Table()
        >>> col_b = Column(name='b', data=['u', 'v'])
        >>> t.add_columns([[1, 2], col_b])
        >>> t.add_columns([[3, 4], col_b], names=['c', 'd'])
        >>> print(t)
        col0  b   c   d
        ---- --- --- ---
           1   u   3   u
           2   v   4   v
    """
    n_new = len(cols)

    if indexes is None:
        # Default: every new column goes at the current end of the table
        indexes = [len(self.columns)] * n_new
    elif len(indexes) != n_new:
        raise ValueError('Number of indexes must match number of cols')

    if names is None:
        names = (None,) * n_new
    elif len(names) != n_new:
        raise ValueError('Number of names must match number of cols')

    # Default names are based on the column count *before* anything is added
    n_existing = len(self.columns)
    default_names = [f'col{ii + n_existing}' for ii in range(n_new)]

    # Insert from the highest index down so earlier insertions do not shift
    # the positions of later ones.  The sort MUST be stable: columns that
    # share an insertion index (always the case when ``indexes`` is None)
    # have to keep their relative input order, which the default argsort
    # algorithm does not guarantee.
    for ii in reversed(np.argsort(indexes, kind='stable')):
        self.add_column(cols[ii], index=indexes[ii], name=names[ii],
                        default_name=default_names[ii],
                        rename_duplicate=rename_duplicate, copy=copy)
" 2358 "The new column no longer shares memory with the " 2359 "original array.".format(name)) 2360 warnings.warn(msg, TableReplaceWarning, stacklevel=3) 2361 except AttributeError: 2362 pass 2363 2364 if 'refcount' in warns: 2365 # Did reference count change? 2366 new_refcount = sys.getrefcount(self[name]) 2367 if refcount != new_refcount: 2368 msg = ("replaced column '{}' and the number of references " 2369 "to the column changed.".format(name)) 2370 warnings.warn(msg, TableReplaceWarning, stacklevel=3) 2371 2372 if 'attributes' in warns: 2373 # Any of the standard column attributes changed? 2374 changed_attrs = [] 2375 new_col = self[name] 2376 # Check base DataInfo attributes that any column will have 2377 for attr in DataInfo.attr_names: 2378 if getattr(old_col.info, attr) != getattr(new_col.info, attr): 2379 changed_attrs.append(attr) 2380 2381 if changed_attrs: 2382 msg = ("replaced column '{}' and column attributes {} changed." 2383 .format(name, changed_attrs)) 2384 warnings.warn(msg, TableReplaceWarning, stacklevel=3) 2385 2386 def replace_column(self, name, col, copy=True): 2387 """ 2388 Replace column ``name`` with the new ``col`` object. 2389 2390 The behavior of ``copy`` for Column objects is: 2391 - copy=True: new class instance with a copy of data and deep copy of meta 2392 - copy=False: new class instance with same data and a key-only copy of meta 2393 2394 For mixin columns: 2395 - copy=True: new class instance with copy of data and deep copy of meta 2396 - copy=False: original instance (no copy at all) 2397 2398 Parameters 2399 ---------- 2400 name : str 2401 Name of column to replace 2402 col : `~astropy.table.Column` or `~numpy.ndarray` or sequence 2403 New column object to replace the existing column. 
def remove_row(self, index):
    """
    Remove a row from the table.

    Parameters
    ----------
    index : int
        Index of row to remove

    Raises
    ------
    TypeError
        If ``index`` is not a Python or numpy integer.

    Examples
    --------
    Create a table with three columns 'a', 'b' and 'c'::

        >>> t = Table([[1, 2, 3], [0.1, 0.2, 0.3], ['x', 'y', 'z']],
        ...           names=('a', 'b', 'c'))
        >>> print(t)
         a   b   c
        --- --- ---
          1 0.1   x
          2 0.2   y
          3 0.3   z

    Remove row 1 from the table::

        >>> t.remove_row(1)
        >>> print(t)
         a   b   c
        --- --- ---
          1 0.1   x
          3 0.3   z

    To remove several rows at the same time use remove_rows.
    """
    # Only accept a single integer here; slices and index arrays are the
    # business of remove_rows().
    is_single_integer = isinstance(index, (int, np.integer))
    if is_single_integer:
        self.remove_rows(index)
        return
    raise TypeError("Row index must be an integer")
def iterrows(self, *names):
    """
    Iterate over rows of table returning a tuple of values for each row.

    This method is especially useful when only a subset of columns are needed.

    The ``iterrows`` method can be substantially faster than using the standard
    Table row iteration (e.g. ``for row in tbl:``), since that returns a new
    ``~astropy.table.Row`` object for each row and accessing a column in that
    row (e.g. ``row['col0']``) is slower than tuple access.

    Parameters
    ----------
    names : list
        List of column names (default to all columns if no names provided)

    Returns
    -------
    rows : iterable
        Iterator returns tuples of row values

    Raises
    ------
    ValueError
        If any of ``names`` is not a column of the table.

    Examples
    --------
    Create a table with three columns 'a', 'b' and 'c'::

        >>> t = Table({'a': [1, 2, 3],
        ...            'b': [1.0, 2.5, 3.0],
        ...            'c': ['x', 'y', 'z']})

    To iterate row-wise using column names::

        >>> for a, c in t.iterrows('a', 'c'):
        ...     print(a, c)
        1 x
        2 y
        3 z

    """
    if names:
        # Validate every requested name up front so that nothing is
        # yielded when one of them is bad.
        for requested in names:
            if requested not in self.colnames:
                raise ValueError(f'{requested} is not a valid column name')
    else:
        names = self.colnames

    # zip over the selected columns yields one tuple per row
    selected_cols = [self[colname] for colname in names]
    return zip(*selected_cols)
2629 2630 Parameters 2631 ---------- 2632 names : list 2633 A list containing the names of the columns to remove 2634 2635 Examples 2636 -------- 2637 Create a table with three columns 'a', 'b' and 'c':: 2638 2639 >>> t = Table([[1, 2, 3], [0.1, 0.2, 0.3], ['x', 'y', 'z']], 2640 ... names=('a', 'b', 'c')) 2641 >>> print(t) 2642 a b c 2643 --- --- --- 2644 1 0.1 x 2645 2 0.2 y 2646 3 0.3 z 2647 2648 Remove columns 'b' and 'c' from the table:: 2649 2650 >>> t.remove_columns(['b', 'c']) 2651 >>> print(t) 2652 a 2653 --- 2654 1 2655 2 2656 3 2657 2658 Specifying only a single column also works. Remove column 'b' from the table:: 2659 2660 >>> t = Table([[1, 2, 3], [0.1, 0.2, 0.3], ['x', 'y', 'z']], 2661 ... names=('a', 'b', 'c')) 2662 >>> t.remove_columns('b') 2663 >>> print(t) 2664 a c 2665 --- --- 2666 1 x 2667 2 y 2668 3 z 2669 2670 This gives the same as using remove_column. 2671 ''' 2672 if isinstance(names, str): 2673 names = [names] 2674 2675 for name in names: 2676 if name not in self.columns: 2677 raise KeyError(f"Column {name} does not exist") 2678 2679 for name in names: 2680 self.columns.pop(name) 2681 2682 def _convert_string_dtype(self, in_kind, out_kind, encode_decode_func): 2683 """ 2684 Convert string-like columns to/from bytestring and unicode (internal only). 2685 2686 Parameters 2687 ---------- 2688 in_kind : str 2689 Input dtype.kind 2690 out_kind : str 2691 Output dtype.kind 2692 """ 2693 2694 for col in self.itercols(): 2695 if col.dtype.kind == in_kind: 2696 try: 2697 # This requires ASCII and is faster by a factor of up to ~8, so 2698 # try that first. 2699 newcol = col.__class__(col, dtype=out_kind) 2700 except (UnicodeEncodeError, UnicodeDecodeError): 2701 newcol = col.__class__(encode_decode_func(col, 'utf-8')) 2702 2703 # Quasi-manually copy info attributes. Unfortunately 2704 # DataInfo.__set__ does not do the right thing in this case 2705 # so newcol.info = col.info does not get the old info attributes. 
2706 for attr in col.info.attr_names - col.info._attrs_no_copy - set(['dtype']): 2707 value = deepcopy(getattr(col.info, attr)) 2708 setattr(newcol.info, attr, value) 2709 2710 self[col.name] = newcol 2711 2712 def convert_bytestring_to_unicode(self): 2713 """ 2714 Convert bytestring columns (dtype.kind='S') to unicode (dtype.kind='U') 2715 using UTF-8 encoding. 2716 2717 Internally this changes string columns to represent each character 2718 in the string with a 4-byte UCS-4 equivalent, so it is inefficient 2719 for memory but allows scripts to manipulate string arrays with 2720 natural syntax. 2721 """ 2722 self._convert_string_dtype('S', 'U', np.char.decode) 2723 2724 def convert_unicode_to_bytestring(self): 2725 """ 2726 Convert unicode columns (dtype.kind='U') to bytestring (dtype.kind='S') 2727 using UTF-8 encoding. 2728 2729 When exporting a unicode string array to a file, it may be desirable 2730 to encode unicode columns as bytestrings. 2731 """ 2732 self._convert_string_dtype('U', 'S', np.char.encode) 2733 2734 def keep_columns(self, names): 2735 ''' 2736 Keep only the columns specified (remove the others). 2737 2738 Parameters 2739 ---------- 2740 names : list 2741 A list containing the names of the columns to keep. All other 2742 columns will be removed. 2743 2744 Examples 2745 -------- 2746 Create a table with three columns 'a', 'b' and 'c':: 2747 2748 >>> t = Table([[1, 2, 3],[0.1, 0.2, 0.3],['x', 'y', 'z']], 2749 ... names=('a', 'b', 'c')) 2750 >>> print(t) 2751 a b c 2752 --- --- --- 2753 1 0.1 x 2754 2 0.2 y 2755 3 0.3 z 2756 2757 Specifying only a single column name keeps only this column. 2758 Keep only column 'a' of the table:: 2759 2760 >>> t.keep_columns('a') 2761 >>> print(t) 2762 a 2763 --- 2764 1 2765 2 2766 3 2767 2768 Specifying a list of column names is keeps is also possible. 2769 Keep columns 'a' and 'c' of the table:: 2770 2771 >>> t = Table([[1, 2, 3],[0.1, 0.2, 0.3],['x', 'y', 'z']], 2772 ... 
def rename_column(self, name, new_name):
    '''
    Rename a column.

    This can also be done directly by setting the ``name`` attribute
    for a column::

      table[name].name = new_name

    TODO: this won't work for mixins

    Parameters
    ----------
    name : str
        The current name of the column.
    new_name : str
        The new name for the column

    Raises
    ------
    KeyError
        If the table has no column called ``name``.

    Examples
    --------
    Create a table with three columns 'a', 'b' and 'c'::

        >>> t = Table([[1,2],[3,4],[5,6]], names=('a','b','c'))
        >>> print(t)
         a   b   c
        --- --- ---
          1   3   5
          2   4   6

    Renaming column 'a' to 'aa'::

        >>> t.rename_column('a' , 'aa')
        >>> print(t)
         aa  b   c
        --- --- ---
          1   3   5
          2   4   6
    '''
    existing_names = self.keys()
    if name not in existing_names:
        raise KeyError(f"Column {name} does not exist")

    # The rename is done by assigning to the column's ``info.name``
    target_col = self.columns[name]
    target_col.info.name = new_name
2847 2848 Examples 2849 -------- 2850 Create a table with three columns 'a', 'b', 'c':: 2851 2852 >>> t = Table([[1,2],[3,4],[5,6]], names=('a','b','c')) 2853 >>> print(t) 2854 a b c 2855 --- --- --- 2856 1 3 5 2857 2 4 6 2858 2859 Renaming columns 'a' to 'aa' and 'b' to 'bb':: 2860 2861 >>> names = ('a','b') 2862 >>> new_names = ('aa','bb') 2863 >>> t.rename_columns(names, new_names) 2864 >>> print(t) 2865 aa bb c 2866 --- --- --- 2867 1 3 5 2868 2 4 6 2869 ''' 2870 2871 if not self._is_list_or_tuple_of_str(names): 2872 raise TypeError("input 'names' must be a tuple or a list of column names") 2873 2874 if not self._is_list_or_tuple_of_str(new_names): 2875 raise TypeError("input 'new_names' must be a tuple or a list of column names") 2876 2877 if len(names) != len(new_names): 2878 raise ValueError("input 'names' and 'new_names' list arguments must be the same length") 2879 2880 for name, new_name in zip(names, new_names): 2881 self.rename_column(name, new_name) 2882 2883 def _set_row(self, idx, colnames, vals): 2884 try: 2885 assert len(vals) == len(colnames) 2886 except Exception: 2887 raise ValueError('right hand side must be a sequence of values with ' 2888 'the same length as the number of selected columns') 2889 2890 # Keep track of original values before setting each column so that 2891 # setting row can be transactional. 2892 orig_vals = [] 2893 cols = self.columns 2894 try: 2895 for name, val in zip(colnames, vals): 2896 orig_vals.append(cols[name][idx]) 2897 cols[name][idx] = val 2898 except Exception: 2899 # If anything went wrong first revert the row update then raise 2900 for name, val in zip(colnames, orig_vals[:-1]): 2901 cols[name][idx] = val 2902 raise 2903 2904 def add_row(self, vals=None, mask=None): 2905 """Add a new row to the end of the table. 2906 2907 The ``vals`` argument can be: 2908 2909 sequence (e.g. tuple or list) 2910 Column values in the same order as table columns. 2911 mapping (e.g. dict) 2912 Keys corresponding to column names. 
def insert_row(self, index, vals=None, mask=None):
    """Add a new row before the given ``index`` position in the table.

    The ``vals`` argument can be:

    sequence (e.g. tuple or list)
        Column values in the same order as table columns.
    mapping (e.g. dict)
        Keys corresponding to column names.  Missing values will be
        filled with np.zeros for the column dtype.
    `None`
        All values filled with np.zeros for the column dtype.

    The ``mask`` attribute should give (if desired) the mask for the
    values. The type of the mask should match that of the values, i.e. if
    ``vals`` is an iterable, then ``mask`` should also be an iterable
    with the same length, and if ``vals`` is a mapping, then ``mask``
    should be a dictionary.

    Parameters
    ----------
    index : int
        Position before which the row is inserted.  Negative values are
        offset by the table length; ``len(table)`` appends at the end.
    vals : tuple, list, dict or None
        Use the specified values in the new row
    mask : tuple, list, dict or None
        Use the specified mask values in the new row

    Raises
    ------
    IndexError
        If ``index`` is outside ``[-len(table), len(table)]``.
    TypeError
        If ``vals`` is not an iterable, mapping or None, or if the kind of
        ``mask`` does not match the kind of ``vals``.
    ValueError
        If keys or lengths of ``vals`` / ``mask`` do not match the table
        columns, or if inserting into any column fails.
    """
    colnames = self.colnames

    N = len(self)
    if index < -N or index > N:
        raise IndexError("Index {} is out of bounds for table with length {}"
                         .format(index, N))
    if index < 0:
        index += N

    if isinstance(vals, Mapping) or vals is None:
        # From the vals and/or mask mappings create the corresponding lists
        # that have entries for each table column.
        if mask is not None and not isinstance(mask, Mapping):
            raise TypeError("Mismatch between type of vals and mask")

        # Now check that the mask is specified for the same keys as the
        # values, otherwise things get really confusing.  ``vals`` may be
        # None here, in which case it contributes no keys at all.
        vals_keys = set() if vals is None else set(vals.keys())
        if mask is not None and vals_keys != set(mask.keys()):
            raise ValueError('keys in mask should match keys in vals')

        if vals and any(name not in colnames for name in vals):
            raise ValueError('Keys in vals must all be valid column names')

        vals_list = []
        mask_list = []

        for name in colnames:
            if vals and name in vals:
                vals_list.append(vals[name])
                mask_list.append(False if mask is None else mask[name])
            else:
                col = self[name]
                if hasattr(col, 'dtype'):
                    # Make a placeholder zero element of the right type which is masked.
                    # This assumes the appropriate insert() method will broadcast a
                    # numpy scalar to the right shape.
                    vals_list.append(np.zeros(shape=(), dtype=col.dtype))

                    # For masked table any unsupplied values are masked by default.
                    mask_list.append(self.masked and vals is not None)
                else:
                    raise ValueError(f"Value must be supplied for column '{name}'")

        vals = vals_list
        mask = mask_list

    if isiterable(vals):
        if mask is not None and (not isiterable(mask) or isinstance(mask, Mapping)):
            raise TypeError("Mismatch between type of vals and mask")

        if len(self.columns) != len(vals):
            raise ValueError('Mismatch between number of vals and columns')

        if mask is not None:
            if len(self.columns) != len(mask):
                raise ValueError('Mismatch between number of masks and columns')
        else:
            mask = [False] * len(self.columns)

    else:
        raise TypeError('Vals must be an iterable or mapping or None')

    # Insert val at index for each column.  The new columns are collected
    # separately and only swapped in once every insertion has succeeded.
    columns = self.TableColumns()
    for name, col, val, mask_ in zip(colnames, self.columns.values(), vals, mask):
        try:
            # If new val is masked and the existing column does not support masking
            # then upgrade the column to a mask-enabled type: either the table-level
            # default ColumnClass or else MaskedColumn.
            if mask_ and isinstance(col, Column) and not isinstance(col, MaskedColumn):
                col_cls = (self.ColumnClass
                           if issubclass(self.ColumnClass, self.MaskedColumn)
                           else self.MaskedColumn)
                col = col_cls(col, copy=False)

            newcol = col.insert(index, val, axis=0)

            if len(newcol) != N + 1:
                # Expected length is the old length plus the inserted row.
                raise ValueError('Incorrect length for column {} after inserting {}'
                                 ' (expected {}, got {})'
                                 .format(name, val, N + 1, len(newcol)))
            newcol.info.parent_table = self

            # Set mask if needed and possible
            if mask_:
                if hasattr(newcol, 'mask'):
                    newcol[index] = np.ma.masked
                else:
                    raise TypeError("mask was supplied for column '{}' but it does not "
                                    "support masked values".format(col.info.name))

            columns[name] = newcol

        except Exception as err:
            raise ValueError("Unable to insert row because of exception in column '{}':\n{}"
                             .format(name, err)) from err

    for table_index in self.indices:
        table_index.insert_row(index, vals, self.columns.values())

    self._replace_cols(columns)

    # Revert groups to default (ungrouped) state
    if hasattr(self, '_groups'):
        del self._groups
def argsort(self, keys=None, kind=None, reverse=False):
    """
    Return the indices which would sort the table according to one or
    more key columns.

    If a table index exists for ``keys`` its sorted order is used directly;
    otherwise the work is delegated to `numpy.argsort`.

    Parameters
    ----------
    keys : str or list of str
        The column name(s) to order the table by
    kind : {'quicksort', 'mergesort', 'heapsort'}, optional
        Sorting algorithm, forwarded to `numpy.argsort` when given.
    reverse : bool
        Sort in reverse order (default=False)

    Returns
    -------
    index_array : ndarray, int
        Array of indices that sorts the table by the specified key
        column(s).
    """
    if isinstance(keys, str):
        keys = [keys]

    def _ordered(indices):
        # Apply the requested direction to an ascending index array.
        return indices[::-1] if reverse else indices

    # use index sorted order if possible
    if keys is not None:
        table_index = get_index(self, names=keys)
        if table_index is not None:
            return _ordered(np.asarray(table_index.sorted_data()))

    sort_kwargs = {}
    if not keys:
        # No keys provided so sort on all columns.
        data = self.as_array()
    elif len(keys) > 1:
        # Multiple keys: sort a structured array using ``order``.
        sort_kwargs['order'] = keys
        data = self.as_array(names=keys)
    else:
        # Single key: sort the column itself.  Sorting a one-column
        # structured array is slower than a plain ndarray, and much slower
        # for sortable mixins like Time which get stored as object arrays.
        data = self[keys[0]]

    if kind:
        sort_kwargs['kind'] = kind

    # np.argsort will look for a possible .argsort method (e.g., for Time),
    # and if that fails cast to an array and try sorting that way.
    return _ordered(np.argsort(data, **sort_kwargs))
[12, 15, 18]], names=('firstname', 'name', 'tel')) 3236 >>> print(t) 3237 firstname name tel 3238 --------- ------- --- 3239 Max Miller 12 3240 Jo Miller 15 3241 John Jackson 18 3242 3243 Sorting according to standard sorting rules, first 'name' then 'firstname':: 3244 3245 >>> t.sort(['name', 'firstname']) 3246 >>> print(t) 3247 firstname name tel 3248 --------- ------- --- 3249 John Jackson 18 3250 Jo Miller 15 3251 Max Miller 12 3252 3253 Sorting according to standard sorting rules, first 'firstname' then 'tel', 3254 in reverse order:: 3255 3256 >>> t.sort(['firstname', 'tel'], reverse=True) 3257 >>> print(t) 3258 firstname name tel 3259 --------- ------- --- 3260 Max Miller 12 3261 John Jackson 18 3262 Jo Miller 15 3263 ''' 3264 if keys is None: 3265 if not self.indices: 3266 raise ValueError("Table sort requires input keys or a table index") 3267 keys = [x.info.name for x in self.indices[0].columns] 3268 3269 if isinstance(keys, str): 3270 keys = [keys] 3271 3272 indexes = self.argsort(keys) 3273 3274 if reverse: 3275 indexes = indexes[::-1] 3276 3277 with self.index_mode('freeze'): 3278 for name, col in self.columns.items(): 3279 # Make a new sorted column. This requires that take() also copies 3280 # relevant info attributes for mixin columns. 3281 new_col = col.take(indexes, axis=0) 3282 3283 # First statement in try: will succeed if the column supports an in-place 3284 # update, and matches the legacy behavior of astropy Table. However, 3285 # some mixin classes may not support this, so in that case just drop 3286 # in the entire new column. See #9553 and #9536 for discussion. 3287 try: 3288 col[:] = new_col 3289 except Exception: 3290 # In-place update failed for some reason, exception class not 3291 # predictable for arbitrary mixin. 3292 self[col.info.name] = new_col 3293 3294 def reverse(self): 3295 ''' 3296 Reverse the row order of table rows. The table is reversed 3297 in place and there are no function arguments. 
def round(self, decimals=0):
    '''
    Round numeric columns in-place to the specified number of decimals.
    Non-numeric columns will be ignored.

    Parameters
    ----------
    decimals: int, dict
        Number of decimals to round the columns to. If a dict is given,
        the columns will be rounded to the number specified as the value.
        If a certain column is not in the dict given, it will remain the
        same.  NumPy integer scalars (e.g. ``np.int64``) are accepted
        wherever a Python ``int`` is.

    Raises
    ------
    ValueError
        If ``decimals`` is neither an integer nor a mapping.

    Examples
    --------
    Create three columns with different types:

        >>> t = Table([[1, 4, 5], [-25.55, 12.123, 85],
        ...     ['a', 'b', 'c']], names=('a', 'b', 'c'))
        >>> print(t)
         a    b     c
        --- ------ ---
          1 -25.55   a
          4 12.123   b
          5   85.0   c

    Round them all to 0:

        >>> t.round(0)
        >>> print(t)
         a    b    c
        --- ----- ---
          1 -26.0   a
          4  12.0   b
          5  85.0   c

    Round column 'a' to -1 decimal:

        >>> t.round({'a':-1})
        >>> print(t)
         a    b    c
        --- ----- ---
          0 -26.0   a
          0  12.0   b
          0  85.0   c
    '''
    if isinstance(decimals, Mapping):
        targets = decimals.items()
    elif isinstance(decimals, (int, np.integer)):
        # np.integer covers NumPy scalar ints, which are not ``int`` subclasses
        # but are valid ``decimals`` values for np.around.
        targets = zip(self.colnames, itertools.repeat(decimals))
    else:
        raise ValueError("'decimals' argument must be an int or a dict")

    for colname, decimal in targets:
        col = self.columns[colname]
        if np.issubdtype(col.info.dtype, np.number):
            try:
                np.around(col, decimals=decimal, out=col)
            except TypeError:
                # Bug in numpy see https://github.com/numpy/numpy/issues/15438
                col[()] = np.around(col, decimals=decimal)
def _rows_equal(self, other):
    """
    Compare this table row by row with another object.

    This is the implementation behind ``__eq__``, so the result is the same
    as using the ``==`` operator on the table.

    Parameters
    ----------
    other : Table or DataFrame or ndarray
        An object to compare with table

    Returns
    -------
    result : ndarray of bool
        1-D boolean array with the outcome of the row-wise comparison.

    Examples
    --------
    Comparing one Table with other::

        >>> t1 = Table([[1,2],[4,5],[7,8]], names=('a','b','c'))
        >>> t2 = Table([[1,2],[4,5],[7,8]], names=('a','b','c'))
        >>> t1._rows_equal(t2)
        array([ True,  True])

    """
    if isinstance(other, Table):
        other = other.as_array()

    other_is_masked = isinstance(other, np.ma.MaskedArray)

    if self.has_masked_columns:
        if other_is_masked:
            return self.as_array() == other

        # The other array is not masked, so by definition any masked
        # element on our side makes the row a non-match.
        no_mask = np.zeros(1, dtype=[(name, bool) for name in self.dtype.names])
        return (self.as_array().data == other) & (self.mask == no_mask)

    if other_is_masked:
        # We are not masked, so by definition any masked element in
        # ``other`` makes the row a non-match.
        no_mask = np.zeros(1, dtype=[(name, bool) for name in other.dtype.names])
        return (self.as_array() == other.data) & (other.mask == no_mask)

    return self.as_array() == other
@property
def groups(self):
    """Grouping information for this table.

    The ``TableGroups`` object is created on first access and cached on the
    instance as ``_groups``; later accesses return the cached object.
    """
    if hasattr(self, '_groups'):
        return self._groups

    self._groups = groups.TableGroups(self)
    return self._groups
def to_pandas(self, index=None, use_nullable_int=True):
    """
    Return a :class:`pandas.DataFrame` instance

    The index of the created DataFrame is controlled by the ``index``
    argument.  For ``index=True`` or the default ``None``, an index will be
    specified for the DataFrame if there is a primary key index on the
    Table *and* if it corresponds to a single column.  If ``index=False``
    then no DataFrame index will be specified.  If ``index`` is the name of
    a column in the table then that will be the DataFrame index.

    In addition to vanilla columns or masked columns, this supports Table
    mixin columns like Quantity, Time, or SkyCoord.  In many cases these
    objects have no analog in pandas and will be converted to a "encoded"
    representation using only Column or MaskedColumn.  The exception is
    Time or TimeDelta columns, which will be converted to the corresponding
    representation in pandas using ``np.datetime64`` or ``np.timedelta64``.
    See the example below.

    Parameters
    ----------
    index : None, bool, str
        Specify DataFrame index mode
    use_nullable_int : bool, default=True
        Convert integer MaskedColumn to pandas nullable integer type.
        If ``use_nullable_int=False`` or the pandas version does not support
        nullable integer types (version < 0.24), then the column is converted
        to float with NaN for missing elements and a warning is issued.

    Returns
    -------
    dataframe : :class:`pandas.DataFrame`
        A pandas :class:`pandas.DataFrame` instance

    Raises
    ------
    ImportError
        If pandas is not installed
    ValueError
        If the Table has multi-dimensional columns, or if ``index`` is
        neither None, a bool, nor the name of a table column

    Examples
    --------
    Here we convert a table with a few mixins to a
    :class:`pandas.DataFrame` instance.

      >>> import pandas as pd
      >>> from astropy.table import QTable
      >>> import astropy.units as u
      >>> from astropy.time import Time, TimeDelta
      >>> from astropy.coordinates import SkyCoord

      >>> q = [1, 2] * u.m
      >>> tm = Time([1998, 2002], format='jyear')
      >>> sc = SkyCoord([5, 6], [7, 8], unit='deg')
      >>> dt = TimeDelta([3, 200] * u.s)

      >>> t = QTable([q, tm, sc, dt], names=['q', 'tm', 'sc', 'dt'])

      >>> df = t.to_pandas(index='tm')
      >>> with pd.option_context('display.max_columns', 20):
      ...     print(df)
                    q  sc.ra  sc.dec              dt
      tm
      1998-01-01  1.0    5.0     7.0 0 days 00:00:03
      2002-01-01  2.0    6.0     8.0 0 days 00:03:20

    """
    from pandas import DataFrame, Series

    if index is not False:
        if index in (None, True):
            # Default is to use the table primary key if available and a single column
            if self.primary_key and len(self.primary_key) == 1:
                index = self.primary_key[0]
            else:
                index = False
        else:
            if index not in self.colnames:
                raise ValueError('index must be None, False, True or a table '
                                 'column name')

    def _encode_mixins(tbl):
        """Encode a Table ``tbl`` that may have mixin columns to a Table with only
        astropy Columns + appropriate meta-data to allow subsequent decoding.
        """
        from . import serialize
        from astropy.time import TimeBase, TimeDelta

        # Convert any Time or TimeDelta columns and pay attention to masking
        time_cols = [col for col in tbl.itercols() if isinstance(col, TimeBase)]
        if time_cols:

            # Make a light copy of table and clear any indices
            new_cols = []
            for col in tbl.itercols():
                new_col = col_copy(col, copy_indices=False) if col.info.indices else col
                new_cols.append(new_col)
            tbl = tbl.__class__(new_cols, copy=False)

            # Certain subclasses (e.g. TimeSeries) may generate new indices on
            # table creation, so make sure there are no indices on the table.
            for col in tbl.itercols():
                col.info.indices.clear()

            for col in time_cols:
                if isinstance(col, TimeDelta):
                    # Convert to nanoseconds (matches astropy datetime64 support)
                    new_col = (col.sec * 1e9).astype('timedelta64[ns]')
                    nat = np.timedelta64('NaT')
                else:
                    new_col = col.datetime64.copy()
                    nat = np.datetime64('NaT')
                if col.masked:
                    new_col[col.mask] = nat
                tbl[col.info.name] = new_col

        # Convert the table to one with no mixins, only Column objects.
        encode_tbl = serialize.represent_mixins_as_columns(tbl)
        return encode_tbl

    tbl = _encode_mixins(self)

    badcols = [name for name, col in self.columns.items() if len(col.shape) > 1]
    if badcols:
        raise ValueError(
            f'Cannot convert a table with multidimensional columns to a '
            f'pandas DataFrame. Offending columns are: {badcols}\n'
            f'One can filter out such columns using:\n'
            f'names = [name for name in tbl.colnames if len(tbl[name].shape) <= 1]\n'
            f'tbl[names].to_pandas(...)')

    out = OrderedDict()

    for name, column in tbl.columns.items():
        if getattr(column.dtype, 'isnative', True):
            out[name] = column
        else:
            # Swap the bytes and reinterpret them with a native-order dtype.
            # ``ndarray.newbyteorder`` was removed in NumPy 2.0, so go via
            # ``dtype.newbyteorder`` plus a view instead.
            out[name] = column.data.byteswap().view(column.dtype.newbyteorder('='))

        if isinstance(column, MaskedColumn) and np.any(column.mask):
            if column.dtype.kind in ['i', 'u']:
                pd_dtype = column.dtype.name
                if use_nullable_int:
                    # Convert int64 to Int64, uint32 to UInt32, etc for nullable types
                    pd_dtype = pd_dtype.replace('i', 'I').replace('u', 'U')
                out[name] = Series(out[name], dtype=pd_dtype)

                # If pandas is older than 0.24 the type may have turned to float
                if column.dtype.kind != out[name].dtype.kind:
                    warnings.warn(
                        f"converted column '{name}' from {column.dtype} to {out[name].dtype}",
                        TableReplaceWarning, stacklevel=3)
            elif column.dtype.kind not in ['f', 'c']:
                out[name] = column.astype(object).filled(np.nan)

    kwargs = {}

    if index:
        idx = out.pop(index)

        kwargs['index'] = idx

        # We add the table index to Series inputs (MaskedColumn with int values) to override
        # its default RangeIndex, see #11432
        for v in out.values():
            if isinstance(v, Series):
                v.index = idx

    df = DataFrame(out, **kwargs)
    if index:
        # Explicitly set the pandas DataFrame index to the original table
        # index name.  Use the validated column name rather than
        # ``idx.info.name``: ``idx`` may be a pandas Series or a plain
        # ndarray at this point, neither of which carries astropy ``info``.
        df.index.name = index

    return df
print(df) 3822 time dt x 3823 0 1998-01-01 0 days 00:00:01 3.0 3824 1 2002-01-01 0 days 00:05:00 4.0 3825 3826 >>> QTable.from_pandas(df) 3827 <QTable length=2> 3828 time dt x 3829 Time TimeDelta float64 3830 ----------------------- --------- ------- 3831 1998-01-01T00:00:00.000 1.0 3.0 3832 2002-01-01T00:00:00.000 300.0 4.0 3833 3834 """ 3835 3836 out = OrderedDict() 3837 3838 names = list(dataframe.columns) 3839 columns = [dataframe[name] for name in names] 3840 datas = [np.array(column) for column in columns] 3841 masks = [np.array(column.isnull()) for column in columns] 3842 3843 if index: 3844 index_name = dataframe.index.name or 'index' 3845 while index_name in names: 3846 index_name = '_' + index_name + '_' 3847 names.insert(0, index_name) 3848 columns.insert(0, dataframe.index) 3849 datas.insert(0, np.array(dataframe.index)) 3850 masks.insert(0, np.zeros(len(dataframe), dtype=bool)) 3851 3852 if units is None: 3853 units = [None] * len(names) 3854 else: 3855 if not isinstance(units, Mapping): 3856 raise TypeError('Expected a Mapping "column-name" -> "unit"') 3857 3858 not_found = set(units.keys()) - set(names) 3859 if not_found: 3860 warnings.warn(f'`units` contains additional columns: {not_found}') 3861 3862 units = [units.get(name) for name in names] 3863 3864 for name, column, data, mask, unit in zip(names, columns, datas, masks, units): 3865 3866 if column.dtype.kind in ['u', 'i'] and np.any(mask): 3867 # Special-case support for pandas nullable int 3868 np_dtype = str(column.dtype).lower() 3869 data = np.zeros(shape=column.shape, dtype=np_dtype) 3870 data[~mask] = column[~mask] 3871 out[name] = MaskedColumn(data=data, name=name, mask=mask, unit=unit, copy=False) 3872 continue 3873 3874 if data.dtype.kind == 'O': 3875 # If all elements of an object array are string-like or np.nan 3876 # then coerce back to a native numpy str/unicode array. 
class QTable(Table):
    """A class to represent tables of heterogeneous data.

    `~astropy.table.QTable` provides a class for heterogeneous tabular data
    which can be easily modified, for instance adding columns or new rows.

    The `~astropy.table.QTable` class is identical to `~astropy.table.Table`
    except that columns with an associated ``unit`` attribute are converted to
    `~astropy.units.Quantity` objects.

    See also:

    - https://docs.astropy.org/en/stable/table/
    - https://docs.astropy.org/en/stable/table/mixin_columns.html

    Parameters
    ----------
    data : numpy ndarray, dict, list, table-like object, optional
        Data to initialize table.
    masked : bool, optional
        Specify whether the table is masked.
    names : list, optional
        Specify column names.
    dtype : list, optional
        Specify column data types.
    meta : dict, optional
        Metadata associated with the table.
    copy : bool, optional
        Copy the input data. Default is True.
    rows : numpy ndarray, list of list, optional
        Row-oriented data for table instead of ``data`` argument.
    copy_indices : bool, optional
        Copy any indices in the input data. Default is True.
    **kwargs : dict, optional
        Additional keyword args when converting table-like object.

    """

    def _is_mixin_for_table(self, col):
        """
        Return whether ``col`` should be stored in the table as-is, i.e. as
        a mixin column.  This is decided by checking for a ``MixinInfo``
        info class via ``has_info_class``.
        """
        return has_info_class(col, MixinInfo)

    def _convert_col_for_table(self, col):
        """
        Convert a ``Column`` that carries a unit into a Quantity; defer to
        the parent class for everything else.

        If the conversion raises, a warning is issued and the column is
        returned unchanged.
        """
        has_unit = isinstance(col, Column) and getattr(col, 'unit', None) is not None
        if not has_unit:
            return super()._convert_col_for_table(col)

        # We need to turn the column into a quantity; use subok=True to allow
        # Quantity subclasses identified in the unit (such as u.mag()).
        if isinstance(col, MaskedColumn):
            q_cls = Masked(Quantity)
        else:
            q_cls = Quantity

        try:
            qcol = q_cls(col.data, col.unit, copy=False, subok=True)
        except Exception as exc:
            warnings.warn(f"column {col.info.name} has a unit but is kept as "
                          f"a {col.__class__.__name__} as an attempt to "
                          f"convert it to Quantity failed with:\n{exc!r}",
                          AstropyUserWarning)
            return col

        # Carry the column's info, then its indices, over to the Quantity.
        qcol.info = col.info
        qcol.info.indices = col.info.indices
        return qcol