1# Licensed under a 3-clause BSD style license - see LICENSE.rst
2
3import itertools
4import warnings
5import weakref
6
7from copy import deepcopy
8
9import numpy as np
10from numpy import ma
11
12from astropy.units import Unit, Quantity
13from astropy.utils.console import color_print
14from astropy.utils.metadata import MetaData
15from astropy.utils.data_info import BaseColumnInfo, dtype_info_name
16from astropy.utils.misc import dtype_bytes_or_chars
17from . import groups
18from . import pprint
19from .np_utils import fix_column_name
20
21# These "shims" provide __getitem__ implementations for Column and MaskedColumn
22from ._column_mixins import _ColumnGetitemShim, _MaskedColumnGetitemShim
23
# Module-level TableFormatter shared by all "bare" columns, i.e. columns whose
# ``parent_table`` is None and which therefore have no table formatter to use.
FORMATTER = pprint.TableFormatter()
27
28
class StringTruncateWarning(UserWarning):
    """
    Warning issued when a value assigned to a string column is truncated
    because the underlying (numpy) string dtype is too short to hold it.

    This deliberately derives from UserWarning rather than AstropyWarning so
    that it can be emitted with ``stacklevel=2``, pointing the user at the
    line of their own code that triggered the truncation.
    """


# Report every truncation, not only the first one seen at a given location.
warnings.simplefilter(action='always', category=StringTruncateWarning)
43
44
def _auto_names(n_cols):
    """
    Return ``n_cols`` automatically generated column names.

    Each name is produced by formatting the ``conf.auto_colname`` template
    with the zero-based column position.
    """
    from . import conf

    template = str(conf.auto_colname)
    return [template.format(position) for position in range(n_cols)]
48
49
50# list of one and two-dimensional comparison functions, which sometimes return
51# a Column class and sometimes a plain array. Used in __array_wrap__ to ensure
52# they only return plain (masked) arrays (see #1446 and #1685)
53_comparison_functions = set(
54    [np.greater, np.greater_equal, np.less, np.less_equal,
55     np.not_equal, np.equal,
56     np.isfinite, np.isinf, np.isnan, np.sign, np.signbit])
57
58
def col_copy(col, copy_indices=True):
    """
    Copy a column in a way that also works for mixin columns.

    For a `BaseColumn` this is simply ``col.copy()``.  Any other object is
    copied with its own ``copy`` method if it has one, otherwise with
    `copy.deepcopy`, and its ``info`` (if already defined) is carried over.

    Parameters
    ----------
    col : Column or mixin column
        Input column
    copy_indices : bool
        Copy the column ``indices`` attribute

    Returns
    -------
    col : Copy of input column
    """
    if isinstance(col, BaseColumn):
        return col.copy()

    if hasattr(col, 'copy'):
        duplicate = col.copy()
    else:
        duplicate = deepcopy(col)

    # Only touch ``info`` when it already lives in the instance dict; merely
    # accessing ``col.info`` otherwise would create a default info instance
    # as a side effect.
    if 'info' not in col.__dict__:
        return duplicate

    duplicate.info = col.info
    if copy_indices and col.info.indices:
        # Give the copy its own indices and point them at the copy.
        duplicate.info.indices = deepcopy(col.info.indices)
        for copied_index in duplicate.info.indices:
            copied_index.replace_col(col, duplicate)

    return duplicate
89
90
class FalseArray(np.ndarray):
    """
    Boolean mask array whose elements are all, and always, False.

    It serves as the stub ``mask`` of mixin columns: a boolean array with the
    shape of the mixin column data that behaves like a normal numpy array,
    except that trying to assign ``True`` to any element raises an exception.
    The consequences of this limitation are most obvious in the high-level
    table operations.

    Parameters
    ----------
    shape : tuple
        Data shape
    """
    def __new__(cls, shape):
        return np.zeros(shape, dtype=bool).view(cls)

    def __setitem__(self, item, val):
        # Nothing is ever stored: every element is already False, so the only
        # job here is to reject an attempt to set something truthy.
        if not np.any(np.asarray(val)):
            return
        message = 'Cannot set any element of {} class to True'.format(
            self.__class__.__name__)
        raise ValueError(message)
116
117
118def _expand_string_array_for_values(arr, values):
119    """
120    For string-dtype return a version of ``arr`` that is wide enough for ``values``.
121    If ``arr`` is not string-dtype or does not need expansion then return ``arr``.
122
123    Parameters
124    ----------
125    arr : np.ndarray
126        Input array
127    values : scalar or array-like
128        Values for width comparison for string arrays
129
130    Returns
131    -------
132    arr_expanded : np.ndarray
133
134    """
135    if arr.dtype.kind in ('U', 'S') and values is not np.ma.masked:
136        # Find the length of the longest string in the new values.
137        values_str_len = np.char.str_len(values).max()
138
139        # Determine character repeat count of arr.dtype.  Returns a positive
140        # int or None (something like 'U0' is not possible in numpy).  If new values
141        # are longer than current then make a new (wider) version of arr.
142        arr_str_len = dtype_bytes_or_chars(arr.dtype)
143        if arr_str_len and values_str_len > arr_str_len:
144            arr_dtype = arr.dtype.byteorder + arr.dtype.kind + str(values_str_len)
145            arr = arr.astype(arr_dtype)
146
147    return arr
148
149
150def _convert_sequence_data_to_array(data, dtype=None):
151    """Convert N-d sequence-like data to ndarray or MaskedArray.
152
153    This is the core function for converting Python lists or list of lists to a
154    numpy array. This handles embedded np.ma.masked constants in ``data`` along
155    with the special case of an homogeneous list of MaskedArray elements.
156
157    Considerations:
158
159    - np.ma.array is about 50 times slower than np.array for list input. This
160      function avoids using np.ma.array on list input.
161    - np.array emits a UserWarning for embedded np.ma.masked, but only for int
162      or float inputs. For those it converts to np.nan and forces float dtype.
163      For other types np.array is inconsistent, for instance converting
164      np.ma.masked to "0.0" for str types.
165    - Searching in pure Python for np.ma.masked in ``data`` is comparable in
166      speed to calling ``np.array(data)``.
167    - This function may end up making two additional copies of input ``data``.
168
169    Parameters
170    ----------
171    data : N-d sequence
172        Input data, typically list or list of lists
173    dtype : None or dtype-like
174        Output datatype (None lets np.array choose)
175
176    Returns
177    -------
178    np_data : np.ndarray or np.ma.MaskedArray
179
180    """
181    np_ma_masked = np.ma.masked  # Avoid repeated lookups of this object
182
183    # Special case of an homogeneous list of MaskedArray elements (see #8977).
184    # np.ma.masked is an instance of MaskedArray, so exclude those values.
185    if (hasattr(data, '__len__')
186        and len(data) > 0
187        and all(isinstance(val, np.ma.MaskedArray)
188                and val is not np_ma_masked for val in data)):
189        np_data = np.ma.array(data, dtype=dtype)
190        return np_data
191
192    # First convert data to a plain ndarray. If there are instances of np.ma.masked
193    # in the data this will issue a warning for int and float.
194    with warnings.catch_warnings(record=True) as warns:
195        # Ensure this warning from numpy is always enabled and that it is not
196        # converted to an error (which can happen during pytest).
197        warnings.filterwarnings('always', category=UserWarning,
198                                message='.*converting a masked element.*')
199        # FutureWarning in numpy 1.21. See https://github.com/astropy/astropy/issues/11291
200        # and https://github.com/numpy/numpy/issues/18425.
201        warnings.filterwarnings('always', category=FutureWarning,
202                                message='.*Promotion of numbers and bools to strings.*')
203        try:
204            np_data = np.array(data, dtype=dtype)
205        except np.ma.MaskError:
206            # Catches case of dtype=int with masked values, instead let it
207            # convert to float
208            np_data = np.array(data)
209        except Exception:
210            # Conversion failed for some reason, e.g. [2, 1*u.m] gives TypeError in Quantity.
211            # First try to interpret the data as Quantity. If that still fails then fall
212            # through to object
213            try:
214                np_data = Quantity(data, dtype)
215            except Exception:
216                dtype = object
217                np_data = np.array(data, dtype=dtype)
218
219    if np_data.ndim == 0 or (np_data.ndim > 0 and len(np_data) == 0):
220        # Implies input was a scalar or an empty list (e.g. initializing an
221        # empty table with pre-declared names and dtypes but no data).  Here we
222        # need to fall through to initializing with the original data=[].
223        return data
224
225    # If there were no warnings and the data are int or float, then we are done.
226    # Other dtypes like string or complex can have masked values and the
227    # np.array() conversion gives the wrong answer (e.g. converting np.ma.masked
228    # to the string "0.0").
229    if len(warns) == 0 and np_data.dtype.kind in ('i', 'f'):
230        return np_data
231
232    # Now we need to determine if there is an np.ma.masked anywhere in input data.
233
234    # Make a statement like below to look for np.ma.masked in a nested sequence.
235    # Because np.array(data) succeeded we know that `data` has a regular N-d
236    # structure. Find ma_masked:
237    #   any(any(any(d2 is ma_masked for d2 in d1) for d1 in d0) for d0 in data)
238    # Using this eval avoids creating a copy of `data` in the more-usual case of
239    # no masked elements.
240    any_statement = 'd0 is ma_masked'
241    for ii in reversed(range(np_data.ndim)):
242        if ii == 0:
243            any_statement = f'any({any_statement} for d0 in data)'
244        elif ii == np_data.ndim - 1:
245            any_statement = f'any(d{ii} is ma_masked for d{ii} in d{ii-1})'
246        else:
247            any_statement = f'any({any_statement} for d{ii} in d{ii-1})'
248    context = {'ma_masked': np.ma.masked, 'data': data}
249    has_masked = eval(any_statement, context)
250
251    # If there are any masks then explicitly change each one to a fill value and
252    # set a mask boolean array. If not has_masked then we're done.
253    if has_masked:
254        mask = np.zeros(np_data.shape, dtype=bool)
255        data_filled = np.array(data, dtype=object)
256
257        # Make type-appropriate fill value based on initial conversion.
258        if np_data.dtype.kind == 'U':
259            fill = ''
260        elif np_data.dtype.kind == 'S':
261            fill = b''
262        else:
263            # Zero works for every numeric type.
264            fill = 0
265
266        ranges = [range(dim) for dim in np_data.shape]
267        for idxs in itertools.product(*ranges):
268            val = data_filled[idxs]
269            if val is np_ma_masked:
270                data_filled[idxs] = fill
271                mask[idxs] = True
272            elif isinstance(val, bool) and dtype is None:
273                # If we see a bool and dtype not specified then assume bool for
274                # the entire array. Not perfect but in most practical cases OK.
275                # Unfortunately numpy types [False, 0] as int, not bool (and
276                # [False, np.ma.masked] => array([0.0, np.nan])).
277                dtype = bool
278
279        # If no dtype is provided then need to convert back to list so np.array
280        # does type autodetection.
281        if dtype is None:
282            data_filled = data_filled.tolist()
283
284        # Use np.array first to convert `data` to ndarray (fast) and then make
285        # masked array from an ndarray with mask (fast) instead of from `data`.
286        np_data = np.ma.array(np.array(data_filled, dtype=dtype), mask=mask)
287
288    return np_data
289
290
291def _make_compare(oper):
292    """
293    Make Column comparison methods which encode the ``other`` object to utf-8
294    in the case of a bytestring dtype for Py3+.
295
296    Parameters
297    ----------
298    oper : str
299        Operator name
300    """
301    swapped_oper = {'__eq__': '__eq__',
302                    '__ne__': '__ne__',
303                    '__gt__': '__lt__',
304                    '__lt__': '__gt__',
305                    '__ge__': '__le__',
306                    '__le__': '__ge__'}[oper]
307
308    def _compare(self, other):
309        op = oper  # copy enclosed ref to allow swap below
310
311        # Special case to work around #6838.  Other combinations work OK,
312        # see tests.test_column.test_unicode_sandwich_compare().  In this
313        # case just swap self and other.
314        #
315        # This is related to an issue in numpy that was addressed in np 1.13.
316        # However that fix does not make this problem go away, but maybe
317        # future numpy versions will do so.  NUMPY_LT_1_13 to get the
318        # attention of future maintainers to check (by deleting or versioning
319        # the if block below).  See #6899 discussion.
320        # 2019-06-21: still needed with numpy 1.16.
321        if (isinstance(self, MaskedColumn) and self.dtype.kind == 'U'
322                and isinstance(other, MaskedColumn) and other.dtype.kind == 'S'):
323            self, other = other, self
324            op = swapped_oper
325
326        if self.dtype.char == 'S':
327            other = self._encode_str(other)
328
329        # Now just let the regular ndarray.__eq__, etc., take over.
330        result = getattr(super(Column, self), op)(other)
331        # But we should not return Column instances for this case.
332        return result.data if isinstance(result, Column) else result
333
334    return _compare
335
336
class ColumnInfo(BaseColumnInfo):
    """
    Holder for column meta information such as name, description and format.

    An ``info`` object is required when an object is used as a mixin column
    within a table, but it can also serve as a general way to store meta
    information.
    """
    attrs_from_parent = BaseColumnInfo.attr_names
    _supports_indexing = True

    def new_like(self, cols, length, metadata_conflicts='warn', name=None):
        """
        Create a new Column with ``length`` rows that is consistent with the
        input ``cols``.

        The result is meant as an empty column whose elements get filled
        in-place by table operations like join or vstack.

        Parameters
        ----------
        cols : list
            List of input columns
        length : int
            Length of the output column object
        metadata_conflicts : str ('warn'|'error'|'silent')
            How to handle metadata conflicts
        name : str
            Output column name

        Returns
        -------
        col : Column (or subclass)
            New instance of this class consistent with ``cols``

        """
        merged_attrs = self.merge_cols_attributes(
            cols, metadata_conflicts, name,
            ('meta', 'unit', 'format', 'description'))

        return self._parent_cls(length=length, **merged_attrs)

    def get_sortable_arrays(self):
        """
        Return the arrays that, sorted lexically, give the order of the
        parent column.

        For a Column that is a one-element list holding the column itself.

        Returns
        -------
        arrays : list of ndarray
        """
        parent_column = self._parent
        return [parent_column]
389
390
391class BaseColumn(_ColumnGetitemShim, np.ndarray):
392
    # Column metadata, managed through the ``MetaData`` descriptor (so
    # assigning to ``meta`` goes through the descriptor, not a plain attribute).
    meta = MetaData()
394
395    def __new__(cls, data=None, name=None,
396                dtype=None, shape=(), length=0,
397                description=None, unit=None, format=None, meta=None,
398                copy=False, copy_indices=True):
399        if data is None:
400            self_data = np.zeros((length,)+shape, dtype=dtype)
401        elif isinstance(data, BaseColumn) and hasattr(data, '_name'):
402            # When unpickling a MaskedColumn, ``data`` will be a bare
403            # BaseColumn with none of the expected attributes.  In this case
404            # do NOT execute this block which initializes from ``data``
405            # attributes.
406            self_data = np.array(data.data, dtype=dtype, copy=copy)
407            if description is None:
408                description = data.description
409            if unit is None:
410                unit = unit or data.unit
411            if format is None:
412                format = data.format
413            if meta is None:
414                meta = data.meta
415            if name is None:
416                name = data.name
417        elif isinstance(data, Quantity):
418            if unit is None:
419                self_data = np.array(data, dtype=dtype, copy=copy)
420                unit = data.unit
421            else:
422                self_data = Quantity(data, unit, dtype=dtype, copy=copy).value
423            # If 'info' has been defined, copy basic properties (if needed).
424            if 'info' in data.__dict__:
425                if description is None:
426                    description = data.info.description
427                if format is None:
428                    format = data.info.format
429                if meta is None:
430                    meta = data.info.meta
431
432        else:
433            if np.dtype(dtype).char == 'S':
434                data = cls._encode_str(data)
435            self_data = np.array(data, dtype=dtype, copy=copy)
436
437        self = self_data.view(cls)
438        self._name = fix_column_name(name)
439        self._parent_table = None
440        self.unit = unit
441        self._format = format
442        self.description = description
443        self.meta = meta
444        self.indices = deepcopy(getattr(data, 'indices', [])) if copy_indices else []
445        for index in self.indices:
446            index.replace_col(data, self)
447
448        return self
449
    @property
    def data(self):
        """
        The column values viewed as a plain `numpy.ndarray`.
        """
        return self.view(np.ndarray)

    @property
    def value(self):
        """
        An alias for the ``data`` attribute.
        """
        return self.data
457
458    @property
459    def parent_table(self):
460        # Note: It seems there are some cases where _parent_table is not set,
461        # such after restoring from a pickled Column.  Perhaps that should be
462        # fixed, but this is also okay for now.
463        if getattr(self, '_parent_table', None) is None:
464            return None
465        else:
466            return self._parent_table()
467
468    @parent_table.setter
469    def parent_table(self, table):
470        if table is None:
471            self._parent_table = None
472        else:
473            self._parent_table = weakref.ref(table)
474
    # ``info`` attribute carrying the column meta information (name,
    # description, format, ...); see ``ColumnInfo``.
    info = ColumnInfo()
476
477    def copy(self, order='C', data=None, copy_data=True):
478        """
479        Return a copy of the current instance.
480
481        If ``data`` is supplied then a view (reference) of ``data`` is used,
482        and ``copy_data`` is ignored.
483
484        Parameters
485        ----------
486        order : {'C', 'F', 'A', 'K'}, optional
487            Controls the memory layout of the copy. 'C' means C-order,
488            'F' means F-order, 'A' means 'F' if ``a`` is Fortran contiguous,
489            'C' otherwise. 'K' means match the layout of ``a`` as closely
490            as possible. (Note that this function and :func:numpy.copy are very
491            similar, but have different default values for their order=
492            arguments.)  Default is 'C'.
493        data : array, optional
494            If supplied then use a view of ``data`` instead of the instance
495            data.  This allows copying the instance attributes and meta.
496        copy_data : bool, optional
497            Make a copy of the internal numpy array instead of using a
498            reference.  Default is True.
499
500        Returns
501        -------
502        col : Column or MaskedColumn
503            Copy of the current column (same type as original)
504        """
505        if data is None:
506            data = self.data
507            if copy_data:
508                data = data.copy(order)
509
510        out = data.view(self.__class__)
511        out.__array_finalize__(self)
512
513        # If there is meta on the original column then deepcopy (since "copy" of column
514        # implies complete independence from original).  __array_finalize__ will have already
515        # made a light copy.  I'm not sure how to avoid that initial light copy.
516        if self.meta is not None:
517            out.meta = self.meta  # MetaData descriptor does a deepcopy here
518
519        # for MaskedColumn, MaskedArray.__array_finalize__ also copies mask
520        # from self, which is not the idea here, so undo
521        if isinstance(self, MaskedColumn):
522            out._mask = data._mask
523
524        self._copy_groups(out)
525
526        return out
527
528    def __setstate__(self, state):
529        """
530        Restore the internal state of the Column/MaskedColumn for pickling
531        purposes.  This requires that the last element of ``state`` is a
532        5-tuple that has Column-specific state values.
533        """
534        # Get the Column attributes
535        names = ('_name', '_unit', '_format', 'description', 'meta', 'indices')
536        attrs = {name: val for name, val in zip(names, state[-1])}
537
538        state = state[:-1]
539
540        # Using super().__setstate__(state) gives
541        # "TypeError 'int' object is not iterable", raised in
542        # astropy.table._column_mixins._ColumnGetitemShim.__setstate_cython__()
543        # Previously, it seems to have given an infinite recursion.
544        # Hence, manually call the right super class to actually set up
545        # the array object.
546        super_class = ma.MaskedArray if isinstance(self, ma.MaskedArray) else np.ndarray
547        super_class.__setstate__(self, state)
548
549        # Set the Column attributes
550        for name, val in attrs.items():
551            setattr(self, name, val)
552        self._parent_table = None
553
554    def __reduce__(self):
555        """
556        Return a 3-tuple for pickling a Column.  Use the super-class
557        functionality but then add in a 5-tuple of Column-specific values
558        that get used in __setstate__.
559        """
560        super_class = ma.MaskedArray if isinstance(self, ma.MaskedArray) else np.ndarray
561        reconstruct_func, reconstruct_func_args, state = super_class.__reduce__(self)
562
563        # Define Column-specific attrs and meta that gets added to state.
564        column_state = (self.name, self.unit, self.format, self.description,
565                        self.meta, self.indices)
566        state = state + (column_state,)
567
568        return reconstruct_func, reconstruct_func_args, state
569
570    def __array_finalize__(self, obj):
571        # Obj will be none for direct call to Column() creator
572        if obj is None:
573            return
574
575        if callable(super().__array_finalize__):
576            super().__array_finalize__(obj)
577
578        # Self was created from template (e.g. obj[slice] or (obj * 2))
579        # or viewcast e.g. obj.view(Column).  In either case we want to
580        # init Column attributes for self from obj if possible.
581        self.parent_table = None
582        if not hasattr(self, 'indices'):  # may have been copied in __new__
583            self.indices = []
584        self._copy_attrs(obj)
585        if 'info' in getattr(obj, '__dict__', {}):
586            self.info = obj.info
587
588    def __array_wrap__(self, out_arr, context=None):
589        """
590        __array_wrap__ is called at the end of every ufunc.
591
592        Normally, we want a Column object back and do not have to do anything
593        special. But there are two exceptions:
594
595        1) If the output shape is different (e.g. for reduction ufuncs
596           like sum() or mean()), a Column still linking to a parent_table
597           makes little sense, so we return the output viewed as the
598           column content (ndarray or MaskedArray).
599           For this case, we use "[()]" to select everything, and to ensure we
600           convert a zero rank array to a scalar. (For some reason np.sum()
601           returns a zero rank scalar array while np.mean() returns a scalar;
602           So the [()] is needed for this case.
603
604        2) When the output is created by any function that returns a boolean
605           we also want to consistently return an array rather than a column
606           (see #1446 and #1685)
607        """
608        out_arr = super().__array_wrap__(out_arr, context)
609        if (self.shape != out_arr.shape
610            or (isinstance(out_arr, BaseColumn)
611                and (context is not None
612                     and context[0] in _comparison_functions))):
613            return out_arr.data[()]
614        else:
615            return out_arr
616
617    @property
618    def name(self):
619        """
620        The name of this column.
621        """
622        return self._name
623
624    @name.setter
625    def name(self, val):
626        val = fix_column_name(val)
627
628        if self.parent_table is not None:
629            table = self.parent_table
630            table.columns._rename_column(self.name, val)
631
632        self._name = val
633
634    @property
635    def format(self):
636        """
637        Format string for displaying values in this column.
638        """
639
640        return self._format
641
642    @format.setter
643    def format(self, format_string):
644
645        prev_format = getattr(self, '_format', None)
646
647        self._format = format_string  # set new format string
648
649        try:
650            # test whether it formats without error exemplarily
651            self.pformat(max_lines=1)
652        except Exception as err:
653            # revert to restore previous format if there was one
654            self._format = prev_format
655            raise ValueError(
656                "Invalid format for column '{}': could not display "
657                "values in this column using this format".format(
658                    self.name)) from err
659
660    @property
661    def descr(self):
662        """Array-interface compliant full description of the column.
663
664        This returns a 3-tuple (name, type, shape) that can always be
665        used in a structured array dtype definition.
666        """
667        return (self.name, self.dtype.str, self.shape[1:])
668
669    def iter_str_vals(self):
670        """
671        Return an iterator that yields the string-formatted values of this
672        column.
673
674        Returns
675        -------
676        str_vals : iterator
677            Column values formatted as strings
678        """
679        # Iterate over formatted values with no max number of lines, no column
680        # name, no unit, and ignoring the returned header info in outs.
681        _pformat_col_iter = self._formatter._pformat_col_iter
682        for str_val in _pformat_col_iter(self, -1, show_name=False, show_unit=False,
683                                         show_dtype=False, outs={}):
684            yield str_val
685
686    def attrs_equal(self, col):
687        """Compare the column attributes of ``col`` to this object.
688
689        The comparison attributes are: ``name``, ``unit``, ``dtype``,
690        ``format``, ``description``, and ``meta``.
691
692        Parameters
693        ----------
694        col : Column
695            Comparison column
696
697        Returns
698        -------
699        equal : bool
700            True if all attributes are equal
701        """
702        if not isinstance(col, BaseColumn):
703            raise ValueError('Comparison `col` must be a Column or '
704                             'MaskedColumn object')
705
706        attrs = ('name', 'unit', 'dtype', 'format', 'description', 'meta')
707        equal = all(getattr(self, x) == getattr(col, x) for x in attrs)
708
709        return equal
710
711    @property
712    def _formatter(self):
713        return FORMATTER if (self.parent_table is None) else self.parent_table.formatter
714
715    def pformat(self, max_lines=None, show_name=True, show_unit=False, show_dtype=False,
716                html=False):
717        """Return a list of formatted string representation of column values.
718
719        If no value of ``max_lines`` is supplied then the height of the
720        screen terminal is used to set ``max_lines``.  If the terminal
721        height cannot be determined then the default will be
722        determined using the ``astropy.conf.max_lines`` configuration
723        item. If a negative value of ``max_lines`` is supplied then
724        there is no line limit applied.
725
726        Parameters
727        ----------
728        max_lines : int
729            Maximum lines of output (header + data rows)
730
731        show_name : bool
732            Include column name. Default is True.
733
734        show_unit : bool
735            Include a header row for unit. Default is False.
736
737        show_dtype : bool
738            Include column dtype. Default is False.
739
740        html : bool
741            Format the output as an HTML table. Default is False.
742
743        Returns
744        -------
745        lines : list
746            List of lines with header and formatted column values
747
748        """
749        _pformat_col = self._formatter._pformat_col
750        lines, outs = _pformat_col(self, max_lines, show_name=show_name,
751                                   show_unit=show_unit, show_dtype=show_dtype,
752                                   html=html)
753        return lines
754
755    def pprint(self, max_lines=None, show_name=True, show_unit=False, show_dtype=False):
756        """Print a formatted string representation of column values.
757
758        If no value of ``max_lines`` is supplied then the height of the
759        screen terminal is used to set ``max_lines``.  If the terminal
760        height cannot be determined then the default will be
761        determined using the ``astropy.conf.max_lines`` configuration
762        item. If a negative value of ``max_lines`` is supplied then
763        there is no line limit applied.
764
765        Parameters
766        ----------
767        max_lines : int
768            Maximum number of values in output
769
770        show_name : bool
771            Include column name. Default is True.
772
773        show_unit : bool
774            Include a header row for unit. Default is False.
775
776        show_dtype : bool
777            Include column dtype. Default is True.
778        """
779        _pformat_col = self._formatter._pformat_col
780        lines, outs = _pformat_col(self, max_lines, show_name=show_name, show_unit=show_unit,
781                                   show_dtype=show_dtype)
782
783        n_header = outs['n_header']
784        for i, line in enumerate(lines):
785            if i < n_header:
786                color_print(line, 'red')
787            else:
788                print(line)
789
790    def more(self, max_lines=None, show_name=True, show_unit=False):
791        """Interactively browse column with a paging interface.
792
793        Supported keys::
794
795          f, <space> : forward one page
796          b : back one page
797          r : refresh same page
798          n : next row
799          p : previous row
800          < : go to beginning
801          > : go to end
802          q : quit browsing
803          h : print this help
804
805        Parameters
806        ----------
807        max_lines : int
808            Maximum number of lines in table output.
809
810        show_name : bool
811            Include a header row for column names. Default is True.
812
813        show_unit : bool
814            Include a header row for unit. Default is False.
815
816        """
817        _more_tabcol = self._formatter._more_tabcol
818        _more_tabcol(self, max_lines=max_lines, show_name=show_name,
819                     show_unit=show_unit)
820
821    @property
822    def unit(self):
823        """
824        The unit associated with this column.  May be a string or a
825        `astropy.units.UnitBase` instance.
826
827        Setting the ``unit`` property does not change the values of the
828        data.  To perform a unit conversion, use ``convert_unit_to``.
829        """
830        return self._unit
831
832    @unit.setter
833    def unit(self, unit):
834        if unit is None:
835            self._unit = None
836        else:
837            self._unit = Unit(unit, parse_strict='silent')
838
839    @unit.deleter
840    def unit(self):
841        self._unit = None
842
843    def convert_unit_to(self, new_unit, equivalencies=[]):
844        """
845        Converts the values of the column in-place from the current
846        unit to the given unit.
847
848        To change the unit associated with this column without
849        actually changing the data values, simply set the ``unit``
850        property.
851
852        Parameters
853        ----------
854        new_unit : str or `astropy.units.UnitBase` instance
855            The unit to convert to.
856
857        equivalencies : list of tuple
858           A list of equivalence pairs to try if the unit are not
859           directly convertible.  See :ref:`astropy:unit_equivalencies`.
860
861        Raises
862        ------
863        astropy.units.UnitsError
864            If units are inconsistent
865        """
866        if self.unit is None:
867            raise ValueError("No unit set on column")
868        self.data[:] = self.unit.to(
869            new_unit, self.data, equivalencies=equivalencies)
870        self.unit = new_unit
871
872    @property
873    def groups(self):
874        if not hasattr(self, '_groups'):
875            self._groups = groups.ColumnGroups(self)
876        return self._groups
877
878    def group_by(self, keys):
879        """
880        Group this column by the specified ``keys``
881
882        This effectively splits the column into groups which correspond to
883        unique values of the ``keys`` grouping object.  The output is a new
884        `Column` or `MaskedColumn` which contains a copy of this column but
885        sorted by row according to ``keys``.
886
887        The ``keys`` input to ``group_by`` must be a numpy array with the
888        same length as this column.
889
890        Parameters
891        ----------
892        keys : numpy array
893            Key grouping object
894
895        Returns
896        -------
897        out : Column
898            New column with groups attribute set accordingly
899        """
900        return groups.column_group_by(self, keys)
901
902    def _copy_groups(self, out):
903        """
904        Copy current groups into a copy of self ``out``
905        """
906        if self.parent_table:
907            if hasattr(self.parent_table, '_groups'):
908                out._groups = groups.ColumnGroups(out, indices=self.parent_table._groups._indices)
909        elif hasattr(self, '_groups'):
910            out._groups = groups.ColumnGroups(out, indices=self._groups._indices)
911
912    # Strip off the BaseColumn-ness for repr and str so that
913    # MaskedColumn.data __repr__ does not include masked_BaseColumn(data =
914    # [1 2], ...).
915    def __repr__(self):
916        return np.asarray(self).__repr__()
917
918    @property
919    def quantity(self):
920        """
921        A view of this table column as a `~astropy.units.Quantity` object with
922        units given by the Column's `unit` parameter.
923        """
924        # the Quantity initializer is used here because it correctly fails
925        # if the column's values are non-numeric (like strings), while .view
926        # will happily return a quantity with gibberish for numerical values
927        return Quantity(self, self.unit, copy=False, dtype=self.dtype, order='A', subok=True)
928
929    def to(self, unit, equivalencies=[], **kwargs):
930        """
931        Converts this table column to a `~astropy.units.Quantity` object with
932        the requested units.
933
934        Parameters
935        ----------
936        unit : unit-like
937            The unit to convert to (i.e., a valid argument to the
938            :meth:`astropy.units.Quantity.to` method).
939        equivalencies : list of tuple
940            Equivalencies to use for this conversion.  See
941            :meth:`astropy.units.Quantity.to` for more details.
942
943        Returns
944        -------
945        quantity : `~astropy.units.Quantity`
946            A quantity object with the contents of this column in the units
947            ``unit``.
948        """
949        return self.quantity.to(unit, equivalencies)
950
951    def _copy_attrs(self, obj):
952        """
953        Copy key column attributes from ``obj`` to self
954        """
955        for attr in ('name', 'unit', '_format', 'description'):
956            val = getattr(obj, attr, None)
957            setattr(self, attr, val)
958
959        # Light copy of meta if it is not empty
960        obj_meta = getattr(obj, 'meta', None)
961        if obj_meta:
962            self.meta = obj_meta.copy()
963
964    @staticmethod
965    def _encode_str(value):
966        """
967        Encode anything that is unicode-ish as utf-8.  This method is only
968        called for Py3+.
969        """
970        if isinstance(value, str):
971            value = value.encode('utf-8')
972        elif isinstance(value, bytes) or value is np.ma.masked:
973            pass
974        else:
975            arr = np.asarray(value)
976            if arr.dtype.char == 'U':
977                arr = np.char.encode(arr, encoding='utf-8')
978                if isinstance(value, np.ma.MaskedArray):
979                    arr = np.ma.array(arr, mask=value.mask, copy=False)
980            value = arr
981
982        return value
983
984    def tolist(self):
985        if self.dtype.kind == 'S':
986            return np.chararray.decode(self, encoding='utf-8').tolist()
987        else:
988            return super().tolist()
989
990
class Column(BaseColumn):
    """Data column for use in a Table object.

    Parameters
    ----------
    data : list, ndarray, or None
        Column data values
    name : str
        Column name and key for reference within Table
    dtype : `~numpy.dtype`-like
        Data type for column
    shape : tuple or ()
        Dimensions of a single row element in the column data
    length : int or 0
        Number of row elements in column data
    description : str or None
        Full description of column
    unit : str or None
        Physical unit
    format : str, None, or callable
        Format string for outputting column values.  This can be an
        "old-style" (``format % value``) or "new-style" (`str.format`)
        format specification string or a function or any callable object that
        accepts a single value and returns a string.
    meta : dict-like or None
        Meta-data associated with the column

    Examples
    --------
    There are two different ways to create a Column:

    - Supply a ``data`` value and leave out ``shape`` and ``length``, which
      are then inferred from the data.

      Examples::

        col = Column(data=[1, 2], name='name')  # shape=(2,)
        col = Column(data=[[1, 2], [3, 4]], name='name')  # shape=(2, 2)
        col = Column(data=[1, 2], name='name', dtype=float)
        col = Column(data=np.array([1, 2]), name='name')
        col = Column(data=['hello', 'world'], name='name')

      The ``dtype`` argument can be any value which is an acceptable
      fixed-size data-type initializer for the numpy.dtype() method.  See
      `<https://numpy.org/doc/stable/reference/arrays.dtypes.html>`_.
      Examples include:

      - Python non-string type (float, int, bool)
      - Numpy non-string type (e.g. np.float32, np.int64, np.bool\\_)
      - Numpy.dtype array-protocol type strings (e.g. 'i4', 'f8', 'S15')

      If no ``dtype`` value is provided then the type is inferred using
      ``np.array(data)``.

    - Supply ``length`` and optionally ``shape``, but not ``data``

      Examples::

        col = Column(name='name', length=5)
        col = Column(name='name', dtype=int, length=10, shape=(3,4))

      The default ``dtype`` is ``np.float64``.  The ``shape`` argument is the
      array shape of a single cell in the column.
    """

    def __new__(cls, data=None, name=None,
                dtype=None, shape=(), length=0,
                description=None, unit=None, format=None, meta=None,
                copy=False, copy_indices=True):
        """Create the column, refusing masked-column input that has masked values."""
        # A plain Column carries no mask, so masked values cannot be kept.
        if isinstance(data, MaskedColumn) and np.any(data.mask):
            raise TypeError("Cannot convert a MaskedColumn with masked value to a Column")

        return super().__new__(
            cls, data=data, name=name, dtype=dtype, shape=shape, length=length,
            description=description, unit=unit, format=format, meta=meta,
            copy=copy, copy_indices=copy_indices)

    def __setattr__(self, item, value):
        """Set an attribute, rejecting ``mask`` on non-masked columns.

        After ``unit`` is set on a numeric column that has a parent table,
        the table is asked whether the column should be converted; if it
        returns a different object, that object replaces this column in
        the table.
        """
        if item == "mask" and not isinstance(self, MaskedColumn):
            raise AttributeError("cannot set mask value to a column in non-masked Table")
        super().__setattr__(item, value)

        if item != 'unit' or not issubclass(self.dtype.type, np.number):
            return

        try:
            converted = self.parent_table._convert_col_for_table(self)
        except AttributeError:  # Either no parent table or parent table is None
            return

        if converted is not self:
            self.parent_table.replace_column(self.name, converted)

    def _base_repr_(self, html=False):
        """Build the repr text: a ``<...>`` description line followed by the data lines.

        With ``html=True`` the description line is XML-escaped and the
        data lines are requested from the formatter in HTML form.
        """
        # If scalar then just convert to correct numpy type and use numpy repr
        if self.ndim == 0:
            return repr(self.item())

        unit = None if self.unit is None else str(self.unit)
        shape = None if self.ndim <= 1 else self.shape[1:]
        attrs = (('name', self.name),
                 ('dtype', dtype_info_name(self.dtype)),
                 ('shape', shape),
                 ('unit', unit),
                 ('format', self.format),
                 ('description', self.description),
                 ('length', len(self)))

        # Only attributes that are actually set appear in the description.
        parts = [self.__class__.__name__]
        parts.extend(f'{attr}={val!r}' for attr, val in attrs if val is not None)
        descr = '<{}>\n'.format(' '.join(parts))

        if html:
            from astropy.utils.xml.writer import xml_escape
            descr = xml_escape(descr)

        data_lines, _ = self._formatter._pformat_col(
            self, show_name=False, show_unit=False, show_length=False, html=html)

        return descr + '\n'.join(data_lines)

    def _repr_html_(self):
        """HTML flavour of the repr."""
        return self._base_repr_(html=True)

    def __repr__(self):
        """Plain-text flavour of the repr."""
        return self._base_repr_(html=False)

    def __str__(self):
        """Return the formatted column lines joined by newlines."""
        # If scalar then just convert to correct numpy type and use numpy repr
        if self.ndim == 0:
            return str(self.item())

        formatted, _ = self._formatter._pformat_col(self)
        return '\n'.join(formatted)

    def __bytes__(self):
        """Return ``str(self)`` encoded as utf-8."""
        text = str(self)
        return text.encode('utf-8')

    def _check_string_truncate(self, value):
        """
        Warn when assigning ``value`` to self would truncate any of its
        string elements.
        """
        # View ``value`` with the string dtype of this column so that the
        # length of its longest string can be measured.
        candidate = np.asanyarray(value, dtype=self.dtype.type)
        if candidate.size == 0:
            return
        longest = np.char.str_len(candidate).max()

        # Number of bytes or characters one element of this column can hold.
        capacity = dtype_bytes_or_chars(self.dtype)

        if longest > capacity:
            # stacklevel=3 is meant to point at the caller of the assignment
            # rather than at this helper or ``__setitem__``.
            warnings.warn(
                f'truncated right side string(s) longer than {capacity} '
                'character(s) during assignment',
                StringTruncateWarning,
                stacklevel=3)

    def __setitem__(self, index, value):
        """Assign ``value`` at ``index``, encoding and checking strings first."""
        col_dtype = self.dtype

        # Bytes columns store utf-8, so encode any unicode input.
        if col_dtype.char == 'S':
            value = self._encode_str(value)

        # Issue warning for string assignment that truncates ``value``
        if issubclass(col_dtype.type, np.character):
            self._check_string_truncate(value)

        # update indices
        self.info.adjust_indices(index, value, len(self))

        # Set items using a view of the underlying data, as it gives an
        # order-of-magnitude speed-up. [#2994]
        self.data[index] = value

    # Rich comparisons are all built by the shared comparison factory.
    __eq__ = _make_compare('__eq__')
    __ne__ = _make_compare('__ne__')
    __gt__ = _make_compare('__gt__')
    __lt__ = _make_compare('__lt__')
    __ge__ = _make_compare('__ge__')
    __le__ = _make_compare('__le__')

    def insert(self, obj, values, axis=0):
        """
        Return a new `~astropy.table.Column` with ``values`` inserted
        before the given indices.

        Parameters
        ----------
        obj : int, slice or sequence of int
            Object that defines the index or indices before which ``values`` is
            inserted.
        values : array-like
            Value(s) to insert.  If the type of ``values`` is different from
            that of the column, ``values`` is converted to the matching type.
            ``values`` should be shaped so that it can be broadcast appropriately.
        axis : int, optional
            Axis along which to insert ``values``.  If ``axis`` is None then
            the column array is flattened before insertion.  Default is 0,
            which will insert a row.

        Returns
        -------
        out : `~astropy.table.Column`
            A copy of the column with ``values`` inserted.  Note that the
            insertion does not occur in-place: a new column is returned.
        """
        if self.dtype.kind != 'O':
            widened = _expand_string_array_for_values(self, values)
            data = np.insert(widened, obj, values, axis=axis)
        else:
            # Even if values is array-like (e.g. [1,2,3]), insert as a single
            # object.  Numpy.insert instead inserts each element in an array-like
            # input individually.
            data = np.insert(self, obj, None, axis=axis)
            data[obj] = values

        result = data.view(self.__class__)
        result.__array_finalize__(self)
        return result

    # We do this to make the methods show up in the API docs
    name = BaseColumn.name
    unit = BaseColumn.unit
    copy = BaseColumn.copy
    more = BaseColumn.more
    pprint = BaseColumn.pprint
    pformat = BaseColumn.pformat
    convert_unit_to = BaseColumn.convert_unit_to
    quantity = BaseColumn.quantity
    to = BaseColumn.to
1227
1228
class MaskedColumnInfo(ColumnInfo):
    """
    Container for meta information like name, description, format.

    It is required when the object is used as a mixin column within a
    table, but also serves as a general way to store meta information.
    In this case it just adds the ``mask_val`` attribute.
    """
    # Extend the attrs known to ColumnInfo with ``serialize_method``.  This
    # allows customization of the way that MaskedColumn objects get written
    # to file depending on format.  The default is to use whatever the
    # writer would normally do, which in the case of FITS or ECSV is to use
    # a NULL value within the data itself.  If serialize_method is
    # 'data_mask' then the mask is explicitly written out as a separate
    # column if there are any masked values.
    attr_names = ColumnInfo.attr_names | {'serialize_method'}

    # When `serialize_method` is 'data_mask', and data and mask are being written
    # as separate columns, use column names <name> and <name>.mask (instead
    # of default encoding as <name>.data and <name>.mask).
    _represent_as_dict_primary_data = 'data'

    mask_val = np.ma.masked

    def __init__(self, bound=False):
        """Initialise the info object; bound instances get default serialize methods."""
        super().__init__(bound)

        # Only an info object bound to a data object instance stores the
        # per-context serialization choices.
        if not bound:
            return

        # Specify how to serialize this object depending on context.
        self.serialize_method = {'fits': 'null_value',
                                 'ecsv': 'null_value',
                                 'hdf5': 'data_mask',
                                 'parquet': 'data_mask',
                                 None: 'null_value'}

    def _represent_as_dict(self):
        """Return the dict representation, adding data/mask entries for 'data_mask'.

        Raises
        ------
        ValueError
            If the serialize method for the current context is neither
            "data_mask" nor "null_value".
        """
        out = super()._represent_as_dict()

        col = self._parent

        # Serialize method configured for this context (e.g. 'fits' or 'ecsv').
        method = self.serialize_method[self._serialize_context]

        if method == 'null_value':
            # Nothing to add to the standard representation.
            return out

        if method != 'data_mask':
            raise ValueError('serialize method must be either "data_mask" or "null_value"')

        # 'data_mask' means to serialize using an explicit mask column.
        # Note: a driver here is a performance issue in #8443 where repr() of a
        # np.ma.MaskedArray value is up to 10 times slower than repr of a normal array
        # value.  So regardless of whether there are masked elements it is useful to
        # explicitly define this as a serialized column and use col.data.data (ndarray)
        # instead of letting it fall through to the "standard" serialization machinery.
        out['data'] = col.data.data

        # Only if there are actually masked elements do we add the ``mask`` column
        if np.any(col.mask):
            out['mask'] = col.mask

        return out
1294
1295
1296class MaskedColumn(Column, _MaskedColumnGetitemShim, ma.MaskedArray):
1297    """Define a masked data column for use in a Table object.
1298
1299    Parameters
1300    ----------
1301    data : list, ndarray, or None
1302        Column data values
1303    name : str
1304        Column name and key for reference within Table
1305    mask : list, ndarray or None
1306        Boolean mask for which True indicates missing or invalid data
1307    fill_value : float, int, str, or None
1308        Value used when filling masked column elements
1309    dtype : `~numpy.dtype`-like
1310        Data type for column
1311    shape : tuple or ()
1312        Dimensions of a single row element in the column data
1313    length : int or 0
1314        Number of row elements in column data
1315    description : str or None
1316        Full description of column
1317    unit : str or None
1318        Physical unit
1319    format : str, None, or callable
1320        Format string for outputting column values.  This can be an
1321        "old-style" (``format % value``) or "new-style" (`str.format`)
1322        format specification string or a function or any callable object that
1323        accepts a single value and returns a string.
1324    meta : dict-like or None
1325        Meta-data associated with the column
1326
1327    Examples
1328    --------
1329    A MaskedColumn is similar to a Column except that it includes ``mask`` and
1330    ``fill_value`` attributes.  It can be created in two different ways:
1331
1332    - Provide a ``data`` value but not ``shape`` or ``length`` (which are
1333      inferred from the data).
1334
1335      Examples::
1336
1337        col = MaskedColumn(data=[1, 2], name='name')
1338        col = MaskedColumn(data=[1, 2], name='name', mask=[True, False])
1339        col = MaskedColumn(data=[1, 2], name='name', dtype=float, fill_value=99)
1340
1341      The ``mask`` argument will be cast as a boolean array and specifies
1342      which elements are considered to be missing or invalid.
1343
1344      The ``dtype`` argument can be any value which is an acceptable
1345      fixed-size data-type initializer for the numpy.dtype() method.  See
1346      `<https://numpy.org/doc/stable/reference/arrays.dtypes.html>`_.
1347      Examples include:
1348
1349      - Python non-string type (float, int, bool)
1350      - Numpy non-string type (e.g. np.float32, np.int64, np.bool\\_)
1351      - Numpy.dtype array-protocol type strings (e.g. 'i4', 'f8', 'S15')
1352
1353      If no ``dtype`` value is provide then the type is inferred using
1354      ``np.array(data)``.  When ``data`` is provided then the ``shape``
1355      and ``length`` arguments are ignored.
1356
1357    - Provide ``length`` and optionally ``shape``, but not ``data``
1358
1359      Examples::
1360
1361        col = MaskedColumn(name='name', length=5)
1362        col = MaskedColumn(name='name', dtype=int, length=10, shape=(3,4))
1363
1364      The default ``dtype`` is ``np.float64``.  The ``shape`` argument is the
1365      array shape of a single cell in the column.
1366    """
    # Use the masked-column info container (it adds ``serialize_method``).
    info = MaskedColumnInfo()
1368
    def __new__(cls, data=None, name=None, mask=None, fill_value=None,
                dtype=None, shape=(), length=0,
                description=None, unit=None, format=None, meta=None,
                copy=False, copy_indices=True):
        """Create a masked column by wrapping a ``BaseColumn`` in a ``MaskedArray``.

        The mask is resolved first: taken from ``data`` when ``mask`` is
        None, replaced by ``False`` when it is ``np.ma.nomask``, and
        deep-copied otherwise.  The column data are then built as a
        ``BaseColumn`` and wrapped with ``ma.MaskedArray.__new__``.  Finally
        ``info``, ``fill_value``, ``parent_table`` and the indices are set
        on the new object.

        See the class docstring for the meaning of the parameters.
        """

        if mask is None:
            # If mask is None then we need to determine the mask (if any) from the data.
            # The naive method is looking for a mask attribute on data, but this can fail,
            # see #8816.  Instead use ``MaskedArray`` to do the work.
            mask = ma.MaskedArray(data).mask
            if mask is np.ma.nomask:
                # Handle odd-ball issue with np.ma.nomask (numpy #13758); the same
                # replacement by False is done in the ``elif`` branch further down.
                mask = False
            elif copy:
                # A copy was requested, so do not share the mask derived from ``data``.
                mask = mask.copy()

        elif mask is np.ma.nomask:
            # Force the creation of a full mask array as nomask is tricky to
            # use and will fail in an unexpected manner when setting a value
            # to the mask.
            mask = False
        else:
            # Explicit mask supplied by the caller: work on an independent copy.
            mask = deepcopy(mask)

        # Create self using MaskedArray as a wrapper class, following the example of
        # class MSubArray in
        # https://github.com/numpy/numpy/blob/maintenance/1.8.x/numpy/ma/tests/test_subclassing.py
        # This pattern makes it so that __array_finalize__ is called as expected (e.g. #1471 and
        # https://github.com/astropy/astropy/commit/ff6039e8)

        # First just pass through all args and kwargs to BaseColumn, then wrap that object
        # with MaskedArray.
        self_data = BaseColumn(data, dtype=dtype, shape=shape, length=length, name=name,
                               unit=unit, format=format, description=description,
                               meta=meta, copy=copy, copy_indices=copy_indices)
        self = ma.MaskedArray.__new__(cls, data=self_data, mask=mask)
        # The above process preserves info relevant for Column, but this does
        # not include serialize_method (and possibly other future attributes)
        # relevant for MaskedColumn, so we set info explicitly.
        # Only done when ``data`` has ``info`` stored in its instance ``__dict__``.
        if 'info' in getattr(data, '__dict__', {}):
            self.info = data.info

        # Note: do not set fill_value in the MaskedArray constructor because this does not
        # go through the fill_value workarounds.
        if fill_value is None and getattr(data, 'fill_value', None) is not None:
            # Coerce the fill_value to the correct type since `data` may be a
            # different dtype than self.  Indexing with ``[()]`` takes the
            # element out of the array built here (presumably 0-d, i.e. a
            # scalar fill value -- TODO confirm for structured dtypes).
            fill_value = np.array(data.fill_value, self.dtype)[()]
        self.fill_value = fill_value

        # A freshly created column does not belong to any table.
        self.parent_table = None

        # needs to be done here since self doesn't come from BaseColumn.__new__
        # Each index is told to refer to the new masked column instead of the
        # intermediate ``self_data`` object.
        for index in self.indices:
            index.replace_col(self_data, self)

        return self
1426
    @property
    def fill_value(self):
        """Fill value of this masked column, as returned by ``get_fill_value``."""
        return self.get_fill_value()  # defer to native ma.MaskedArray method

    @fill_value.setter
    def fill_value(self, val):
        """Set the fill value of this masked column.

        The private ``_fill_value`` attribute is reset to None before
        delegating to ``set_fill_value`` (see the workaround note below).
        NOTE(review): earlier documentation said this also updates the parent
        table; the code here only touches this column.
        """

        # ma bug workaround: If the value of fill_value for a string array is
        # requested but not yet set then it gets created as 'N/A'.  From this point onward
        # any new fill_values are truncated to 3 characters.  Note that this does not
        # occur if the masked array is a structured array.
        #
        # >>> x = ma.array(['xxxx'])
        # >>> x.fill_value  # fill_value now gets represented as an 'S3' array
        # 'N/A'
        # >>> x.fill_value='yyyy'
        # >>> x.fill_value
        # 'yyy'
        #
        # To handle this we are forced to reset a private variable first:
        self._fill_value = None

        self.set_fill_value(val)  # defer to native ma.MaskedArray method
1453
1454    @property
1455    def data(self):
1456        """The plain MaskedArray data held by this column."""
1457        out = self.view(np.ma.MaskedArray)
1458        # By default, a MaskedArray view will set the _baseclass to be the
1459        # same as that of our own class, i.e., BaseColumn.  Since we want
1460        # to return a plain MaskedArray, we reset the baseclass accordingly.
1461        out._baseclass = np.ndarray
1462        return out
1463
1464    def filled(self, fill_value=None):
1465        """Return a copy of self, with masked values filled with a given value.
1466
1467        Parameters
1468        ----------
1469        fill_value : scalar; optional
1470            The value to use for invalid entries (`None` by default).  If
1471            `None`, the ``fill_value`` attribute of the array is used
1472            instead.
1473
1474        Returns
1475        -------
1476        filled_column : Column
1477            A copy of ``self`` with masked entries replaced by `fill_value`
1478            (be it the function argument or the attribute of ``self``).
1479        """
1480        if fill_value is None:
1481            fill_value = self.fill_value
1482
1483        data = super().filled(fill_value)
1484        # Use parent table definition of Column if available
1485        column_cls = self.parent_table.Column if (self.parent_table is not None) else Column
1486
1487        out = column_cls(name=self.name, data=data, unit=self.unit,
1488                         format=self.format, description=self.description,
1489                         meta=deepcopy(self.meta))
1490        return out
1491
1492    def insert(self, obj, values, mask=None, axis=0):
1493        """
1494        Insert values along the given axis before the given indices and return
1495        a new `~astropy.table.MaskedColumn` object.
1496
1497        Parameters
1498        ----------
1499        obj : int, slice or sequence of int
1500            Object that defines the index or indices before which ``values`` is
1501            inserted.
1502        values : array-like
1503            Value(s) to insert.  If the type of ``values`` is different from
1504            that of the column, ``values`` is converted to the matching type.
1505            ``values`` should be shaped so that it can be broadcast appropriately.
1506        mask : bool or array-like
1507            Mask value(s) to insert.  If not supplied, and values does not have
1508            a mask either, then False is used.
1509        axis : int, optional
1510            Axis along which to insert ``values``.  If ``axis`` is None then
1511            the column array is flattened before insertion.  Default is 0,
1512            which will insert a row.
1513
1514        Returns
1515        -------
1516        out : `~astropy.table.MaskedColumn`
1517            A copy of column with ``values`` and ``mask`` inserted.  Note that the
1518            insertion does not occur in-place: a new masked column is returned.
1519        """
1520        self_ma = self.data  # self viewed as MaskedArray
1521
1522        if self.dtype.kind == 'O':
1523            # Even if values is array-like (e.g. [1,2,3]), insert as a single
1524            # object.  Numpy.insert instead inserts each element in an array-like
1525            # input individually.
1526            new_data = np.insert(self_ma.data, obj, None, axis=axis)
1527            new_data[obj] = values
1528        else:
1529            self_ma = _expand_string_array_for_values(self_ma, values)
1530            new_data = np.insert(self_ma.data, obj, values, axis=axis)
1531
1532        if mask is None:
1533            mask = getattr(values, 'mask', np.ma.nomask)
1534            if mask is np.ma.nomask:
1535                if self.dtype.kind == 'O':
1536                    mask = False
1537                else:
1538                    mask = np.zeros(np.shape(values), dtype=bool)
1539
1540        new_mask = np.insert(self_ma.mask, obj, mask, axis=axis)
1541        new_ma = np.ma.array(new_data, mask=new_mask, copy=False)
1542
1543        out = new_ma.view(self.__class__)
1544        out.parent_table = None
1545        out.indices = []
1546        out._copy_attrs(self)
1547        out.fill_value = self.fill_value
1548
1549        return out
1550
1551    def _copy_attrs_slice(self, out):
1552        # Fixes issue #3023: when calling getitem with a MaskedArray subclass
1553        # the original object attributes are not copied.
1554        if out.__class__ is self.__class__:
1555            # TODO: this part is essentially the same as what is done in
1556            # __array_finalize__ and could probably be called directly in our
1557            # override of __getitem__ in _columns_mixins.pyx). Refactor?
1558            if 'info' in self.__dict__:
1559                out.info = self.info
1560            out.parent_table = None
1561            # we need this because __getitem__ does a shallow copy of indices
1562            if out.indices is self.indices:
1563                out.indices = []
1564            out._copy_attrs(self)
1565        return out
1566
1567    def __setitem__(self, index, value):
1568        # Issue warning for string assignment that truncates ``value``
1569        if self.dtype.char == 'S':
1570            value = self._encode_str(value)
1571
1572        if issubclass(self.dtype.type, np.character):
1573            # Account for a bug in np.ma.MaskedArray setitem.
1574            # https://github.com/numpy/numpy/issues/8624
1575            value = np.ma.asanyarray(value, dtype=self.dtype.type)
1576
1577            # Check for string truncation after filling masked items with
1578            # empty (zero-length) string.  Note that filled() does not make
1579            # a copy if there are no masked items.
1580            self._check_string_truncate(value.filled(''))
1581
1582        # update indices
1583        self.info.adjust_indices(index, value, len(self))
1584
1585        ma.MaskedArray.__setitem__(self, index, value)
1586
    # Re-bind these BaseColumn attributes directly on this class.
    # We do this to make the methods show up in the API docs.
    name = BaseColumn.name
    copy = BaseColumn.copy
    more = BaseColumn.more
    pprint = BaseColumn.pprint
    pformat = BaseColumn.pformat
    convert_unit_to = BaseColumn.convert_unit_to
1594