1# Licensed under a 3-clause BSD style license - see LICENSE.rst
2""" An extensible ASCII table reader and writer.
3
4core.py:
5  Core base classes and functions for reading and writing tables.
6
7:Copyright: Smithsonian Astrophysical Observatory (2010)
8:Author: Tom Aldcroft (aldcroft@head.cfa.harvard.edu)
9"""
10
11
12import copy
13import csv
14import functools
15import itertools
16import operator
17import os
18import re
19import warnings
20import inspect
21import fnmatch
22
23from collections import OrderedDict
24from contextlib import suppress
25from io import StringIO
26
27import numpy
28
29from astropy.utils.exceptions import AstropyWarning
30
31from astropy.table import Table
32from astropy.utils.data import get_readable_fileobj
33from . import connect
34from .docs import READ_DOCSTRING, WRITE_DOCSTRING
35
36# Global dictionary mapping format arg to the corresponding Reader class
37FORMAT_CLASSES = {}
38
39# Similar dictionary for fast readers
40FAST_CLASSES = {}
41
42
43def _check_multidim_table(table, max_ndim):
44    """Check that ``table`` has only columns with ndim <= ``max_ndim``
45
46    Currently ECSV is the only built-in format that supports output of arbitrary
47    N-d columns, but HTML supports 2-d.
48    """
49    # No limit?
50    if max_ndim is None:
51        return
52
53    # Check for N-d columns
54    nd_names = [col.info.name for col in table.itercols() if len(col.shape) > max_ndim]
55    if nd_names:
56        raise ValueError(f'column(s) with dimension > {max_ndim} '
57                         "cannot be be written with this format, try using 'ecsv' "
58                         "(Enhanced CSV) format")
59
60
class CsvWriter:
    """
    Internal class to replace the csv writer ``writerow`` and ``writerows``
    functions so that in the case of ``delimiter=' '`` and
    ``quoting=csv.QUOTE_MINIMAL``, the output field value is quoted for empty
    fields (when value == '').

    This changes the API slightly in that the writerow() and writerows()
    methods return the output written string instead of the length of
    that string.

    Examples
    --------

    >>> from astropy.io.ascii.core import CsvWriter
    >>> writer = CsvWriter(delimiter=' ')
    >>> print(writer.writerow(['hello', '', 'world']))
    hello "" world
    """
    # Random 16-character string that gets injected instead of any
    # empty fields and is then replaced post-write with doubled-quotechar.
    # Created with:
    # ''.join(random.choice(string.printable[:90]) for _ in range(16))
    replace_sentinel = '2b=48Av%0-V3p>bX'

    def __init__(self, csvfile=None, **kwargs):
        self.csvfile = csvfile

        # All output from the real csv.writer() is captured in this temporary
        # StringIO buffer and then (optionally) forwarded to ``csvfile``.
        self.temp_out = StringIO()
        self.writer = csv.writer(self.temp_out, **kwargs)

        dialect = self.writer.dialect
        self.quotechar2 = dialect.quotechar * 2
        # Empty-field quoting is only needed for minimal quoting with a
        # space delimiter.
        self.quote_empty = (dialect.quoting == csv.QUOTE_MINIMAL
                            and dialect.delimiter == ' ')

    def _substitute_empty(self, values):
        """Replace empty-string fields in ``values`` (in place) with the
        sentinel string.  Return True if anything was replaced."""
        replaced = False
        for idx, val in enumerate(values):
            if val == '':
                values[idx] = self.replace_sentinel
                replaced = True
        return replaced

    def writerow(self, values):
        """
        Similar to csv.writer.writerow but with the custom quoting behavior.
        Returns the written string instead of the length of that string.
        """
        # If QUOTE_MINIMAL and space-delimited then swap empty fields for the
        # sentinel value before the real writer sees them.
        has_empty = self._substitute_empty(values) if self.quote_empty else False
        return self._writerow(self.writer.writerow, values, has_empty)

    def writerows(self, values_list):
        """
        Similar to csv.writer.writerows but with the custom quoting behavior.
        Returns the written string instead of the length of that string.
        """
        has_empty = False
        if self.quote_empty:
            for row in values_list:
                if self._substitute_empty(row):
                    has_empty = True
        return self._writerow(self.writer.writerows, values_list, has_empty)

    def _writerow(self, writerow_func, values, has_empty):
        """
        Call ``writerow_func`` (either writerow or writerows) with ``values``.
        If it has empty fields that have been replaced then change those
        sentinel strings back to quoted empty strings, e.g. ``""``.
        """
        # Reset the temporary buffer, then let the real csv machinery write.
        self.temp_out.seek(0)
        self.temp_out.truncate()
        writerow_func(values)

        row_string = self.temp_out.getvalue()

        if self.quote_empty and has_empty:
            row_string = re.sub(self.replace_sentinel, self.quotechar2, row_string)

        # If self.csvfile is defined then write the output.  In practice the
        # pure Python writer calls with csvfile=None, while the fast writer
        # calls with a file-like object.
        if self.csvfile:
            self.csvfile.write(row_string)

        return row_string
156
157
class MaskedConstant(numpy.ma.core.MaskedConstant):
    """A trivial extension of numpy.ma.masked

    We want to be able to put the generic term ``masked`` into a dictionary.
    The constant ``numpy.ma.masked`` is not hashable (see
    https://github.com/numpy/numpy/issues/4660), so we need to extend it
    here with a hash value.

    See https://github.com/numpy/numpy/issues/11021 for rationale for
    __copy__ and __deepcopy__ methods.
    """

    def __hash__(self):
        '''All instances of this class shall have the same hash.'''
        # Any large number will do.
        return 1234567890

    def __copy__(self):
        """This is a singleton so just return self."""
        return self

    def __deepcopy__(self, memo):
        """This is a singleton so just return self; ``memo`` is unused."""
        return self


# Hashable sentinel used in this module to mark masked (missing) values,
# e.g. as a key in the ``fill_values`` specifications of BaseData.
masked = MaskedConstant()
184
185
class InconsistentTableError(ValueError):
    """Error raised when an input table is inconsistent in some way.

    By default ``BaseReader`` raises an instance of this class when a data
    row does not match the header.
    """
193
194
class OptionalTableImportError(ImportError):
    """Error raised when a dependency for table reading is not present.

    Optional readers with certain required dependencies raise this whenever
    they cannot operate because of an ImportError.
    """
203
204
class ParameterError(NotImplementedError):
    """Error raised when a reader cannot handle a passed parameter.

    The C-based fast readers in ``io.ascii`` raise this upon encountering
    a parameter that the C engine cannot handle.
    """
213
214
class FastOptionsError(NotImplementedError):
    """Error raised when one of the specified options for fast reading is
    invalid."""
220
221
class NoType:
    """Root of the column data-type hierarchy.

    Serves as the default ``type`` of a ``Column`` and as the superclass
    of the ``StrType`` and ``NumType`` classes.
    """
229
230
class StrType(NoType):
    """Column data type for text (string) data."""
235
236
class NumType(NoType):
    """Column data type for numerical data."""
241
242
class FloatType(NumType):
    """Column data type for floating-point data."""
247
248
class BoolType(NoType):
    """Column data type for boolean data."""
253
254
class IntType(NumType):
    """Column data type for integer data."""
259
260
class AllType(StrType, FloatType, IntType):
    """Column data type that is a subclass of all other data types.

    ``convert_numpy`` returns this type if the given numpy type does not
    match ``StrType``, ``FloatType``, or ``IntType``.
    """
268
269
class Column:
    """Table column.

    The key attributes of a Column object are:

    * **name** : column name
    * **type** : column type (NoType, StrType, NumType, FloatType, IntType)
    * **dtype** : numpy dtype (optional, overrides **type** if set)
    * **str_vals** : list of column values as strings
    * **fill_values** : dict of fill values
    * **shape** : list of element shape (default [] => scalar)
    * **data** : list of converted column values
    * **subtype** : actual datatype for columns serialized with JSON
    """

    def __init__(self, name):
        self.name = name
        self.type = NoType       # generic type (Int, Float, Str etc)
        self.dtype = None        # numpy dtype if available
        self.str_vals = []       # column values as strings
        self.fill_values = {}    # mapping of bad value -> replacement string
        self.shape = []          # element shape; [] means scalar
        self.subtype = None      # actual datatype for JSON-serialized columns
293
294
class BaseInputter:
    """
    Get the lines from the table input and return a list of lines.

    """

    encoding = None
    """Encoding used to read the file"""

    def get_lines(self, table, newline=None):
        """
        Get the lines from the ``table`` input. The input table can be one of:

        * File name
        * String (newline separated) with all header and data lines (must have at least 2 lines)
        * File-like object with read() method
        * List of strings

        Parameters
        ----------
        table : str, file-like, list
            Can be either a file name, string (newline separated) with all header and data
            lines (must have at least 2 lines), a file-like object with a
            ``read()`` method, or a list of strings.
        newline: line separator, if `None` use OS default from ``splitlines()``.

        Returns
        -------
        lines : list
            List of lines

        Raises
        ------
        TypeError
            If ``table`` is neither string-like, file-like, nor an iterable
            of strings.
        """
        try:
            # A file-like object (has read()) or a string with no newline
            # characters (i.e. a file name) is read from disk.  Note that for
            # non-string input ``table + ''`` raises TypeError, which
            # deliberately routes control to the iterable handling below.
            if (hasattr(table, 'read')
                    or ('\n' not in table + '' and '\r' not in table + '')):
                with get_readable_fileobj(table,
                                          encoding=self.encoding) as fileobj:
                    table = fileobj.read()
            if newline is None:
                lines = table.splitlines()
            else:
                lines = table.split(newline)
        except TypeError:
            try:
                # See if table supports indexing, slicing, and iteration
                table[0]
                table[0:1]
                iter(table)
                if len(table) > 1:
                    lines = table
                else:
                    # treat single entry as if string had been passed directly
                    if newline is None:
                        lines = table[0].splitlines()
                    else:
                        lines = table[0].split(newline)

            except TypeError:
                raise TypeError(
                    'Input "table" must be a string (filename or data) or an iterable')

        return self.process_lines(lines)

    def process_lines(self, lines):
        """Process lines for subsequent use.  In the default case do nothing.
        This routine is not generally intended for removing comment lines or
        stripping whitespace.  These are done (if needed) in the header and
        data line processing.

        Override this method if something more has to be done to convert raw
        input lines to the table rows.  For example the
        ContinuationLinesInputter derived class accounts for continuation
        characters if a row is split into lines."""
        return lines
368
369
class BaseSplitter:
    """Base splitter that uses python's split method to do the work.

    Quoted values are not handled.  A key feature is that ``__call__`` is a
    generator which yields the list of split values for one line at each
    iteration.

    Two methods are intended to be overridden: ``process_line()`` does
    pre-processing on each input line before splitting, and ``process_val()``
    does post-processing on each split string value.  By default both apply
    the string ``strip()`` function.  Either can be set to another function
    via the instance attribute or be disabled entirely, for example::

      reader.header.splitter.process_val = lambda x: x.lstrip()
      reader.data.splitter.process_val = None

    """

    delimiter = None
    """ one-character string used to separate fields """

    def process_line(self, line):
        """Strip whitespace from both ends of ``line``.

        This is especially useful for whitespace-delimited files to prevent
        spurious columns at the beginning or end.
        """
        return line.strip()

    def process_val(self, val):
        """Strip whitespace from both ends of the value ``val``."""
        return val.strip()

    def __call__(self, lines):
        """Yield the list of split values for each line in ``lines``."""
        if self.process_line:
            lines = (self.process_line(raw) for raw in lines)
        for line in lines:
            fields = line.split(self.delimiter)
            if self.process_val:
                yield [self.process_val(field) for field in fields]
            else:
                yield fields

    def join(self, vals):
        """Join ``vals`` into one line separated by ``delimiter`` (a single
        space when ``delimiter`` is None)."""
        sep = ' ' if self.delimiter is None else self.delimiter
        return sep.join(str(val) for val in vals)
418
419
class DefaultSplitter(BaseSplitter):
    """Default class to split strings into columns using python csv.  The class
    attributes are taken from the csv Dialect class.

    Typical usage::

      # lines = ..
      splitter = ascii.DefaultSplitter()
      for col_vals in splitter(lines):
          for col_val in col_vals:
               ...

    """
    # NOTE: the attribute docstrings below were previously mislabeled
    # (rotated by one attribute); they now match the csv.Dialect semantics.
    delimiter = ' '
    """ one-character string used to separate fields. """
    quotechar = '"'
    """ one-character string used to quote fields containing special characters """
    doublequote = True
    """ control how instances of *quotechar* in a field are quoted """
    escapechar = None
    """ one-character string used to remove special meaning from the following character """
    quoting = csv.QUOTE_MINIMAL
    """ control when quotes are recognized by the reader """
    skipinitialspace = True
    """ ignore whitespace immediately following the delimiter """
    csv_writer = None
    csv_writer_out = StringIO()

    def process_line(self, line):
        """Remove whitespace at the beginning or end of line.  This is especially useful for
        whitespace-delimited files to prevent spurious columns at the beginning or end.
        If splitting on whitespace then replace unquoted tabs with space first"""
        if self.delimiter == r'\s':
            line = _replace_tab_with_space(line, self.escapechar, self.quotechar)
        return line.strip()

    def __call__(self, lines):
        """Return an iterator over the table ``lines``, where each iterator output
        is a list of the split line values.

        Parameters
        ----------
        lines : list
            List of table lines

        Yields
        ------
        line : list of str
            Each line's split values.

        """
        if self.process_line:
            lines = [self.process_line(x) for x in lines]

        # The csv module has no whitespace mode, so map r'\s' to a plain
        # space (process_line already converted unquoted tabs to spaces).
        delimiter = ' ' if self.delimiter == r'\s' else self.delimiter

        csv_reader = csv.reader(lines,
                                delimiter=delimiter,
                                doublequote=self.doublequote,
                                escapechar=self.escapechar,
                                quotechar=self.quotechar,
                                quoting=self.quoting,
                                skipinitialspace=self.skipinitialspace
                                )
        for vals in csv_reader:
            if self.process_val:
                yield [self.process_val(x) for x in vals]
            else:
                yield vals

    def join(self, vals):
        """Join ``vals`` into a single csv-quoted output line.

        The ``CsvWriter`` instance is created lazily on first use and then
        cached on the splitter instance.
        """
        delimiter = ' ' if self.delimiter is None else str(self.delimiter)

        if self.csv_writer is None:
            self.csv_writer = CsvWriter(delimiter=delimiter,
                                        doublequote=self.doublequote,
                                        escapechar=self.escapechar,
                                        quotechar=self.quotechar,
                                        quoting=self.quoting,
                                        lineterminator='')
        if self.process_val:
            vals = [self.process_val(x) for x in vals]
        out = self.csv_writer.writerow(vals)

        return out
506
507
508def _replace_tab_with_space(line, escapechar, quotechar):
509    """Replace tabs with spaces in given string, preserving quoted substrings
510
511    Parameters
512    ----------
513    line : str
514        String containing tabs to be replaced with spaces.
515    escapechar : str
516        Character in ``line`` used to escape special characters.
517    quotechar : str
518        Character in ``line`` indicating the start/end of a substring.
519
520    Returns
521    -------
522    line : str
523        A copy of ``line`` with tabs replaced by spaces, preserving quoted substrings.
524    """
525    newline = []
526    in_quote = False
527    lastchar = 'NONE'
528    for char in line:
529        if char == quotechar and lastchar != escapechar:
530            in_quote = not in_quote
531        if char == '\t' and not in_quote:
532            char = ' '
533        lastchar = char
534        newline.append(char)
535    return ''.join(newline)
536
537
538def _get_line_index(line_or_func, lines):
539    """Return the appropriate line index, depending on ``line_or_func`` which
540    can be either a function, a positive or negative int, or None.
541    """
542
543    if hasattr(line_or_func, '__call__'):
544        return line_or_func(lines)
545    elif line_or_func:
546        if line_or_func >= 0:
547            return line_or_func
548        else:
549            n_lines = sum(1 for line in lines)
550            return n_lines + line_or_func
551    else:
552        return line_or_func
553
554
class BaseHeader:
    """
    Base table header reader
    """
    auto_format = 'col{}'
    """ format string for auto-generating column names """
    start_line = None
    """ None, int, or a function of ``lines`` that returns None or int """
    comment = None
    """ regular expression for comment lines """
    splitter_class = DefaultSplitter
    """ Splitter class for splitting data lines into columns """
    names = None
    """ list of names corresponding to each data column """
    write_comment = False
    # Prefix string for writing comment lines; False/None disables writing.
    write_spacer_lines = ['ASCII_TABLE_WRITE_SPACER_LINE']
    # Filler lines emitted before the header row when start_line > 0.

    def __init__(self):
        # Each header gets its own splitter instance so per-reader
        # customization (e.g. process_val overrides) is possible.
        self.splitter = self.splitter_class()

    def _set_cols_from_names(self):
        """Create ``self.cols``, one ``Column`` per name in ``self.names``."""
        self.cols = [Column(name=x) for x in self.names]

    def update_meta(self, lines, meta):
        """
        Extract any table-level metadata, e.g. keywords, comments, column metadata, from
        the table ``lines`` and update the OrderedDict ``meta`` in place.  This base
        method extracts comment lines and stores them in ``meta`` for output.
        """
        if self.comment:
            re_comment = re.compile(self.comment)
            comment_lines = [x for x in lines if re_comment.match(x)]
        else:
            comment_lines = []
        # Strip the comment prefix and surrounding whitespace.  When
        # self.comment is None this list is empty, so re.sub never runs.
        comment_lines = [re.sub('^' + self.comment, '', x).strip()
                         for x in comment_lines]
        if comment_lines:
            meta.setdefault('table', {})['comments'] = comment_lines

    def get_cols(self, lines):
        """Initialize the header Column objects from the table ``lines``.

        Based on the previously set Header attributes find or create the column names.
        Sets ``self.cols`` with the list of Columns.

        Parameters
        ----------
        lines : list
            List of table lines

        Raises
        ------
        InconsistentTableError
            If names must be auto-generated but no data lines are found.
        ValueError
            If ``start_line`` is set but no matching header line exists.
        """

        start_line = _get_line_index(self.start_line, self.process_lines(lines))
        if start_line is None:
            # No header line so auto-generate names from n_data_cols
            # Get the data values from the first line of table data to determine n_data_cols
            try:
                first_data_vals = next(self.data.get_str_vals())
            except StopIteration:
                raise InconsistentTableError('No data lines found so cannot autogenerate '
                                             'column names')
            n_data_cols = len(first_data_vals)
            self.names = [self.auto_format.format(i)
                          for i in range(1, n_data_cols + 1)]

        else:
            # Advance to the header line; the for-else raises if the
            # processed lines run out before reaching start_line.
            for i, line in enumerate(self.process_lines(lines)):
                if i == start_line:
                    break
            else:  # No header line matching
                raise ValueError('No header line found in table')

            self.names = next(self.splitter([line]))

        self._set_cols_from_names()

    def process_lines(self, lines):
        """Generator to yield non-blank and non-comment lines"""
        re_comment = re.compile(self.comment) if self.comment else None
        # Yield non-comment lines
        for line in lines:
            if line.strip() and (not self.comment or not re_comment.match(line)):
                yield line

    def write_comments(self, lines, meta):
        """WRITE: Append each comment in ``meta['comments']`` to ``lines``,
        prefixed with ``self.write_comment``."""
        if self.write_comment not in (False, None):
            for comment in meta.get('comments', []):
                lines.append(self.write_comment + comment)

    def write(self, lines):
        """WRITE: Append the joined column-name header row to ``lines``,
        preceded by ``start_line`` spacer lines."""
        if self.start_line is not None:
            for i, spacer_line in zip(range(self.start_line),
                                      itertools.cycle(self.write_spacer_lines)):
                lines.append(spacer_line)
            lines.append(self.splitter.join([x.info.name for x in self.cols]))

    @property
    def colnames(self):
        """Return the column names of the table"""
        # On write self.cols may hold astropy table columns (name on .info)
        # rather than this module's Column objects (name attribute).
        return tuple(col.name if isinstance(col, Column) else col.info.name
                     for col in self.cols)

    def remove_columns(self, names):
        """
        Remove several columns from the table.

        Parameters
        ----------
        names : list
            A list containing the names of the columns to remove

        Raises
        ------
        KeyError
            If any name in ``names`` is not an existing column.
        """
        colnames = self.colnames
        for name in names:
            if name not in colnames:
                raise KeyError(f"Column {name} does not exist")

        self.cols = [col for col in self.cols if col.name not in names]

    def rename_column(self, name, new_name):
        """
        Rename a column.

        Parameters
        ----------
        name : str
            The current name of the column.
        new_name : str
            The new name for the column

        Raises
        ------
        KeyError
            If ``name`` is not an existing column.
        TypeError
            If the column is not a ``Column`` instance (as can happen on
            write, when ``self.cols`` holds other column types).
        """
        try:
            idx = self.colnames.index(name)
        except ValueError:
            raise KeyError(f"Column {name} does not exist")

        col = self.cols[idx]

        # For writing self.cols can contain cols that are not Column.  Raise
        # exception in that case.
        if isinstance(col, Column):
            col.name = new_name
        else:
            raise TypeError(f'got column type {type(col)} instead of required '
                            f'{Column}')

    def get_type_map_key(self, col):
        """Return the key for looking up ``col`` in ``col_type_map``."""
        return col.raw_type

    def get_col_type(self, col):
        """Return the column type for ``col`` via ``col_type_map`` lookup.

        ``col_type_map`` is expected to be supplied by subclasses — it is not
        defined on this base class.  Raises ValueError for an unknown type.
        """
        try:
            type_map_key = self.get_type_map_key(col)
            return self.col_type_map[type_map_key.lower()]
        except KeyError:
            raise ValueError('Unknown data type ""{}"" for column "{}"'.format(
                col.raw_type, col.name))

    def check_column_names(self, names, strict_names, guessing):
        """
        Check column names.

        This must be done before applying the names transformation
        so that guessing will fail appropriately if ``names`` is supplied.
        For instance if the basic reader is given a table with no column header
        row.

        Parameters
        ----------
        names : list
            User-supplied list of column names
        strict_names : bool
            Whether to impose extra requirements on names
        guessing : bool
            True if this method is being called while guessing the table format

        Raises
        ------
        InconsistentTableError
            If a name fails the strict requirements, or ``names`` has a
            different length than the table's columns.
        ValueError
            If guessing with fewer than two columns (non-ECSV formats).
        """
        if strict_names:
            # Impose strict requirements on column names (normally used in guessing)
            bads = [" ", ",", "|", "\t", "'", '"']
            for name in self.colnames:
                if (_is_number(name) or len(name) == 0
                        or name[0] in bads or name[-1] in bads):
                    raise InconsistentTableError(
                        f'Column name {name!r} does not meet strict name requirements')
        # When guessing require at least two columns, except for ECSV which can
        # reliably be guessed from the header requirements.
        if guessing and len(self.colnames) <= 1 and self.__class__.__name__ != 'EcsvHeader':
            raise ValueError('Table format guessing requires at least two columns, got {}'
                             .format(list(self.colnames)))

        if names is not None and len(names) != len(self.colnames):
            raise InconsistentTableError(
                'Length of names argument ({}) does not match number'
                ' of table columns ({})'.format(len(names), len(self.colnames)))
746
747
748class BaseData:
749    """
750    Base table data reader.
751    """
752    start_line = None
753    """ None, int, or a function of ``lines`` that returns None or int """
754    end_line = None
755    """ None, int, or a function of ``lines`` that returns None or int """
756    comment = None
757    """ Regular expression for comment lines """
758    splitter_class = DefaultSplitter
759    """ Splitter class for splitting data lines into columns """
760    write_spacer_lines = ['ASCII_TABLE_WRITE_SPACER_LINE']
761    fill_include_names = None
762    fill_exclude_names = None
763    fill_values = [(masked, '')]
764    formats = {}
765
766    def __init__(self):
767        # Need to make sure fill_values list is instance attribute, not class attribute.
768        # On read, this will be overwritten by the default in the ui.read (thus, in
769        # the current implementation there can be no different default for different
770        # Readers). On write, ui.py does not specify a default, so this line here matters.
771        self.fill_values = copy.copy(self.fill_values)
772        self.formats = copy.copy(self.formats)
773        self.splitter = self.splitter_class()
774
775    def process_lines(self, lines):
776        """
777        READ: Strip out comment lines and blank lines from list of ``lines``
778
779        Parameters
780        ----------
781        lines : list
782            All lines in table
783
784        Returns
785        -------
786        lines : list
787            List of lines
788
789        """
790        nonblank_lines = (x for x in lines if x.strip())
791        if self.comment:
792            re_comment = re.compile(self.comment)
793            return [x for x in nonblank_lines if not re_comment.match(x)]
794        else:
795            return [x for x in nonblank_lines]
796
797    def get_data_lines(self, lines):
798        """READ: Set ``data_lines`` attribute to lines slice comprising table data values.
799        """
800        data_lines = self.process_lines(lines)
801        start_line = _get_line_index(self.start_line, data_lines)
802        end_line = _get_line_index(self.end_line, data_lines)
803
804        if start_line is not None or end_line is not None:
805            self.data_lines = data_lines[slice(start_line, end_line)]
806        else:  # Don't copy entire data lines unless necessary
807            self.data_lines = data_lines
808
809    def get_str_vals(self):
810        """Return a generator that returns a list of column values (as strings)
811        for each data line."""
812        return self.splitter(self.data_lines)
813
814    def masks(self, cols):
815        """READ: Set fill value for each column and then apply that fill value
816
817        In the first step it is evaluated with value from ``fill_values`` applies to
818        which column using ``fill_include_names`` and ``fill_exclude_names``.
819        In the second step all replacements are done for the appropriate columns.
820        """
821        if self.fill_values:
822            self._set_fill_values(cols)
823            self._set_masks(cols)
824
825    def _set_fill_values(self, cols):
826        """READ, WRITE: Set fill values of individual cols based on fill_values of BaseData
827
828        fill values has the following form:
829        <fill_spec> = (<bad_value>, <fill_value>, <optional col_name>...)
830        fill_values = <fill_spec> or list of <fill_spec>'s
831
832        """
833        if self.fill_values:
834            # when we write tables the columns may be astropy.table.Columns
835            # which don't carry a fill_values by default
836            for col in cols:
837                if not hasattr(col, 'fill_values'):
838                    col.fill_values = {}
839
840            # if input is only one <fill_spec>, then make it a list
841            with suppress(TypeError):
842                self.fill_values[0] + ''
843                self.fill_values = [self.fill_values]
844
845            # Step 1: Set the default list of columns which are affected by
846            # fill_values
847            colnames = set(self.header.colnames)
848            if self.fill_include_names is not None:
849                colnames.intersection_update(self.fill_include_names)
850            if self.fill_exclude_names is not None:
851                colnames.difference_update(self.fill_exclude_names)
852
853            # Step 2a: Find out which columns are affected by this tuple
854            # iterate over reversed order, so last condition is set first and
855            # overwritten by earlier conditions
856            for replacement in reversed(self.fill_values):
857                if len(replacement) < 2:
858                    raise ValueError("Format of fill_values must be "
859                                     "(<bad>, <fill>, <optional col1>, ...)")
860                elif len(replacement) == 2:
861                    affect_cols = colnames
862                else:
863                    affect_cols = replacement[2:]
864
865                for i, key in ((i, x) for i, x in enumerate(self.header.colnames)
866                               if x in affect_cols):
867                    cols[i].fill_values[replacement[0]] = str(replacement[1])
868
869    def _set_masks(self, cols):
870        """READ: Replace string values in col.str_vals and set masks"""
871        if self.fill_values:
872            for col in (col for col in cols if col.fill_values):
873                col.mask = numpy.zeros(len(col.str_vals), dtype=bool)
874                for i, str_val in ((i, x) for i, x in enumerate(col.str_vals)
875                                   if x in col.fill_values):
876                    col.str_vals[i] = col.fill_values[str_val]
877                    col.mask[i] = True
878
879    def _replace_vals(self, cols):
880        """WRITE: replace string values in col.str_vals"""
881        if self.fill_values:
882            for col in (col for col in cols if col.fill_values):
883                for i, str_val in ((i, x) for i, x in enumerate(col.str_vals)
884                                   if x in col.fill_values):
885                    col.str_vals[i] = col.fill_values[str_val]
886                if masked in col.fill_values and hasattr(col, 'mask'):
887                    mask_val = col.fill_values[masked]
888                    for i in col.mask.nonzero()[0]:
889                        col.str_vals[i] = mask_val
890
891    def str_vals(self):
892        """WRITE: convert all values in table to a list of lists of strings
893
894        This sets the fill values and possibly column formats from the input
895        formats={} keyword, then ends up calling table.pprint._pformat_col_iter()
896        by a circuitous path. That function does the real work of formatting.
897        Finally replace anything matching the fill_values.
898
899        Returns
900        -------
901        values : list of list of str
902        """
903        self._set_fill_values(self.cols)
904        self._set_col_formats()
905        for col in self.cols:
906            col.str_vals = list(col.info.iter_str_vals())
907        self._replace_vals(self.cols)
908        return [col.str_vals for col in self.cols]
909
910    def write(self, lines):
911        """Write ``self.cols`` in place to ``lines``.
912
913        Parameters
914        ----------
915        lines : list
916            List for collecting output of writing self.cols.
917        """
918        if hasattr(self.start_line, '__call__'):
919            raise TypeError('Start_line attribute cannot be callable for write()')
920        else:
921            data_start_line = self.start_line or 0
922
923        while len(lines) < data_start_line:
924            lines.append(itertools.cycle(self.write_spacer_lines))
925
926        col_str_iters = self.str_vals()
927        for vals in zip(*col_str_iters):
928            lines.append(self.splitter.join(vals))
929
930    def _set_col_formats(self):
931        """WRITE: set column formats."""
932        for col in self.cols:
933            if col.info.name in self.formats:
934                col.info.format = self.formats[col.info.name]
935
936
def convert_numpy(numpy_type):
    """Return a ``(converter, converter_type)`` pair for ``numpy_type``.

    Parameters
    ----------
    numpy_type : numpy data-type
        The numpy type required of an array returned by ``converter``. Must be
        a valid `numpy type <https://numpy.org/doc/stable/user/basics.types.html>`_
        (e.g., numpy.uint, numpy.int8, numpy.int64, numpy.float64) or a python
        type covered by a numpy type (e.g., int, float, str, bool).

    Returns
    -------
    converter : callable
        Function that accepts a list and converts it to a numpy array of type
        ``numpy_type``.
    converter_type : type
        Generic io.ascii data type produced by the converter function.

    Raises
    ------
    ValueError
        Raised by ``converter`` if the list elements could not be converted to
        the required type.
    """

    # Map the dtype name of numpy_type onto the generic io.ascii type class.
    # Note that e.g. 'uint8' matches IntType via the 'int' substring.
    type_name = numpy.array([], dtype=numpy_type).dtype.name
    converter_type = AllType
    for substr, klass in (('int', IntType), ('float', FloatType),
                          ('bool', BoolType), ('str', StrType)):
        if substr in type_name:
            converter_type = klass
            break

    def bool_converter(vals):
        """
        Convert values "False" and "True" to bools.  Raise an exception
        for any other string values.
        """
        if len(vals) == 0:
            return numpy.array([], dtype=bool)

        # For long inputs, vet a small prefix first so that obviously
        # non-bool columns fail fast.
        if len(vals) > 10000:
            svals = numpy.asarray(vals[:1000])
            if not numpy.all((svals == 'False')
                             | (svals == 'True')
                             | (svals == '0')
                             | (svals == '1')):
                raise ValueError('bool input strings must be False, True, 0, 1, or ""')
        vals = numpy.asarray(vals)

        trues = (vals == 'True') | (vals == '1')
        falses = (vals == 'False') | (vals == '0')
        if not numpy.all(trues | falses):
            raise ValueError('bool input strings must be only False, True, 0, 1, or ""')

        return trues

    def generic_converter(vals):
        return numpy.array(vals, numpy_type)

    converter = bool_converter if converter_type is BoolType else generic_converter

    return converter, converter_type
1009
1010
class BaseOutputter:
    """Output table as a dict of column objects keyed on column name.  The
    table data are stored as plain python lists within the column objects.
    """
    # Mapping of column name (or fnmatch pattern) to a list of converters
    # as produced by ``convert_numpy``.  Overridden via the ``converters``
    # keyword argument in ``_get_reader``.
    converters = {}
    # Derived classes must define default_converters and __call__

    @staticmethod
    def _validate_and_copy(col, converters):
        """Validate the format for the type converters and then copy those
        which are valid converters for this column (i.e. converter type is
        a subclass of col.type)"""
        converters_out = []
        try:
            for converter in converters:
                converter_func, converter_type = converter
                # Every valid converter type class derives from NoType.
                if not issubclass(converter_type, NoType):
                    raise ValueError()
                if issubclass(converter_type, col.type):
                    converters_out.append((converter_func, converter_type))

        except (ValueError, TypeError):
            raise ValueError('Error: invalid format for converters, see '
                             'documentation\n{}'.format(converters))
        return converters_out

    def _convert_vals(self, cols):
        """Convert ``col.str_vals`` to ``col.data`` for each column.

        For each column the first applicable converter that succeeds wins;
        converters that raise TypeError/ValueError are popped and the next
        one is tried.  Raises ValueError if every converter fails.
        """
        for col in cols:
            for key, converters in self.converters.items():
                if fnmatch.fnmatch(col.name, key):
                    break
            else:
                if col.dtype is not None:
                    converters = [convert_numpy(col.dtype)]
                else:
                    converters = self.default_converters

            col.converters = self._validate_and_copy(col, converters)

            # Catch the last error in order to provide additional information
            # in case all attempts at column conversion fail.  The initial
            # value of last_err will apply if no converters are defined
            # and the first col.converters[0] access raises IndexError.
            last_err = 'no converters defined'

            while not hasattr(col, 'data'):
                # Try converters, popping the unsuccessful ones from the list.
                # If there are no converters left here then fail.
                if not col.converters:
                    raise ValueError(f'Column {col.name} failed to convert: {last_err}')

                converter_func, converter_type = col.converters[0]
                if not issubclass(converter_type, col.type):
                    raise TypeError('converter type does not match column type')

                try:
                    col.data = converter_func(col.str_vals)
                    col.type = converter_type
                except (TypeError, ValueError) as err:
                    col.converters.pop(0)
                    last_err = err
                except OverflowError as err:
                    # Overflow during conversion (most likely an int that
                    # doesn't fit in native C long). Put string at the top of
                    # the converters list for the next while iteration.
                    warnings.warn(
                        "OverflowError converting to {} in column {}, reverting to String."
                        .format(converter_type.__name__, col.name), AstropyWarning)
                    # ``numpy.str`` (an alias for builtin ``str``) was
                    # deprecated in NumPy 1.20 and removed in 1.24; use the
                    # builtin directly.
                    col.converters.insert(0, convert_numpy(str))
                    last_err = err
1081
1082
1083def _deduplicate_names(names):
1084    """Ensure there are no duplicates in ``names``
1085
1086    This is done by iteratively adding ``_<N>`` to the name for increasing N
1087    until the name is unique.
1088    """
1089    new_names = []
1090    existing_names = set()
1091
1092    for name in names:
1093        base_name = name + '_'
1094        i = 1
1095        while name in existing_names:
1096            # Iterate until a unique name is found
1097            name = base_name + str(i)
1098            i += 1
1099        new_names.append(name)
1100        existing_names.add(name)
1101
1102    return new_names
1103
1104
class TableOutputter(BaseOutputter):
    """
    Output the table as an astropy.table.Table object.
    """

    # Try int first, then float, then str, until one succeeds for a column.
    default_converters = [convert_numpy(int),
                          convert_numpy(float),
                          convert_numpy(str)]

    def __call__(self, cols, meta):
        """Build a Table from ``cols`` using table-level metadata in ``meta``."""
        # Sets col.data to numpy array and col.type to io.ascii Type class
        # (e.g. FloatType) for each col.
        self._convert_vals(cols)

        t_cols = []
        for col in cols:
            if hasattr(col, 'mask') and numpy.any(col.mask):
                t_cols.append(numpy.ma.MaskedArray(col.data, mask=col.mask))
            else:
                t_cols.append(col.data)
        out = Table(t_cols, names=[col.name for col in cols], meta=meta['table'])

        # Carry per-column attributes and metadata over to the output table.
        for col, out_col in zip(cols, out.columns.values()):
            for attr in ('format', 'unit', 'description'):
                if hasattr(col, attr):
                    setattr(out_col, attr, getattr(col, attr))
            if hasattr(col, 'meta'):
                out_col.meta.update(col.meta)

        return out
1132
1133
class MetaBaseReader(type):
    """Metaclass that registers each concrete Reader class.

    On creation of a class with a ``_format_name`` attribute, records it in
    ``FORMAT_CLASSES`` (and ``FAST_CLASSES`` if ``_fast`` is set) and registers
    reader/writer/identifier functions with the unified I/O registry for the
    format name and any aliases.
    """
    def __init__(cls, name, bases, dct):
        super().__init__(name, bases, dct)

        # Classes without a _format_name (e.g. abstract bases) are not registered.
        format = dct.get('_format_name')
        if format is None:
            return

        fast = dct.get('_fast')
        if fast is not None:
            FAST_CLASSES[format] = cls

        FORMAT_CLASSES[format] = cls

        # Names under which this format is known to Table.read / Table.write.
        io_formats = ['ascii.' + format] + dct.get('_io_registry_format_aliases', [])

        if dct.get('_io_registry_suffix'):
            # Auto-identify files by suffix for the primary format name.
            func = functools.partial(connect.io_identify, dct['_io_registry_suffix'])
            connect.io_registry.register_identifier(io_formats[0], Table, func)

        for io_format in io_formats:
            func = functools.partial(connect.io_read, io_format)
            header = f"ASCII reader '{io_format}' details\n"
            # Compose the registered function docstring: generic read docs,
            # then an underlined section header, then the class's own docs.
            func.__doc__ = (inspect.cleandoc(READ_DOCSTRING).strip() + '\n\n'
                            + header + re.sub('.', '=', header) + '\n')
            func.__doc__ += inspect.cleandoc(cls.__doc__).strip()
            connect.io_registry.register_reader(io_format, Table, func)

            if dct.get('_io_registry_can_write', True):
                func = functools.partial(connect.io_write, io_format)
                header = f"ASCII writer '{io_format}' details\n"
                func.__doc__ = (inspect.cleandoc(WRITE_DOCSTRING).strip() + '\n\n'
                                + header + re.sub('.', '=', header) + '\n')
                func.__doc__ += inspect.cleandoc(cls.__doc__).strip()
                connect.io_registry.register_writer(io_format, Table, func)
1169
1170
1171def _is_number(x):
1172    with suppress(ValueError):
1173        x = float(x)
1174        return True
1175    return False
1176
1177
1178def _apply_include_exclude_names(table, names, include_names, exclude_names):
1179    """
1180    Apply names, include_names and exclude_names to a table or BaseHeader.
1181
1182    For the latter this relies on BaseHeader implementing ``colnames``,
1183    ``rename_column``, and ``remove_columns``.
1184
1185    Parameters
1186    ----------
1187    table : `~astropy.table.Table`, `~astropy.io.ascii.BaseHeader`
1188        Input table or BaseHeader subclass instance
1189    names : list
1190        List of names to override those in table (set to None to use existing names)
1191    include_names : list
1192        List of names to include in output
1193    exclude_names : list
1194        List of names to exclude from output (applied after ``include_names``)
1195
1196    """
1197    def rename_columns(table, names):
1198        # Rename table column names to those passed by user
1199        # Temporarily rename with names that are not in `names` or `table.colnames`.
1200        # This ensures that rename succeeds regardless of existing names.
1201        xxxs = 'x' * max(len(name) for name in list(names) + list(table.colnames))
1202        for ii, colname in enumerate(table.colnames):
1203            table.rename_column(colname, xxxs + str(ii))
1204
1205        for ii, name in enumerate(names):
1206            table.rename_column(xxxs + str(ii), name)
1207
1208    if names is not None:
1209        rename_columns(table, names)
1210    else:
1211        colnames_uniq = _deduplicate_names(table.colnames)
1212        if colnames_uniq != list(table.colnames):
1213            rename_columns(table, colnames_uniq)
1214
1215    names_set = set(table.colnames)
1216
1217    if include_names is not None:
1218        names_set.intersection_update(include_names)
1219    if exclude_names is not None:
1220        names_set.difference_update(exclude_names)
1221    if names_set != set(table.colnames):
1222        remove_names = set(table.colnames) - names_set
1223        table.remove_columns(remove_names)
1224
1225
class BaseReader(metaclass=MetaBaseReader):
    """Class providing methods to read and write an ASCII table using the specified
    header, data, inputter, and outputter instances.

    Typical usage is to instantiate a Reader() object and customize the
    ``header``, ``data``, ``inputter``, and ``outputter`` attributes.  Each
    of these is an object of the corresponding class.

    There is one method ``inconsistent_handler`` that can be used to customize the
    behavior of ``read()`` in the event that a data row doesn't match the header.
    The default behavior is to raise an InconsistentTableError.

    """

    # Column-name selection/validation options (typically set from the user
    # keyword args by _get_reader()).
    names = None
    include_names = None
    exclude_names = None
    strict_names = False
    # True while the format-guessing machinery is exercising this reader.
    guessing = False
    encoding = None

    # Component classes; subclasses override these to customize behavior.
    header_class = BaseHeader
    data_class = BaseData
    inputter_class = BaseInputter
    outputter_class = TableOutputter

    # Max column dimension that writer supports for this format. Exceptions
    # include ECSV (no limit) and HTML (max_ndim=2).
    max_ndim = 1

    def __init__(self):
        self.header = self.header_class()
        self.data = self.data_class()
        self.inputter = self.inputter_class()
        self.outputter = self.outputter_class()
        # Data and Header instances benefit from a little cross-coupling.  Header may need to
        # know about number of data columns for auto-column name generation and Data may
        # need to know about header (e.g. for fixed-width tables where widths are spec'd in header.
        self.data.header = self.header
        self.header.data = self.data

        # Metadata, consisting of table-level meta and column-level meta.  The latter
        # could include information about column type, description, formatting, etc,
        # depending on the table meta format.
        self.meta = OrderedDict(table=OrderedDict(),
                                cols=OrderedDict())

    def _check_multidim_table(self, table):
        """Check that the dimensions of columns in ``table`` are acceptable.

        The reader class attribute ``max_ndim`` defines the maximum dimension of
        columns that can be written using this format. The base value is ``1``,
        corresponding to normal scalar columns with just a length.

        Parameters
        ----------
        table : `~astropy.table.Table`
            Input table.

        Raises
        ------
        ValueError
            If any column exceeds the number of allowed dimensions
        """
        _check_multidim_table(table, self.max_ndim)

    def read(self, table):
        """Read the ``table`` and return the results in a format determined by
        the ``outputter`` attribute.

        The ``table`` parameter is any string or object that can be processed
        by the instance ``inputter``.  For the base Inputter class ``table`` can be
        one of:

        * File name
        * File-like object
        * String (newline separated) with all header and data lines (must have at least 2 lines)
        * List of strings

        Parameters
        ----------
        table : str, file-like, list
            Input table.

        Returns
        -------
        table : `~astropy.table.Table`
            Output table

        """
        # If ``table`` is a file then store the name in the ``data``
        # attribute. The ``table`` is a "file" if it is a string
        # without the new line specific to the OS.
        with suppress(TypeError):
            # Strings only
            if os.linesep not in table + '':
                self.data.table_name = os.path.basename(table)

        # If one of the newline chars is set as field delimiter, only
        # accept the other one as line splitter
        if self.header.splitter.delimiter == '\n':
            newline = '\r'
        elif self.header.splitter.delimiter == '\r':
            newline = '\n'
        else:
            newline = None

        # Get a list of the lines (rows) in the table
        self.lines = self.inputter.get_lines(table, newline=newline)

        # Set self.data.data_lines to a slice of lines contain the data rows
        self.data.get_data_lines(self.lines)

        # Extract table meta values (e.g. keywords, comments, etc).  Updates self.meta.
        self.header.update_meta(self.lines, self.meta)

        # Get the table column definitions
        self.header.get_cols(self.lines)

        # Make sure columns are valid
        self.header.check_column_names(self.names, self.strict_names, self.guessing)

        # Share the column list with the data splitter as well.
        self.cols = cols = self.header.cols
        self.data.splitter.cols = cols
        n_cols = len(cols)

        # Accumulate string values row by row into the per-column str_vals lists.
        for i, str_vals in enumerate(self.data.get_str_vals()):
            if len(str_vals) != n_cols:
                str_vals = self.inconsistent_handler(str_vals, n_cols)

                # if str_vals is None, we skip this row
                if str_vals is None:
                    continue

                # otherwise, we raise an error only if it is still inconsistent
                if len(str_vals) != n_cols:
                    errmsg = ('Number of header columns ({}) inconsistent with'
                              ' data columns ({}) at data line {}\n'
                              'Header values: {}\n'
                              'Data values: {}'.format(
                                  n_cols, len(str_vals), i,
                                  [x.name for x in cols], str_vals))

                    raise InconsistentTableError(errmsg)

            for j, col in enumerate(cols):
                col.str_vals.append(str_vals[j])

        self.data.masks(cols)
        if hasattr(self.header, 'table_meta'):
            self.meta['table'].update(self.header.table_meta)

        # Apply names/include_names/exclude_names filtering to the header
        # columns before converting to the output table.
        _apply_include_exclude_names(self.header, self.names,
                                     self.include_names, self.exclude_names)

        table = self.outputter(self.header.cols, self.meta)
        self.cols = self.header.cols

        return table

    def inconsistent_handler(self, str_vals, ncols):
        """
        Adjust or skip data entries if a row is inconsistent with the header.

        The default implementation does no adjustment, and hence will always trigger
        an exception in read() any time the number of data entries does not match
        the header.

        Note that this will *not* be called if the row already matches the header.

        Parameters
        ----------
        str_vals : list
            A list of value strings from the current row of the table.
        ncols : int
            The expected number of entries from the table header.

        Returns
        -------
        str_vals : list
            List of strings to be parsed into data entries in the output table. If
            the length of this list does not match ``ncols``, an exception will be
            raised in read().  Can also be None, in which case the row will be
            skipped.
        """
        # an empty list will always trigger an InconsistentTableError in read()
        return str_vals

    @property
    def comment_lines(self):
        """Return lines in the table that match header.comment regexp"""
        if not hasattr(self, 'lines'):
            raise ValueError('Table must be read prior to accessing the header comment lines')
        if self.header.comment:
            re_comment = re.compile(self.header.comment)
            comment_lines = [x for x in self.lines if re_comment.match(x)]
        else:
            comment_lines = []
        return comment_lines

    def update_table_data(self, table):
        """
        Update table columns in place if needed.

        This is a hook to allow updating the table columns after name
        filtering but before setting up to write the data.  This is currently
        only used by ECSV and is otherwise just a pass-through.

        Parameters
        ----------
        table : `astropy.table.Table`
            Input table for writing

        Returns
        -------
        table : `astropy.table.Table`
            Output table for writing
        """
        return table

    def write_header(self, lines, meta):
        """Write comment lines and then the header itself to ``lines`` in place."""
        self.header.write_comments(lines, meta)
        self.header.write(lines)

    def write(self, table):
        """
        Write ``table`` as list of strings.

        Parameters
        ----------
        table : `~astropy.table.Table`
            Input table data.

        Returns
        -------
        lines : list
            List of strings corresponding to ASCII table

        """

        # Check column names before altering
        self.header.cols = list(table.columns.values())
        self.header.check_column_names(self.names, self.strict_names, False)

        # In-place update of columns in input ``table`` to reflect column
        # filtering.  Note that ``table`` is guaranteed to be a copy of the
        # original user-supplied table.
        _apply_include_exclude_names(table, self.names, self.include_names, self.exclude_names)

        # This is a hook to allow updating the table columns after name
        # filtering but before setting up to write the data.  This is currently
        # only used by ECSV and is otherwise just a pass-through.
        table = self.update_table_data(table)

        # Check that table column dimensions are supported by this format class.
        # Most formats support only 1-d columns, but some like ECSV support N-d.
        self._check_multidim_table(table)

        # Now use altered columns
        new_cols = list(table.columns.values())
        # link information about the columns to the writer object (i.e. self)
        self.header.cols = new_cols
        self.data.cols = new_cols
        self.header.table_meta = table.meta

        # Write header and data to lines list
        lines = []
        self.write_header(lines, table.meta)
        self.data.write(lines)

        return lines
1497
1498
class ContinuationLinesInputter(BaseInputter):
    r"""Inputter where lines ending in ``continuation_char`` are joined
    with the subsequent line.  Example::

      col1 col2 col3
      1 \
      2 3
      4 5 \
      6
    """

    continuation_char = '\\'
    replace_char = ' '
    # If no_continue is not None then lines matching this regex are not subject
    # to line continuation.  The initial use case here is Daophot.  In this
    # case the continuation character is just replaced with replace_char.
    no_continue = None

    def process_lines(self, lines):
        """Join continuation lines and return the resulting list of lines."""
        exempt = re.compile(self.no_continue) if self.no_continue else None

        joined = []
        pending = []
        for raw_line in lines:
            if exempt and exempt.match(raw_line):
                raw_line = raw_line.replace(self.continuation_char, self.replace_char)
            if raw_line.endswith(self.continuation_char):
                # Buffer this fragment (continuation char replaced) and keep going.
                pending.append(raw_line.replace(self.continuation_char, self.replace_char))
                continue
            pending.append(raw_line)
            joined.append(''.join(pending))
            pending = []

        return joined
1533
1534
class WhitespaceSplitter(DefaultSplitter):
    def process_line(self, line):
        """Return ``line`` with unquoted tabs replaced by single spaces.

        Quote state is tracked character by character so tabs inside quoted
        substrings (and escaped quote characters) are left untouched.
        """
        out_chars = []
        inside_quotes = False
        prev_char = None
        for ch in line:
            is_quote = (ch == self.quotechar
                        and (self.escapechar is None or prev_char != self.escapechar))
            if is_quote:
                inside_quotes = not inside_quotes
            if ch == '\t' and not inside_quotes:
                ch = ' '
            prev_char = ch
            out_chars.append(ch)

        return ''.join(out_chars)
1551
1552
# Keyword arguments accepted by the ui layer that are consumed here in
# _get_reader() and therefore must NOT be passed through to the Reader
# class constructor.
extra_reader_pars = ('Reader', 'Inputter', 'Outputter',
                     'delimiter', 'comment', 'quotechar', 'header_start',
                     'data_start', 'data_end', 'converters', 'encoding',
                     'data_Splitter', 'header_Splitter',
                     'names', 'include_names', 'exclude_names', 'strict_names',
                     'fill_values', 'fill_include_names', 'fill_exclude_names')
1559
1560
1561def _get_reader(Reader, Inputter=None, Outputter=None, **kwargs):
1562    """Initialize a table reader allowing for common customizations.  See ui.get_reader()
1563    for param docs.  This routine is for internal (package) use only and is useful
1564    because it depends only on the "core" module.
1565    """
1566
1567    from .fastbasic import FastBasic
1568    if issubclass(Reader, FastBasic):  # Fast readers handle args separately
1569        if Inputter is not None:
1570            kwargs['Inputter'] = Inputter
1571        return Reader(**kwargs)
1572
1573    # If user explicitly passed a fast reader with enable='force'
1574    # (e.g. by passing non-default options), raise an error for slow readers
1575    if 'fast_reader' in kwargs:
1576        if kwargs['fast_reader']['enable'] == 'force':
1577            raise ParameterError('fast_reader required with '
1578                                 '{}, but this is not a fast C reader: {}'
1579                                 .format(kwargs['fast_reader'], Reader))
1580        else:
1581            del kwargs['fast_reader']  # Otherwise ignore fast_reader parameter
1582
1583    reader_kwargs = dict([k, v] for k, v in kwargs.items() if k not in extra_reader_pars)
1584    reader = Reader(**reader_kwargs)
1585
1586    if Inputter is not None:
1587        reader.inputter = Inputter()
1588
1589    if Outputter is not None:
1590        reader.outputter = Outputter()
1591
1592    # Issue #855 suggested to set data_start to header_start + default_header_length
1593    # Thus, we need to retrieve this from the class definition before resetting these numbers.
1594    try:
1595        default_header_length = reader.data.start_line - reader.header.start_line
1596    except TypeError:  # Start line could be None or an instancemethod
1597        default_header_length = None
1598
1599    # csv.reader is hard-coded to recognise either '\r' or '\n' as end-of-line,
1600    # therefore DefaultSplitter cannot handle these as delimiters.
1601    if 'delimiter' in kwargs:
1602        if kwargs['delimiter'] in ('\n', '\r', '\r\n'):
1603            reader.header.splitter = BaseSplitter()
1604            reader.data.splitter = BaseSplitter()
1605        reader.header.splitter.delimiter = kwargs['delimiter']
1606        reader.data.splitter.delimiter = kwargs['delimiter']
1607    if 'comment' in kwargs:
1608        reader.header.comment = kwargs['comment']
1609        reader.data.comment = kwargs['comment']
1610    if 'quotechar' in kwargs:
1611        reader.header.splitter.quotechar = kwargs['quotechar']
1612        reader.data.splitter.quotechar = kwargs['quotechar']
1613    if 'data_start' in kwargs:
1614        reader.data.start_line = kwargs['data_start']
1615    if 'data_end' in kwargs:
1616        reader.data.end_line = kwargs['data_end']
1617    if 'header_start' in kwargs:
1618        if (reader.header.start_line is not None):
1619            reader.header.start_line = kwargs['header_start']
1620            # For FixedWidthTwoLine the data_start is calculated relative to the position line.
1621            # However, position_line is given as absolute number and not relative to header_start.
1622            # So, ignore this Reader here.
1623            if (('data_start' not in kwargs) and (default_header_length is not None)
1624                    and reader._format_name not in ['fixed_width_two_line', 'commented_header']):
1625                reader.data.start_line = reader.header.start_line + default_header_length
1626        elif kwargs['header_start'] is not None:
1627            # User trying to set a None header start to some value other than None
1628            raise ValueError('header_start cannot be modified for this Reader')
1629    if 'converters' in kwargs:
1630        reader.outputter.converters = kwargs['converters']
1631    if 'data_Splitter' in kwargs:
1632        reader.data.splitter = kwargs['data_Splitter']()
1633    if 'header_Splitter' in kwargs:
1634        reader.header.splitter = kwargs['header_Splitter']()
1635    if 'names' in kwargs:
1636        reader.names = kwargs['names']
1637        if None in reader.names:
1638            raise TypeError('Cannot have None for column name')
1639        if len(set(reader.names)) != len(reader.names):
1640            raise ValueError('Duplicate column names')
1641    if 'include_names' in kwargs:
1642        reader.include_names = kwargs['include_names']
1643    if 'exclude_names' in kwargs:
1644        reader.exclude_names = kwargs['exclude_names']
1645    # Strict names is normally set only within the guessing process to
1646    # indicate that column names cannot be numeric or have certain
1647    # characters at the beginning or end.  It gets used in
1648    # BaseHeader.check_column_names().
1649    if 'strict_names' in kwargs:
1650        reader.strict_names = kwargs['strict_names']
1651    if 'fill_values' in kwargs:
1652        reader.data.fill_values = kwargs['fill_values']
1653    if 'fill_include_names' in kwargs:
1654        reader.data.fill_include_names = kwargs['fill_include_names']
1655    if 'fill_exclude_names' in kwargs:
1656        reader.data.fill_exclude_names = kwargs['fill_exclude_names']
1657    if 'encoding' in kwargs:
1658        reader.encoding = kwargs['encoding']
1659        reader.inputter.encoding = kwargs['encoding']
1660
1661    return reader
1662
1663
# Writer customization keywords that are consumed by _get_writer itself
# (applied as attribute tweaks after construction) instead of being passed
# through to the Writer class constructor.
extra_writer_pars = (
    'delimiter',
    'comment',
    'quotechar',
    'formats',
    'strip_whitespace',
    'names',
    'include_names',
    'exclude_names',
    'fill_values',
    'fill_include_names',
    'fill_exclude_names',
)
1669
1670
def _get_writer(Writer, fast_writer, **kwargs):
    """Initialize a table writer allowing for common customizations.

    This routine is for internal (package) use only and is useful because it
    depends only on the "core" module.

    Parameters
    ----------
    Writer : class
        Writer class to instantiate (a ``BaseReader`` subclass used in
        write mode, or a fast writer subclassing ``FastBasic``).
    fast_writer : bool
        If True and a fast analog of ``Writer`` is registered in
        ``FAST_CLASSES`` (under the key ``'fast_<format_name>'``), that
        fast class is instantiated instead.
    **kwargs : dict
        Writer customizations.  Keys listed in ``extra_writer_pars`` are
        applied as attribute customizations after construction; the
        remaining keys are passed to the ``Writer`` constructor.

    Returns
    -------
    writer
        Configured writer instance.
    """
    from .fastbasic import FastBasic

    # A value of None for fill_values implies getting the default string
    # representation of masked values (depending on the writer class), but the
    # machinery expects a list.  The easiest here is to just pop the value off,
    # i.e. fill_values=None is the same as not providing it at all.
    if 'fill_values' in kwargs and kwargs['fill_values'] is None:
        del kwargs['fill_values']

    if issubclass(Writer, FastBasic):  # Fast writers handle args separately
        return Writer(**kwargs)
    elif fast_writer and f'fast_{Writer._format_name}' in FAST_CLASSES:
        # Switch to fast writer
        kwargs['fast_writer'] = fast_writer
        return FAST_CLASSES[f'fast_{Writer._format_name}'](**kwargs)

    # Only pass constructor-compatible kwargs to the Writer; the keys in
    # extra_writer_pars are applied as attribute customizations below.
    writer_kwargs = {k: v for k, v in kwargs.items() if k not in extra_writer_pars}
    writer = Writer(**writer_kwargs)

    if 'delimiter' in kwargs:
        writer.header.splitter.delimiter = kwargs['delimiter']
        writer.data.splitter.delimiter = kwargs['delimiter']
    if 'comment' in kwargs:
        writer.header.write_comment = kwargs['comment']
        writer.data.write_comment = kwargs['comment']
    if 'quotechar' in kwargs:
        writer.header.splitter.quotechar = kwargs['quotechar']
        writer.data.splitter.quotechar = kwargs['quotechar']
    if 'formats' in kwargs:
        writer.data.formats = kwargs['formats']
    if 'strip_whitespace' in kwargs:
        if kwargs['strip_whitespace']:
            # Restore the default SplitterClass process_val method which strips
            # whitespace.  This may have been changed in the Writer
            # initialization (e.g. Rdb and Tab)
            writer.data.splitter.process_val = operator.methodcaller('strip')
        else:
            writer.data.splitter.process_val = None
    if 'names' in kwargs:
        writer.header.names = kwargs['names']
    if 'include_names' in kwargs:
        writer.include_names = kwargs['include_names']
    if 'exclude_names' in kwargs:
        writer.exclude_names = kwargs['exclude_names']
    if 'fill_values' in kwargs:
        # Prepend user-specified values to the class default.
        with suppress(TypeError, IndexError):
            # Test if it looks like (match, replace_string, optional_colname),
            # in which case make it a list.  Adding '' raises TypeError unless
            # element [1] is a string, so a bare tuple gets wrapped while a
            # list of such tuples is left alone.
            kwargs['fill_values'][1] + ''
            kwargs['fill_values'] = [kwargs['fill_values']]
        writer.data.fill_values = kwargs['fill_values'] + writer.data.fill_values
    if 'fill_include_names' in kwargs:
        writer.data.fill_include_names = kwargs['fill_include_names']
    if 'fill_exclude_names' in kwargs:
        writer.data.fill_exclude_names = kwargs['fill_exclude_names']
    return writer
1733