1# Licensed under a 3-clause BSD style license - see LICENSE.rst
2
3"""
This file contains the high-level functions to read a
VOTable file.
6"""
7
8# STDLIB
9import io
10import os
11import sys
12import textwrap
13import warnings
14
15# LOCAL
16from . import exceptions
17from . import tree
18from astropy.utils.xml import iterparser
19from astropy.utils import data
20from astropy.utils.decorators import deprecated_renamed_argument
21
# Names exported by ``from <this module> import *``.
__all__ = ['parse', 'parse_single_table', 'from_table', 'writeto', 'validate',
           'reset_vo_warnings']

# String values accepted for the ``verify`` argument of `parse`; any other
# (non-boolean) value makes `parse` raise `ValueError`.
VERIFY_OPTIONS = ['ignore', 'warn', 'exception']
26
27
@deprecated_renamed_argument('pedantic', 'verify', since='5.0')
def parse(source, columns=None, invalid='exception', verify=None,
          chunk_size=tree.DEFAULT_CHUNK_SIZE, table_number=None,
          table_id=None, filename=None, unit_format=None,
          datatype_mapping=None, _debug_python_based_parser=False):
    """
    Read a VOTABLE_ xml file (or file-like object) and build a
    `~astropy.io.votable.tree.VOTableFile` object from it.

    Parameters
    ----------
    source : path-like or file-like
        Path or file-like object containing a VOTABLE_ xml file.
        If file, must be readable.

    columns : sequence of str, optional
        List of field names to include in the output.  The default is
        to include all fields.

    invalid : str, optional
        How to treat invalid values (matched case-insensitively):

            - 'exception': throw an exception when an invalid value is
              encountered (default)

            - 'mask': mask out invalid values

    verify : {'ignore', 'warn', 'exception'}, optional
        When ``'exception'``, raise an error when the file violates the spec,
        otherwise either issue a warning (``'warn'``) or silently continue
        (``'ignore'``). Warnings may be controlled using the standard Python
        mechanisms.  See the `warnings` module in the Python standard library
        for more information. When not provided, uses the configuration setting
        ``astropy.io.votable.verify``, which defaults to 'ignore'.

        A boolean is also accepted: `True` is treated as ``'exception'``
        and `False` as ``'warn'``.

        .. versionchanged:: 4.0
           ``verify`` replaces the ``pedantic`` argument, which will be
           deprecated in future.

        .. versionchanged:: 5.0
           The ``pedantic`` argument is deprecated.

    chunk_size : int, optional
        The number of rows to read before converting to an array.
        Higher numbers are likely to be faster, but will consume more
        memory.

    table_number : int, optional
        The number of table in the file to read in.  If `None`, all
        tables will be read.  If a number, 0 refers to the first table
        in the file, and only that numbered table will be parsed and
        read in.  Should not be used with ``table_id``.

    table_id : str, optional
        The ID of the table in the file to read in.  Should not be
        used with ``table_number``.

        NOTE(review): this function does not place ``table_id`` in the
        configuration it hands to the parser -- confirm whether the
        value is honoured anywhere.

    filename : str, optional
        A filename, URL or other identifier to use in error messages.
        If *filename* is None and *source* is a string (i.e. a path),
        then *source* will be used as a filename for error messages.
        Therefore, *filename* is only required when source is a
        file-like object.

    unit_format : str, astropy.units.format.Base instance or None, optional
        The unit format to use when parsing unit attributes.  If a
        string, must be the name of a unit formatter. The built-in
        formats include ``generic``, ``fits``, ``cds``, and
        ``vounit``.  A custom formatter may be provided by passing a
        `~astropy.units.UnitBase` instance.  If `None` (default),
        the unit format to use will be the one specified by the
        VOTable specification (which is ``cds`` up to version 1.3 of
        VOTable, and ``vounit`` in more recent versions of the spec).

    datatype_mapping : dict, optional
        A mapping of datatype names (`str`) to valid VOTable datatype names
        (str). For example, if the file being read contains the datatype
        "unsignedInt" (an invalid datatype in VOTable), include the mapping
        ``{"unsignedInt": "long"}``.

    Returns
    -------
    votable : `~astropy.io.votable.tree.VOTableFile` object

    Raises
    ------
    ValueError
        If ``invalid`` or ``verify`` is not one of the accepted values.

    See also
    --------
    astropy.io.votable.exceptions : The exceptions this function may raise.
    """
    from . import conf

    invalid = invalid.lower()
    if invalid != 'exception' and invalid != 'mask':
        raise ValueError("accepted values of ``invalid`` are: "
                         "``'exception'`` or ``'mask'``.")

    if verify is None:
        # The configuration item is always a string, so booleans arrive
        # spelled out as 'true' / 'false' and are turned back into real
        # booleans here; anything else is used verbatim (lower-cased).
        configured = conf.verify.lower()
        if configured == 'true':
            verify = True
        elif configured == 'false':
            verify = False
        else:
            verify = configured

    if isinstance(verify, bool):
        # Legacy boolean semantics: True -> strict, False -> warn only.
        if verify:
            verify = 'exception'
        else:
            verify = 'warn'
    elif verify not in VERIFY_OPTIONS:
        raise ValueError(f"verify should be one of {'/'.join(VERIFY_OPTIONS)}")

    # A plain string source doubles as the name used in error messages
    # when the caller did not supply one explicitly.
    if filename is None and isinstance(source, str):
        reported_name = source
    else:
        reported_name = filename

    config = dict(
        columns=columns,
        invalid=invalid,
        verify=verify,
        chunk_size=chunk_size,
        table_number=table_number,
        filename=reported_name,
        unit_format=unit_format,
        datatype_mapping={} if datatype_mapping is None else datatype_mapping,
    )

    xml_source = iterparser.get_xml_iterator(
        source, _debug_python_based_parser=_debug_python_based_parser)
    with xml_source as xml_events:
        votable = tree.VOTableFile(config=config, pos=(1, 1))
        return votable.parse(xml_events, config)
161
162
def parse_single_table(source, **kwargs):
    """
    Parse a VOTABLE_ xml file (or file-like object) and return only
    its first `~astropy.io.votable.tree.Table` instance.

    Unless the caller supplies a non-`None` ``table_number``, table 0
    is requested from `parse`.

    See `parse` for a description of the keyword arguments.

    Returns
    -------
    votable : `~astropy.io.votable.tree.Table` object
    """
    requested = kwargs.get('table_number')
    # A missing or explicit-None table number means "the first table".
    kwargs['table_number'] = 0 if requested is None else requested

    return parse(source, **kwargs).get_first_table()
181
182
def writeto(table, file, tabledata_format=None):
    """
    Serialize a table to a VOTABLE_ xml file.

    Parameters
    ----------
    table : `~astropy.io.votable.tree.VOTableFile` or `~astropy.table.Table` instance.
        An `~astropy.table.Table` is first converted with
        `~astropy.io.votable.tree.VOTableFile.from_table`.

    file : str or writable file-like
        Path or file object to write to

    tabledata_format : str, optional
        Override the format of the table(s) data to write.  Must be
        one of ``tabledata`` (text representation), ``binary`` or
        ``binary2``.  By default, use the format that was specified in
        each ``table`` object as it was created or read in.  See
        :ref:`astropy:votable-serialization`.

    Raises
    ------
    TypeError
        If ``table`` is neither of the two supported types.
    """
    from astropy.table import Table

    if isinstance(table, Table):
        votable_file = tree.VOTableFile.from_table(table)
    elif isinstance(table, tree.VOTableFile):
        votable_file = table
    else:
        raise TypeError(
            "first argument must be astropy.io.vo.VOTableFile or "
            "astropy.table.Table instance")

    votable_file.to_xml(
        file,
        tabledata_format=tabledata_format,
        _debug_python_based_parser=True)
210
211
def validate(source, output=sys.stdout, xmllint=False, filename=None):
    """
    Prints a validation report for the given file.

    Parameters
    ----------
    source : path-like or file-like
        Path to a VOTABLE_ xml file or `~pathlib.Path`
        object having Path to a VOTABLE_ xml file.
        If file-like object, must be readable.

    output : file-like or None, optional
        Where to output the report.  Defaults to ``sys.stdout``.
        If `None`, the report is collected in memory and returned as
        a string.  Otherwise it must be writable.

    xmllint : bool, optional
        When `True`, also send the file to ``xmllint`` for schema and
        DTD validation.  Requires that ``xmllint`` is installed.  The
        default is `False`.  ``source`` must be a file on the local
        filesystem in order for ``xmllint`` to work.

    filename : str, optional
        A filename to use in the error messages.  If not provided, one
        will be automatically determined from ``source`` (the string
        itself, or its ``name`` / ``url`` attribute, falling back to
        ``"<unknown>"``).

    Returns
    -------
    is_valid : bool or str
        `True` if no warnings were found and, when it was run,
        ``xmllint`` reported success.  If ``output`` is `None`, the
        report text is returned instead.
    """

    from astropy.utils.console import print_code_line, color_print

    # ``output=None`` is the request to hand the report back as a string,
    # so it has to be detected *before* any default stream is substituted.
    return_as_str = output is None
    if return_as_str:
        output = io.StringIO()

    lines = []
    votable = None

    # Make sure warnings already emitted in this process are emitted again.
    reset_vo_warnings()

    # Read everything up front: the content is parsed once and then
    # re-read line by line to show the offending source lines.
    with data.get_readable_fileobj(source, encoding='binary') as fd:
        content = fd.read()
    content_buffer = io.BytesIO(content)
    content_buffer.seek(0)

    if filename is None:
        if isinstance(source, str):
            filename = source
        elif hasattr(source, 'name'):
            filename = source.name
        elif hasattr(source, 'url'):
            filename = source.url
        else:
            filename = "<unknown>"

    with warnings.catch_warnings(record=True) as warning_lines:
        warnings.resetwarnings()
        warnings.simplefilter("always", exceptions.VOWarning, append=True)
        try:
            votable = parse(content_buffer, verify='warn', filename=filename)
        except ValueError as e:
            # A hard parse failure is reported after the recorded warnings.
            lines.append(str(e))

    lines = [str(x.message) for x in warning_lines if
             issubclass(x.category, exceptions.VOWarning)] + lines

    content_buffer.seek(0)
    output.write(f"Validation report for {filename}\n\n")

    if lines:
        xml_lines = iterparser.xml_readlines(content_buffer)

        for warning in lines:
            w = exceptions.parse_vowarning(warning)

            if not w['is_something']:
                output.write(w['message'])
                output.write('\n\n')
            else:
                # ``nline`` is 1-based; ``xml_lines`` is indexed from 0.
                line = xml_lines[w['nline'] - 1]
                warning = w['warning']
                if w['is_warning']:
                    color = 'yellow'
                else:
                    color = 'red'
                color_print(
                    f"{w['nline']:d}: ", '',
                    warning or 'EXC', color,
                    ': ', '',
                    textwrap.fill(
                        w['message'],
                        initial_indent='          ',
                        subsequent_indent='  ').lstrip(),
                    file=output)
                print_code_line(line, w['nchar'], file=output)
            output.write('\n')
    else:
        output.write('astropy.io.votable found no violations.\n\n')

    success = 0
    if xmllint and os.path.exists(filename):
        from . import xmlutil

        # Without a successfully parsed file, fall back to schema "1.1".
        if votable is None:
            version = "1.1"
        else:
            version = votable.version
        success, stdout, stderr = xmlutil.validate_schema(
            filename, version)

        if success != 0:
            output.write(
                'xmllint schema violations:\n\n')
            output.write(stderr.decode('utf-8'))
        else:
            output.write('xmllint passed\n')

    if return_as_str:
        return output.getvalue()
    return len(lines) == 0 and success == 0
339
340
def from_table(table, table_id=None):
    """
    Wrap an `~astropy.table.Table` object in a
    `~astropy.io.votable.tree.VOTableFile` file structure that holds
    just that single table.

    This delegates to `~astropy.io.votable.tree.VOTableFile.from_table`.

    Parameters
    ----------
    table : `~astropy.table.Table` instance

    table_id : str, optional
        If not `None`, set the given id on the returned
        `~astropy.io.votable.tree.Table` instance.

    Returns
    -------
    votable : `~astropy.io.votable.tree.VOTableFile` instance
    """
    build_votable = tree.VOTableFile.from_table
    return build_votable(table, table_id=table_id)
360
361
def is_votable(source):
    """
    Peek at the start of a file to decide whether it is a VOTable file.

    The first two events produced by the XML iterator must carry the
    tags ``xml`` and ``VOTABLE``, in that order.  A `ValueError` raised
    while opening or iterating the source is treated as "not a
    VOTable".  If the iterator ends before two events were produced
    (and without raising), the events that were seen are the only ones
    checked.

    Parameters
    ----------
    source : path-like or file-like
        Path or file object containing a VOTABLE_ xml file.
        If file, must be readable.

    Returns
    -------
    is_votable : bool
        Returns `True` if the given file is a VOTable file.
    """
    leading_tags = ('xml', 'VOTABLE')
    try:
        with iterparser.get_xml_iterator(source) as events:
            # ``zip`` advances ``leading_tags`` first, so exactly as many
            # events are consumed as there are tags to compare against.
            for wanted, (_start, tag, _data, _pos) in zip(leading_tags,
                                                          events):
                if tag != wanted:
                    return False
            return True
    except ValueError:
        return False
392
393
def reset_vo_warnings():
    """
    Forget which vo warnings have already been shown, so that every
    warning is emitted again the next time it is triggered.  This is
    used, for example, by `validate` which must emit all warnings
    each time it is called.

    """
    from . import converters, xmlutil

    # ``__warningregistry__`` is the per-module bookkeeping the Python
    # warnings machinery uses to remember warnings it has already
    # shown.  Dropping it from each votable module makes every warning
    # fire again; a module that has no registry yet is left untouched.
    vo_modules = (converters, exceptions, tree, xmlutil)
    for vo_module in vo_modules:
        vars(vo_module).pop('__warningregistry__', None)
415