# Licensed under a 3-clause BSD style license - see LICENSE.rst

"""
This file contains the high-level functions to read a
VOTable file.
"""

# STDLIB
import io
import os
import sys
import textwrap
import warnings

# LOCAL
from . import exceptions
from . import tree
from astropy.utils.xml import iterparser
from astropy.utils import data
from astropy.utils.decorators import deprecated_renamed_argument

__all__ = ['parse', 'parse_single_table', 'from_table', 'writeto', 'validate',
           'reset_vo_warnings']

VERIFY_OPTIONS = ['ignore', 'warn', 'exception']

# Private sentinel used as the default for ``validate(output=...)``.  It lets
# ``validate`` tell "argument omitted" (write to the *current* ``sys.stdout``,
# looked up at call time) apart from an explicit ``None`` (return the report
# as a string).
_OUTPUT_STDOUT = object()


@deprecated_renamed_argument('pedantic', 'verify', since='5.0')
def parse(source, columns=None, invalid='exception', verify=None,
          chunk_size=tree.DEFAULT_CHUNK_SIZE, table_number=None,
          table_id=None, filename=None, unit_format=None,
          datatype_mapping=None, _debug_python_based_parser=False):
    """
    Parses a VOTABLE_ xml file (or file-like object), and returns a
    `~astropy.io.votable.tree.VOTableFile` object.

    Parameters
    ----------
    source : path-like or file-like
        Path or file-like object containing a VOTABLE_ xml file.
        If file, must be readable.

    columns : sequence of str, optional
        List of field names to include in the output.  The default is
        to include all fields.

    invalid : str, optional
        One of the following values:

            - 'exception': throw an exception when an invalid value is
              encountered (default)

            - 'mask': mask out invalid values

    verify : {'ignore', 'warn', 'exception'}, optional
        When ``'exception'``, raise an error when the file violates the spec,
        otherwise either issue a warning (``'warn'``) or silently continue
        (``'ignore'``). Warnings may be controlled using the standard Python
        mechanisms.  See the `warnings` module in the Python standard library
        for more information. When not provided, uses the configuration setting
        ``astropy.io.votable.verify``, which defaults to 'ignore'.

        .. versionchanged:: 4.0
           ``verify`` replaces the ``pedantic`` argument, which will be
           deprecated in future.
        .. versionchanged:: 5.0
           The ``pedantic`` argument is deprecated.

    chunk_size : int, optional
        The number of rows to read before converting to an array.
        Higher numbers are likely to be faster, but will consume more
        memory.

    table_number : int, optional
        The number of table in the file to read in.  If `None`, all
        tables will be read.  If a number, 0 refers to the first table
        in the file, and only that numbered table will be parsed and
        read in.  Should not be used with ``table_id``.

    table_id : str, optional
        The ID of the table in the file to read in.  Should not be
        used with ``table_number``.

    filename : str, optional
        A filename, URL or other identifier to use in error messages.
        If *filename* is None and *source* is a string (i.e. a path),
        then *source* will be used as a filename for error messages.
        Therefore, *filename* is only required when source is a
        file-like object.

    unit_format : str, astropy.units.format.Base instance or None, optional
        The unit format to use when parsing unit attributes.  If a
        string, must be the name of a unit formatter. The built-in
        formats include ``generic``, ``fits``, ``cds``, and
        ``vounit``.  A custom formatter may be provided by passing a
        `~astropy.units.UnitBase` instance.  If `None` (default),
        the unit format to use will be the one specified by the
        VOTable specification (which is ``cds`` up to version 1.3 of
        VOTable, and ``vounit`` in more recent versions of the spec).

    datatype_mapping : dict, optional
        A mapping of datatype names (`str`) to valid VOTable datatype names
        (str). For example, if the file being read contains the datatype
        "unsignedInt" (an invalid datatype in VOTable), include the mapping
        ``{"unsignedInt": "long"}``.

    Returns
    -------
    votable : `~astropy.io.votable.tree.VOTableFile` object

    Raises
    ------
    ValueError
        If ``invalid`` or ``verify`` is not one of the accepted values.

    See also
    --------
    astropy.io.votable.exceptions : The exceptions this function may raise.
    """
    from . import conf

    invalid = invalid.lower()
    if invalid not in ('exception', 'mask'):
        raise ValueError("accepted values of ``invalid`` are: "
                         "``'exception'`` or ``'mask'``.")

    if verify is None:

        conf_verify_lowercase = conf.verify.lower()

        # We need to allow verify to be booleans as strings since the
        # configuration framework doesn't make it easy/possible to have mixed
        # types.
        if conf_verify_lowercase in ['false', 'true']:
            verify = conf_verify_lowercase == 'true'
        else:
            verify = conf_verify_lowercase

    # Booleans are the legacy ``pedantic`` spelling: True -> 'exception',
    # False -> 'warn'.
    if isinstance(verify, bool):
        verify = 'exception' if verify else 'warn'
    elif verify not in VERIFY_OPTIONS:
        raise ValueError(f"verify should be one of {'/'.join(VERIFY_OPTIONS)}")

    if datatype_mapping is None:
        datatype_mapping = {}

    config = {
        'columns': columns,
        'invalid': invalid,
        'verify': verify,
        'chunk_size': chunk_size,
        'table_number': table_number,
        # Previously ``table_id`` was accepted but never forwarded, so it was
        # silently ignored.
        # NOTE(review): assumes the tree parser reads ``config['table_id']``
        # to select the table -- confirm against ``tree``.
        'table_id': table_id,
        'filename': filename,
        'unit_format': unit_format,
        'datatype_mapping': datatype_mapping
    }

    # A plain string source doubles as the name shown in error messages.
    if filename is None and isinstance(source, str):
        config['filename'] = source

    with iterparser.get_xml_iterator(
            source,
            _debug_python_based_parser=_debug_python_based_parser) as iterator:
        return tree.VOTableFile(
            config=config, pos=(1, 1)).parse(iterator, config)


def parse_single_table(source, **kwargs):
    """
    Parses a VOTABLE_ xml file (or file-like object), reading and
    returning only the first `~astropy.io.votable.tree.Table`
    instance.

    If neither ``table_number`` nor ``table_id`` is supplied,
    ``table_number=0`` is used so that only the first table is read.
    When ``table_id`` is supplied on its own, no table number is forced
    (the two selectors should not be combined) and selection is left to
    `parse`.

    See `parse` for a description of the keyword arguments.

    Returns
    -------
    votable : `~astropy.io.votable.tree.Table` object
    """
    no_number = kwargs.get('table_number') is None
    no_id = kwargs.get('table_id') is None
    if no_number and no_id:
        kwargs['table_number'] = 0

    votable = parse(source, **kwargs)

    return votable.get_first_table()


def writeto(table, file, tabledata_format=None):
    """
    Writes a `~astropy.io.votable.tree.VOTableFile` to a VOTABLE_ xml file.

    Parameters
    ----------
    table : `~astropy.io.votable.tree.VOTableFile` or `~astropy.table.Table` instance.
        An `~astropy.table.Table` is first converted with
        ``VOTableFile.from_table``.

    file : str or writable file-like
        Path or file object to write to

    tabledata_format : str, optional
        Override the format of the table(s) data to write.  Must be
        one of ``tabledata`` (text representation), ``binary`` or
        ``binary2``.  By default, use the format that was specified in
        each ``table`` object as it was created or read in.  See
        :ref:`astropy:votable-serialization`.

    Raises
    ------
    TypeError
        If ``table`` is neither a ``VOTableFile`` nor a ``Table``.
    """
    # Imported lazily, as in the rest of this module's optional dependencies.
    from astropy.table import Table
    if isinstance(table, Table):
        table = tree.VOTableFile.from_table(table)
    elif not isinstance(table, tree.VOTableFile):
        raise TypeError(
            "first argument must be astropy.io.vo.VOTableFile or "
            "astropy.table.Table instance")
    table.to_xml(file, tabledata_format=tabledata_format,
                 _debug_python_based_parser=True)


def validate(source, output=_OUTPUT_STDOUT, xmllint=False, filename=None):
    """
    Prints a validation report for the given file.

    Parameters
    ----------
    source : path-like or file-like
        Path (`str` or `~pathlib.Path`) to a VOTABLE_ xml file, or a
        file-like object.  If file-like object, must be readable.

    output : file-like, optional
        Where to output the report.  Defaults to ``sys.stdout`` (looked
        up when the function is called).  If `None`, the output will be
        returned as a string.  Must be writable.

    xmllint : bool, optional
        When `True`, also send the file to ``xmllint`` for schema and
        DTD validation.  Requires that ``xmllint`` is installed.  The
        default is `False`.  ``source`` must be a file on the local
        filesystem in order for ``xmllint`` to work.

    filename : str, optional
        A filename to use in the error messages.  If not provided, one
        will be automatically determined from ``source``.

    Returns
    -------
    is_valid : bool or str
        Returns `True` if no warnings were found.  If ``output`` is
        `None`, the return value will be a string.
    """

    from astropy.utils.console import print_code_line, color_print

    # An explicit ``None`` means "collect the report and return it"; an
    # omitted argument means "write to whatever sys.stdout is right now".
    return_as_str = output is None
    if output is _OUTPUT_STDOUT:
        output = sys.stdout
    elif return_as_str:
        output = io.StringIO()

    lines = []
    votable = None

    reset_vo_warnings()

    # Read everything up front: the content is parsed once and then re-read
    # line by line to show the offending source lines in the report.
    with data.get_readable_fileobj(source, encoding='binary') as fd:
        content = fd.read()
    content_buffer = io.BytesIO(content)

    if filename is None:
        if isinstance(source, str):
            filename = source
        elif hasattr(source, 'name'):
            filename = source.name
        elif hasattr(source, 'url'):
            filename = source.url
        else:
            filename = "<unknown>"

    with warnings.catch_warnings(record=True) as warning_lines:
        warnings.resetwarnings()
        warnings.simplefilter("always", exceptions.VOWarning, append=True)
        try:
            votable = parse(content_buffer, verify='warn', filename=filename)
        except ValueError as e:
            lines.append(str(e))

    # Recorded VO warnings come first, followed by a fatal parse error if any.
    lines = [str(x.message) for x in warning_lines if
             issubclass(x.category, exceptions.VOWarning)] + lines

    content_buffer.seek(0)
    output.write(f"Validation report for {filename}\n\n")

    if lines:
        xml_lines = iterparser.xml_readlines(content_buffer)

        for warning in lines:
            w = exceptions.parse_vowarning(warning)

            if not w['is_something']:
                output.write(w['message'])
                output.write('\n\n')
            else:
                # ``nline`` is 1-based; ``xml_lines`` is a 0-based list.
                line = xml_lines[w['nline'] - 1]
                warning = w['warning']
                if w['is_warning']:
                    color = 'yellow'
                else:
                    color = 'red'
                color_print(
                    f"{w['nline']:d}: ", '',
                    warning or 'EXC', color,
                    ': ', '',
                    textwrap.fill(
                        w['message'],
                        initial_indent='          ',
                        subsequent_indent='  ').lstrip(),
                    file=output)
                print_code_line(line, w['nchar'], file=output)
            output.write('\n')
    else:
        output.write('astropy.io.votable found no violations.\n\n')

    success = 0
    # xmllint needs a real file on disk, so it is skipped for in-memory or
    # remote sources.
    if xmllint and os.path.exists(filename):
        from . import xmlutil

        if votable is None:
            version = "1.1"
        else:
            version = votable.version
        success, stdout, stderr = xmlutil.validate_schema(
            filename, version)

        if success != 0:
            output.write(
                'xmllint schema violations:\n\n')
            output.write(stderr.decode('utf-8'))
        else:
            output.write('xmllint passed\n')

    if return_as_str:
        return output.getvalue()
    return len(lines) == 0 and success == 0


def from_table(table, table_id=None):
    """
    Given an `~astropy.table.Table` object, return a
    `~astropy.io.votable.tree.VOTableFile` file structure containing
    just that single table.

    Parameters
    ----------
    table : `~astropy.table.Table` instance

    table_id : str, optional
        If not `None`, set the given id on the returned
        `~astropy.io.votable.tree.Table` instance.

    Returns
    -------
    votable : `~astropy.io.votable.tree.VOTableFile` instance
    """
    return tree.VOTableFile.from_table(table, table_id=table_id)


def is_votable(source):
    """
    Reads the header of a file to determine if it is a VOTable file.

    Only the first two parser events are inspected: the first must be
    the ``xml`` declaration and the second the ``VOTABLE`` element.

    Parameters
    ----------
    source : path-like or file-like
        Path or file object containing a VOTABLE_ xml file.
        If file, must be readable.

    Returns
    -------
    is_votable : bool
        Returns `True` if the given file is a VOTable file.
    """
    try:
        with iterparser.get_xml_iterator(source) as iterator:
            # Each loop consumes exactly one event and then breaks.
            for _start, tag, _attrs, _pos in iterator:
                if tag != 'xml':
                    return False
                break

            for _start, tag, _attrs, _pos in iterator:
                if tag != 'VOTABLE':
                    return False
                break

            return True
    except ValueError:
        # A ValueError while reading the header is treated as "not a VOTable".
        return False


def reset_vo_warnings():
    """
    Resets all of the vo warning state so that warnings that
    have already been emitted will be emitted again. This is
    used, for example, by `validate` which must emit all
    warnings each time it is called.

    """
    from . import converters, xmlutil

    # -----------------------------------------------------------#
    #  This is a special variable used by the Python warnings    #
    #  infrastructure to keep track of warnings that have        #
    #  already been seen.  Since we want to get every single     #
    #  warning out of this, we have to delete all of them first. #
    # -----------------------------------------------------------#
    for module in (converters, exceptions, tree, xmlutil):
        try:
            del module.__warningregistry__
        except AttributeError:
            # The module has not emitted any warning yet; nothing to reset.
            pass