# Licensed under a 3-clause BSD style license - see LICENSE.rst
"""
Validates a large collection of web-accessible VOTable files,
and generates a report as a directory tree of HTML files.
"""

# STDLIB
import os

# LOCAL
from astropy.utils.data import get_pkg_data_filename
from . import html
from . import result


__all__ = ['make_validation_report']


def get_srcdir():
    """Return the directory that contains this module's source file."""
    return os.path.dirname(__file__)


def get_urls(destdir, s):
    """
    Load the built-in URL lists and record each URL's expected outcome.

    Reads the packaged ``data/urls/cone.<expected>.dat.gz`` files for
    the expectations ``'good'``, ``'broken'`` and ``'incorrect'``, in
    that order.  A URL that appears more than once keeps the
    expectation of its first occurrence.

    Parameters
    ----------
    destdir : path-like
        Root directory passed to `result.Result` for every URL.

    s : iterator
        Advanced with ``next(s)`` once per line read, to report
        progress (`make_validation_report` passes a ``Spinner``).

    Returns
    -------
    urls : list of bytes
        The unique URLs, stripped of surrounding whitespace, in the
        order they were first encountered.
    """
    import gzip

    expectations = ['good', 'broken', 'incorrect']

    seen = set()
    urls = []
    for expected in expectations:
        filename = get_pkg_data_filename(
            f'data/urls/cone.{expected}.dat.gz')
        with gzip.open(filename, 'rb') as fd:
            # Iterate the file lazily instead of materializing every
            # line with readlines().
            for line in fd:
                next(s)
                url = line.strip()
                if url in seen:
                    continue
                seen.add(url)
                with result.Result(url, root=destdir) as r:
                    r['expected'] = expected
                urls.append(url)

    return urls


def download(args):
    """
    Download the XML content for one URL.

    Parameters
    ----------
    args : tuple
        ``(url, destdir)``; packed into one argument so the function
        can be used with ``ProgressBar.map``.
    """
    url, destdir = args
    with result.Result(url, root=destdir) as r:
        r.download_xml_content()


def validate_vo(args):
    """
    Run ``Result.validate_vo`` for one URL.

    Parameters
    ----------
    args : tuple
        ``(url, destdir)``; packed into one argument so the function
        can be used with ``ProgressBar.map``.
    """
    url, destdir = args
    with result.Result(url, root=destdir) as r:
        r.validate_vo()


def votlint_validate(args):
    """
    Run ``votlint`` validation for one URL.

    The validation is skipped when the result's ``'network_error'``
    entry is not `None`.

    Parameters
    ----------
    args : tuple
        ``(path_to_stilts_jar, url, destdir)``; packed into one
        argument so the function can be used with ``ProgressBar.map``.
    """
    path_to_stilts_jar, url, destdir = args
    with result.Result(url, root=destdir) as r:
        if r['network_error'] is None:
            r.validate_with_votlint(path_to_stilts_jar)


def write_html_result(args):
    """
    Write the HTML page for one URL's result.

    Parameters
    ----------
    args : tuple
        ``(url, destdir)``; packed into one argument so the function
        can be used with ``ProgressBar.map``.
    """
    url, destdir = args
    with result.Result(url, root=destdir) as r:
        html.write_result(r)


def write_subindex(args):
    """
    Write the index table for one subset of results.

    Parameters
    ----------
    args : tuple
        ``(subset, destdir, total)``.  The items of ``subset`` are
        forwarded positionally to ``html.write_index_table`` after
        ``destdir``, and ``total`` is passed as a keyword argument.
    """
    subset, destdir, total = args
    html.write_index_table(destdir, *subset, total=total)


def make_validation_report(
        urls=None, destdir='astropy.io.votable.validator.results',
        multiprocess=True, stilts=None):
    """
    Validates a large collection of web-accessible VOTable files.

    Generates a report as a directory tree of HTML files.

    Parameters
    ----------
    urls : list of str, optional
        If provided, is a list of HTTP urls to download VOTable files
        from.  If not provided, a built-in set of ~22,000 urls
        compiled by HEASARC will be used.

    destdir : path-like, optional
        The directory to write the report to.  By default, this is a
        directory called ``'astropy.io.votable.validator.results'`` in
        the current directory.  If the directory does not exist, it
        will be created.

    multiprocess : bool, optional
        If `True` (default), perform validations in parallel using all
        of the cores on this machine.

    stilts : path-like, optional
        To perform validation with ``votlint`` from the Java-based
        `STILTS <http://www.star.bris.ac.uk/~mbt/stilts/>`_ VOTable
        parser, in addition to `astropy.io.votable`, set this to the
        path of the ``'stilts.jar'`` file.  ``java`` on the system shell
        path will be used to run it.

    Raises
    ------
    ValueError
        If *stilts* is given but the path does not exist.

    Notes
    -----
    Downloads of each given URL will be performed only once and cached
    locally in *destdir*.  To refresh the cache, remove *destdir*
    first.
    """
    from astropy.utils.console import (color_print, ProgressBar, Spinner)

    if stilts is not None and not os.path.exists(stilts):
        raise ValueError(
            f'{stilts} does not exist.')

    destdir = os.path.abspath(destdir)

    if urls is None:
        with Spinner('Loading URLs', 'green') as s:
            urls = get_urls(destdir, s)
    else:
        color_print('Marking URLs', 'green')
        for url in ProgressBar.iterate(urls):
            with result.Result(url, root=destdir) as r:
                # This previously stored the builtin ``type`` class (no
                # local of that name exists in this function), which
                # never equals any of the expectation strings written
                # by get_urls().
                # NOTE(review): 'good' assumes callers expect the URLs
                # they supply to validate cleanly -- confirm against
                # the consumers of r['expected'].
                r['expected'] = 'good'

    args = [(url, destdir) for url in urls]

    color_print('Downloading VO files', 'green')
    ProgressBar.map(
        download, args, multiprocess=multiprocess)

    color_print('Validating VO files', 'green')
    ProgressBar.map(
        validate_vo, args, multiprocess=multiprocess)

    if stilts is not None:
        color_print('Validating with votlint', 'green')
        votlint_args = [(stilts, x, destdir) for x in urls]
        ProgressBar.map(
            votlint_validate, votlint_args, multiprocess=multiprocess)

    color_print('Generating HTML files', 'green')
    ProgressBar.map(
        write_html_result, args, multiprocess=multiprocess)

    with Spinner('Grouping results', 'green') as s:
        subsets = result.get_result_subsets(urls, destdir, s)

    color_print('Generating index', 'green')
    html.write_index(subsets, urls, destdir)

    color_print('Generating subindices', 'green')
    subindex_args = [(subset, destdir, len(urls)) for subset in subsets]
    ProgressBar.map(
        write_subindex, subindex_args, multiprocess=multiprocess)