1# Licensed under a 3-clause BSD style license - see LICENSE.rst
2"""
3Validates a large collection of web-accessible VOTable files,
4and generates a report as a directory tree of HTML files.
5"""
6
7# STDLIB
8import os
9
10# LOCAL
11from astropy.utils.data import get_pkg_data_filename
12from . import html
13from . import result
14
15
16__all__ = ['make_validation_report']
17
18
def get_srcdir():
    """
    Return the directory portion of this module's ``__file__`` path.
    """
    srcdir, _ = os.path.split(__file__)
    return srcdir
21
22
def get_urls(destdir, s):
    """
    Load the packaged URL lists and record the expected outcome of each URL.

    The lists are read from the gzipped package data files
    ``data/urls/cone.{good,broken,incorrect}.dat.gz``.  For every URL
    seen for the first time, a `result.Result` rooted at *destdir* is
    opened and its ``'expected'`` entry is set to the name of the list
    the URL came from.

    Parameters
    ----------
    destdir : path-like
        Root directory handed to `result.Result` for each URL.

    s : iterator
        Advanced once with ``next`` for every line read (the caller
        passes the object yielded by a ``Spinner`` context).

    Returns
    -------
    urls : list of bytes
        The unique, whitespace-stripped URLs in first-seen order.  They
        are `bytes` because the data files are opened in binary mode.
    """
    import gzip

    categories = ['good', 'broken', 'incorrect']

    seen = set()
    urls = []
    # ``category`` rather than ``type`` so the builtin is not shadowed.
    for category in categories:
        filename = get_pkg_data_filename(
            f'data/urls/cone.{category}.dat.gz')
        with gzip.open(filename, 'rb') as fd:
            # Iterate the file lazily instead of materializing every
            # line up front with ``readlines()``.
            for line in fd:
                next(s)
                url = line.strip()
                if url in seen:
                    # A URL keeps the expectation of the first list
                    # it appeared in.
                    continue
                seen.add(url)
                with result.Result(url, root=destdir) as r:
                    r['expected'] = category
                urls.append(url)

    return urls
44
45
def download(args):
    """
    Call ``download_xml_content`` on the `result.Result` for one URL.

    Parameters
    ----------
    args : tuple
        ``(url, destdir)``.  Packed into a single argument so the
        function can be passed to ``ProgressBar.map``.
    """
    (address, root_dir) = args
    with result.Result(address, root=root_dir) as entry:
        entry.download_xml_content()
50
51
def validate_vo(args):
    """
    Call ``validate_vo`` on the `result.Result` for one URL.

    Parameters
    ----------
    args : tuple
        ``(url, destdir)``.  Packed into a single argument so the
        function can be passed to ``ProgressBar.map``.
    """
    (address, root_dir) = args
    with result.Result(address, root=root_dir) as entry:
        entry.validate_vo()
56
57
def votlint_validate(args):
    """
    Run ``validate_with_votlint`` for one URL unless a network error
    was recorded for it.

    Parameters
    ----------
    args : tuple
        ``(path_to_stilts_jar, url, destdir)``.  Packed into a single
        argument so the function can be passed to ``ProgressBar.map``.
    """
    (jar_path, address, root_dir) = args
    with result.Result(address, root=root_dir) as entry:
        # Only run votlint when the stored 'network_error' is None.
        no_network_error = entry['network_error'] is None
        if no_network_error:
            entry.validate_with_votlint(jar_path)
63
64
def write_html_result(args):
    """
    Pass the `result.Result` for one URL to ``html.write_result``.

    Parameters
    ----------
    args : tuple
        ``(url, destdir)``.  Packed into a single argument so the
        function can be passed to ``ProgressBar.map``.
    """
    (address, root_dir) = args
    with result.Result(address, root=root_dir) as entry:
        html.write_result(entry)
69
70
def write_subindex(args):
    """
    Write the index table for one result subset.

    Parameters
    ----------
    args : tuple
        ``(subset, destdir, total)``.  *subset* is unpacked into the
        positional arguments that follow *destdir* in the call to
        ``html.write_index_table``; *total* is forwarded as the
        ``total`` keyword.  Packed into a single argument so the
        function can be passed to ``ProgressBar.map``.
    """
    (group, root_dir, n_total) = args
    html.write_index_table(root_dir, *group, total=n_total)
74
75
def make_validation_report(
    urls=None, destdir='astropy.io.votable.validator.results',
    multiprocess=True, stilts=None):
    """
    Validates a large collection of web-accessible VOTable files.

    Generates a report as a directory tree of HTML files.

    Parameters
    ----------
    urls : list of str, optional
        If provided, is a list of HTTP urls to download VOTable files
        from.  If not provided, a built-in set of ~22,000 urls
        compiled by HEASARC will be used.

    destdir : path-like, optional
        The directory to write the report to.  By default, this is a
        directory called ``'astropy.io.votable.validator.results'`` in
        the current directory. If the directory does not exist, it will
        be created.

    multiprocess : bool, optional
        If `True` (default), perform validations in parallel using all
        of the cores on this machine.

    stilts : path-like, optional
        To perform validation with ``votlint`` from the Java-based
        `STILTS <http://www.star.bris.ac.uk/~mbt/stilts/>`_ VOTable
        parser, in addition to `astropy.io.votable`, set this to the
        path of the ``'stilts.jar'`` file.  ``java`` on the system shell
        path will be used to run it.

    Raises
    ------
    ValueError
        If *stilts* is given but no file exists at that path.

    Notes
    -----
    Downloads of each given URL will be performed only once and cached
    locally in *destdir*.  To refresh the cache, remove *destdir*
    first.
    """
    # Check the arguments first so a bad ``stilts`` path fails before
    # any import or work is done.
    if stilts is not None and not os.path.exists(stilts):
        raise ValueError(
            f'{stilts} does not exist.')

    from astropy.utils.console import (color_print, ProgressBar, Spinner)

    destdir = os.path.abspath(destdir)

    if urls is None:
        with Spinner('Loading URLs', 'green') as s:
            urls = get_urls(destdir, s)
    else:
        color_print('Marking URLs', 'green')
        for url in ProgressBar.iterate(urls):
            with result.Result(url, root=destdir) as r:
                # The previous code stored the *builtin* ``type`` here
                # (no local of that name exists in this function).
                # NOTE(review): user-supplied URLs have no recorded
                # expectation; 'good' is assumed -- confirm this is the
                # intended value.
                r['expected'] = 'good'

    # Each worker takes one packed tuple so it can go through
    # ProgressBar.map.
    args = [(url, destdir) for url in urls]

    color_print('Downloading VO files', 'green')
    ProgressBar.map(
        download, args, multiprocess=multiprocess)

    color_print('Validating VO files', 'green')
    ProgressBar.map(
        validate_vo, args, multiprocess=multiprocess)

    if stilts is not None:
        color_print('Validating with votlint', 'green')
        votlint_args = [(stilts, x, destdir) for x in urls]
        ProgressBar.map(
            votlint_validate, votlint_args, multiprocess=multiprocess)

    color_print('Generating HTML files', 'green')
    ProgressBar.map(
        write_html_result, args, multiprocess=multiprocess)

    with Spinner('Grouping results', 'green') as s:
        subsets = result.get_result_subsets(urls, destdir, s)

    color_print('Generating index', 'green')
    html.write_index(subsets, urls, destdir)

    color_print('Generating subindices', 'green')
    subindex_args = [(subset, destdir, len(urls)) for subset in subsets]
    ProgressBar.map(
        write_subindex, subindex_args, multiprocess=multiprocess)
161