1# Licensed under a 3-clause BSD style license - see LICENSE.rst
2
3"""
4This module contains convenience functions for getting a coordinate object
5for a named object by querying SESAME and getting the first returned result.
6Note that this is intended to be a convenience, and is very simple. If you
7need precise coordinates for an object you should find the appropriate
8reference for that measurement and input the coordinates manually.
9"""
10
11# Standard library
12import os
13import re
14import socket
15import urllib.request
16import urllib.parse
17import urllib.error
18
19# Astropy
20from astropy import units as u
21from .sky_coordinate import SkyCoord
22from astropy.utils import data
23from astropy.utils.data import download_file, get_file_contents
24from astropy.utils.state import ScienceState
25
26__all__ = ["get_icrs_coordinates"]
27
28
class sesame_url(ScienceState):
    """
    Science state holding the base URL(s) of the Sesame web-queryable
    database. The default value lists two hosts.
    """
    _value = [
        "http://cdsweb.u-strasbg.fr/cgi-bin/nph-sesame/",
        "http://vizier.cfa.harvard.edu/viz-bin/nph-sesame/",
    ]

    @classmethod
    def validate(cls, value):
        # TODO: no validation is implemented yet; the value is handed back
        # exactly as it was supplied.
        return value
40
41
class sesame_database(ScienceState):
    """
    Science state selecting the database that SESAME queries when the
    name resolve mechanism of the coordinates subpackage is used.

    Accepted values are 'all' (the default, which searches every
    database), 'simbad', 'ned', and 'vizier'.
    """
    _value = 'all'

    @classmethod
    def validate(cls, value):
        # Hand back recognised names unchanged; anything else is rejected.
        known = ('all', 'simbad', 'ned', 'vizier')
        if value in known:
            return value
        raise ValueError(f"Unknown database '{value}'")
56
57
class NameResolveError(Exception):
    """Raised when a name cannot be resolved to coordinates via Sesame."""
60
61
62def _parse_response(resp_data):
63    """
64    Given a string response from SESAME, parse out the coordinates by looking
65    for a line starting with a J, meaning ICRS J2000 coordinates.
66
67    Parameters
68    ----------
69    resp_data : str
70        The string HTTP response from SESAME.
71
72    Returns
73    -------
74    ra : str
75        The string Right Ascension parsed from the HTTP response.
76    dec : str
77        The string Declination parsed from the HTTP response.
78    """
79
80    pattr = re.compile(r"%J\s*([0-9\.]+)\s*([\+\-\.0-9]+)")
81    matched = pattr.search(resp_data)
82
83    if matched is None:
84        return None, None
85    else:
86        ra, dec = matched.groups()
87        return ra, dec
88
89
def get_icrs_coordinates(name, parse=False, cache=False):
    """
    Retrieve an ICRS object by using an online name resolving service to
    retrieve coordinates for the specified name. By default, this will
    search all available databases until a match is found. If you would like
    to specify the database, use the science state
    ``astropy.coordinates.name_resolve.sesame_database``. You can also
    specify a list of servers to use for querying Sesame using the science
    state ``astropy.coordinates.name_resolve.sesame_url``. This will try
    each one in order until a valid response is returned. By default, this
    list includes the main Sesame host and a mirror at vizier.  The
    configuration item `astropy.utils.data.Conf.remote_timeout` controls the
    number of seconds to wait for a response from the server before giving
    up.

    Parameters
    ----------
    name : str
        The name of the object to get coordinates for, e.g. ``'M42'``.
    parse : bool
        Whether to attempt extracting the coordinates from the name by
        parsing with a regex. For object catalog names that have
        J-coordinates embedded in their names, e.g.
        'CRTS SSS100805 J194428-420209', this may be much faster than a
        sesame query for the same object name. The coordinates extracted
        in this way may differ from the database coordinates by a few
        deci-arcseconds, so only use this option if you do not need
        sub-arcsecond accuracy for coordinates. If the name cannot be
        parsed, the Sesame query is used as a silent fallback.
    cache : bool, str, optional
        Determines whether to cache the results or not. Passed through to
        `~astropy.utils.data.download_file`, so pass "update" to update the
        cached value.

    Returns
    -------
    coord : `astropy.coordinates.SkyCoord` object
        The object's coordinates in the ICRS frame.

    Raises
    ------
    NameResolveError
        If every Sesame URL failed (URL error or timeout), or if the
        response that was received contains no coordinates for ``name``.

    """

    # If requested, first try to extract coordinates embedded in the object
    # name. Do this first since it may be much faster than the sesame query.
    # When the parser finds nothing we simply fall through to the query.
    if parse:
        from . import jparser
        if jparser.search(name):
            return jparser.to_skycoord(name)

    database = sesame_database.get()
    # The web API just takes the first letter of the database name
    db = database.upper()[0]

    # The query suffix is the same for every server, so build it once.
    query = f"{db}?{urllib.parse.quote(name)}"

    # Build one query URL per distinct host, preserving the configured order.
    urls = []
    seen_domains = set()
    for base_url in sesame_url.get():
        domain = urllib.parse.urlparse(base_url).netloc
        if domain in seen_domains:
            continue
        seen_domains.add(domain)

        # URLs always use "/" as the separator, so join by hand rather than
        # with os.path.join (which would insert "\\" on Windows).  Plain
        # concatenation also avoids running str.format over the base URL,
        # which would choke on any literal braces it contains.
        if base_url and not base_url.endswith("/"):
            base_url += "/"
        urls.append(base_url + query)

    exceptions = []
    for url in urls:
        try:
            resp_data = get_file_contents(
                download_file(url, cache=cache, show_progress=False))
            break
        except urllib.error.URLError as e:
            exceptions.append(e)
            continue
        except socket.timeout as e:
            # There are some cases where urllib does not catch socket.timeout
            # especially while receiving response data on an already previously
            # working request.  Attach a ``reason`` so that the exception can
            # be reported the same way as a URLError below.
            e.reason = ("Request took longer than the allowed "
                        f"{data.conf.remote_timeout:.1f} seconds")
            exceptions.append(e)
            continue

    # All Sesame URL's failed (the loop finished without hitting ``break``)
    else:
        messages = [f"{url}: {e.reason}"
                    for url, e in zip(urls, exceptions)]
        raise NameResolveError("All Sesame queries failed. Unable to "
                               "retrieve coordinates. See errors per URL "
                               f"below: \n {os.linesep.join(messages)}")

    ra, dec = _parse_response(resp_data)

    if ra is None or dec is None:
        # ``url`` is still bound to the server that produced the response.
        if db == "A":
            err = f"Unable to find coordinates for name '{name}' using {url}"
        else:
            err = f"Unable to find coordinates for name '{name}' in database {database} using {url}"

        raise NameResolveError(err)

    # Sesame reports both values in degrees.
    return SkyCoord(ra=ra, dec=dec, unit=(u.degree, u.degree), frame='icrs')
199