1import re
2import threading
3import time
4import warnings
5import weakref
6from collections.abc import Mapping
7from collections import OrderedDict
8from functools import partial
9
10import requests
11from dateutil.parser import isoparse
12from logbook import Logger
13from represent import ReprHelper, ReprHelperMixin
14from rush.quota import Quota
15from rush.throttle import Throttle
16from rush.limiters.periodic import PeriodicLimiter
17from rush.stores.dictionary import DictionaryStore as RushDictionaryStore
18
19from .operators import _stringify_predicate_value
20
# Module-level logbook logger; the client uses it for debug output of request
# URLs and for the informational message emitted before a rate-limit sleep.
logger = Logger('spacetrack')

# Matches the leading run of word characters of a modeldef ``Type`` string.
# Group 1 is the base type name that ``_parse_predicates_data`` looks up in
# its type map.
type_re = re.compile(r'(\w+)')
# Matches an ``enum('a','b',...)`` type string. It is only used to validate
# the overall shape; the individual values are extracted separately with
# ``re.findall`` because a repeated group only keeps its last match.
enum_re = re.compile(r"""
    enum\(
        '(\w+)'      # First value
        (?:,         # Subsequent values optional
            '(\w+)'  # Capture string
        )*
    \)
""", re.VERBOSE)

# Default value of the ``base_url`` argument of ``SpaceTrackClient``.
BASE_URL = 'https://www.space-track.org/'
34
35
class AuthenticationError(Exception):
    """Space-Track authentication error.

    Raised by :meth:`SpaceTrackClient.authenticate` when the login response
    is a JSON mapping whose ``'Login'`` entry equals ``'Failed'``.
    """
38
39
class UnknownPredicateTypeWarning(RuntimeWarning):
    """Used to warn when a predicate type is unknown.

    Issued while parsing modeldef data when the base type name of a field is
    not present in the client's type map; the predicate is still created,
    using the unrecognised type name as-is.
    """
42
43
class Predicate(ReprHelperMixin):
    """Description of a single Space-Track predicate (query field).

    Instances mainly exist so that users get a readable repr of the
    predicates a request class accepts; :meth:`parse` additionally converts
    raw response values according to the predicate type.

    Parameters:
        name: Predicate name.
        type_: Type name, e.g. ``'str'``, ``'int'``, ``'float'``,
            ``'date'``, ``'datetime'`` or ``'enum'``.
        nullable: Whether the field may be null.
        default: Default value of the field.
        values: Allowed values, e.g. for enum predicates.
    """
    def __init__(self, name, type_, nullable=False, default=None, values=None):
        self.name = name
        self.type_ = type_
        self.nullable = nullable
        self.default = default

        # Only meaningful for some predicates (e.g. enums); None otherwise.
        self.values = values

    def _repr_helper_(self, r):
        for attr_name in ('name', 'type_', 'nullable', 'default'):
            r.keyword_from_attr(attr_name)

        # Keep the repr short: only show values when they were provided.
        if self.values is not None:
            r.keyword_from_attr('values')

    def parse(self, value):
        """Convert a raw response value according to this predicate's type.

        ``None`` is passed through unchanged, as is any value whose
        predicate type has no dedicated conversion.
        """
        if value is None:
            return value

        kind = self.type_

        if kind == 'float':
            return float(value)
        if kind == 'int':
            return int(value)
        if kind == 'datetime':
            return isoparse(value)
        if kind == 'date':
            return isoparse(value).date()

        return value
80
81
class SpaceTrackClient:
    """SpaceTrack client class.

    Parameters:
        identity: Space-Track username.
        password: Space-Track password.
        base_url: May be overridden to use e.g. https://testing.space-track.org/
        rush_store: A :mod:`rush` storage backend. By default, a
            :class:`~rush.stores.dictionary.DictionaryStore` is used. You may
            wish to use :class:`~rush.stores.redis.RedisStore` to
            follow rate limits from multiple instances.
        rush_key_prefix: You may choose a prefix for the keys that will be
            stored in `rush_store`, e.g. to avoid conflicts in a redis db.

    For more information, refer to the `Space-Track documentation`_.

    .. _`Space-Track documentation`: https://www.space-track.org/documentation
        #api-requestClasses

    .. data:: request_controllers

        Ordered dictionary of request controllers and their request classes in
        the following order.

        - `basicspacedata`
        - `expandedspacedata`
        - `fileshare`
        - `spephemeris`

        For example, if the ``spacetrack.file`` method is used without
        specifying which controller, the client will choose the `fileshare`
        controller (which comes before `spephemeris`).

        .. note::

            If new request classes and/or controllers are added to the
            Space-Track API but not yet to this library, you can safely
            subclass :class:`SpaceTrackClient` with a copy of this ordered
            dictionary to add them.

            That said, please open an issue on `GitHub`_ for me to add them to
            the library.

            .. _`GitHub`: https://github.com/python-astrodynamics/spacetrack
    """

    # "request class" methods will be looked up by request controller in this
    # order. The keys are created first (with None values) purely to fix the
    # iteration order; the sets of request classes are filled in below.
    request_controllers = OrderedDict.fromkeys([
        'basicspacedata',
        'expandedspacedata',
        'fileshare',
        'spephemeris',
    ])

    request_controllers['basicspacedata'] = {
        'announcement',
        'boxscore',
        'cdm_public',
        'decay',
        'gp',
        'gp_history',
        'launch_site',
        'omm',
        'satcat',
        'satcat_change',
        'satcat_debut',
        'tip',
        'tle',
        'tle_latest',
        'tle_publish',
    }

    request_controllers['expandedspacedata'] = {
        'car',
        'cdm',
        'maneuver',
        'maneuver_history',
        'organization',
        'satellite',
    }

    request_controllers['fileshare'] = {
        'delete',
        'download',
        'file',
        'folder',
        'upload',
    }

    # Note that 'download' and 'file' also appear under 'fileshare'; lookups
    # without an explicit controller resolve to 'fileshare' because it comes
    # first in the ordering above.
    request_controllers['spephemeris'] = {
        'download',
        'file',
        'file_history',
    }

    # Mapping of (class, controller) tuples to the set of predicate names
    # accepted by requests which do not return a modeldef. For these,
    # keyword arguments are validated against this set instead of querying
    # Space-Track.
    offline_predicates = {
        ('upload', 'fileshare'): {'folder_id', 'file'},
        ('download', 'spephemeris'): set(),
    }

    # These predicates are available for every request class.
    rest_predicates = {
        Predicate('predicates', 'str'),
        Predicate('metadata', 'enum', values=('true', 'false')),
        Predicate('limit', 'str'),
        Predicate('orderby', 'str'),
        Predicate('distinct', 'enum', values=('true', 'false')),
        Predicate(
            'format', 'enum',
            values=('json', 'xml', 'html', 'csv', 'tle', '3le', 'kvn', 'stream')),
        Predicate('emptyresult', 'enum', values=('show',)),
        Predicate('favorites', 'str'),
    }
198
199    def __init__(
200        self,
201        identity,
202        password,
203        base_url=BASE_URL,
204        rush_store=None,
205        rush_key_prefix='',
206    ):
207        #: :class:`requests.Session` instance. It can be mutated to configure
208        #: e.g. proxies.
209        self.session = self._create_session()
210        self.identity = identity
211        self.password = password
212
213        if not base_url.endswith('/'):
214            base_url += '/'
215        self.base_url = base_url
216
217        # If set, this will be called when we sleep for the rate limit.
218        self.callback = None
219
220        self._authenticated = False
221        self._predicates = dict()
222        self._controller_proxies = dict()
223
224        # From https://www.space-track.org/documentation#/api:
225        #   Space-track throttles API use in order to maintain consistent
226        #   performance for all users. To avoid error messages, please limit
227        #   your query frequency.
228        #   Limit API queries to less than 30 requests per minute / 300 requests
229        #   per hour
230        if rush_store is None:
231            rush_store = RushDictionaryStore()
232        limiter = PeriodicLimiter(rush_store)
233        self._per_minute_throttle = Throttle(
234            limiter=limiter,
235            rate=Quota.per_minute(30),
236        )
237        self._per_hour_throttle = Throttle(
238            limiter=limiter,
239            rate=Quota.per_hour(300),
240        )
241        self._per_minute_key = rush_key_prefix + 'st_req_min'
242        self._per_hour_key = rush_key_prefix + 'st_req_hr'
243
244    def _ratelimit_callback(self, until):
245        duration = int(round(until - time.monotonic()))
246        logger.info('Rate limit reached. Sleeping for {:d} seconds.', duration)
247
248        if self.callback is not None:
249            self.callback(until)
250
    @staticmethod
    def _create_session():
        """Create session for accessing the web.

        Returns a new :class:`requests.Session`.

        This method is overridden in
        :class:`spacetrack.aio.AsyncSpaceTrackClient` to use :mod:`aiohttp`
        instead of :mod:`requests`.
        """
        return requests.Session()
260
261    def authenticate(self):
262        """Authenticate with Space-Track.
263
264        Raises:
265            spacetrack.base.AuthenticationError: Incorrect login details.
266
267        .. note::
268
269            This method is called automatically when required.
270        """
271        if not self._authenticated:
272            login_url = self.base_url + 'ajaxauth/login'
273            data = {'identity': self.identity, 'password': self.password}
274            resp = self.session.post(login_url, data=data)
275
276            _raise_for_status(resp)
277
278            # If login failed, we get a JSON response with {'Login': 'Failed'}
279            resp_data = resp.json()
280            if isinstance(resp_data, Mapping):
281                if resp_data.get('Login', None) == 'Failed':
282                    raise AuthenticationError()
283
284            self._authenticated = True
285
286    def generic_request(self, class_, iter_lines=False, iter_content=False,
287                        controller=None, parse_types=False, **kwargs):
288        r"""Generic Space-Track query.
289
290        The request class methods use this method internally; the public
291        API is as follows:
292
293        .. code-block:: python
294
295            st.tle_publish(*args, **kw)
296            st.basicspacedata.tle_publish(*args, **kw)
297            st.file(*args, **kw)
298            st.fileshare.file(*args, **kw)
299            st.spephemeris.file(*args, **kw)
300
301        They resolve to the following calls respectively:
302
303        .. code-block:: python
304
305            st.generic_request('tle_publish', *args, **kw)
306            st.generic_request('tle_publish', *args, controller='basicspacedata', **kw)
307            st.generic_request('file', *args, **kw)
308            st.generic_request('file', *args, controller='fileshare', **kw)
309            st.generic_request('file', *args, controller='spephemeris', **kw)
310
311        Parameters:
312            class\_: Space-Track request class name
313            iter_lines: Yield result line by line
314            iter_content: Yield result in 100 KiB chunks.
315            controller: Optionally specify request controller to use.
316            parse_types: Parse string values in response according to type given
317                in predicate information, e.g. ``'2017-01-01'`` ->
318                ``datetime.date(2017, 1, 1)``.
319            **kwargs: These keywords must match the predicate fields on
320                Space-Track. You may check valid keywords with the following
321                snippet:
322
323                .. code-block:: python
324
325                    spacetrack = SpaceTrackClient(...)
326                    spacetrack.tle.get_predicates()
327                    # or
328                    spacetrack.get_predicates('tle')
329
330                See :func:`~spacetrack.operators._stringify_predicate_value` for
331                which Python objects are converted appropriately.
332
333        Yields:
334            Lines—stripped of newline characters—if ``iter_lines=True``
335
336        Yields:
337            100 KiB chunks if ``iter_content=True``
338
339        Returns:
340            Parsed JSON object, unless ``format`` keyword argument is passed.
341
342            .. warning::
343
344                Passing ``format='json'`` will return the JSON **unparsed**. Do
345                not set ``format`` if you want the parsed JSON object returned!
346        """
347        if iter_lines and iter_content:
348            raise ValueError('iter_lines and iter_content cannot both be True')
349
350        if 'format' in kwargs and parse_types:
351            raise ValueError('parse_types can only be used if format is unset.')
352
353        if controller is None:
354            controller = self._find_controller(class_)
355        else:
356            classes = self.request_controllers.get(controller, None)
357            if classes is None:
358                raise ValueError(f'Unknown request controller {controller!r}')
359            if class_ not in classes:
360                raise ValueError(
361                    f'Unknown request class {class_!r} for controller {controller!r}')
362
363        # Decode unicode unless class == download, including conversion of
364        # CRLF newlines to LF.
365        decode = (class_ != 'download')
366        if not decode and iter_lines:
367            error = (
368                'iter_lines disabled for binary data, since CRLF newlines '
369                'split over chunk boundaries would yield extra blank lines. '
370                'Use iter_content=True instead.')
371            raise ValueError(error)
372
373        self.authenticate()
374
375        url = f'{self.base_url}{controller}/query/class/{class_}'
376
377        offline_check = (class_, controller) in self.offline_predicates
378        valid_fields = {p.name for p in self.rest_predicates}
379        predicates = None
380
381        if not offline_check:
382            # Validate keyword argument names by querying valid predicates from
383            # Space-Track
384            predicates = self.get_predicates(class_, controller)
385            predicate_fields = {p.name for p in predicates}
386            valid_fields |= predicate_fields
387        else:
388            valid_fields |= self.offline_predicates[(class_, controller)]
389
390        for key, value in kwargs.items():
391            if key not in valid_fields:
392                raise TypeError(f"'{class_}' got an unexpected argument '{key}'")
393
394            if class_ == 'upload' and key == 'file':
395                continue
396
397            value = _stringify_predicate_value(value)
398
399            url += f'/{key}/{value}'
400
401        logger.debug(requests.utils.requote_uri(url))
402
403        if class_ == 'upload':
404            if 'file' not in kwargs:
405                raise TypeError("missing keyword argument: 'file'")
406
407            resp = self.session.post(url, files={'file': kwargs['file']})
408        else:
409            resp = self._ratelimited_get(url, stream=iter_lines or iter_content)
410
411        _raise_for_status(resp)
412
413        if resp.encoding is None:
414            resp.encoding = 'UTF-8'
415
416        if iter_lines:
417            return _iter_lines_generator(resp, decode_unicode=decode)
418        elif iter_content:
419            return _iter_content_generator(resp, decode_unicode=decode)
420        else:
421            # If format is specified, return that format unparsed. Otherwise,
422            # parse the default JSON response.
423            if 'format' in kwargs:
424                if decode:
425                    data = resp.text
426                    # Replace CRLF newlines with LF, Python will handle platform
427                    # specific newlines if written to file.
428                    data = data.replace('\r\n', '\n')
429                else:
430                    data = resp.content
431                return data
432            else:
433                data = resp.json()
434
435                if predicates is None or not parse_types:
436                    return data
437                else:
438                    return self._parse_types(data, predicates)
439
440    @staticmethod
441    def _parse_types(data, predicates):
442        predicate_map = {p.name: p for p in predicates}
443
444        for obj in data:
445            for key, value in obj.items():
446                if key.lower() in predicate_map:
447                    obj[key] = predicate_map[key.lower()].parse(value)
448
449        return data
450
451    def _ratelimited_get(self, *args, **kwargs):
452        """Perform get request, handling rate limiting."""
453        minute_limit = self._per_minute_throttle.check(self._per_minute_key, 1)
454        hour_limit = self._per_hour_throttle.check(self._per_hour_key, 1)
455
456        sleep_time = 0
457
458        if minute_limit.limited:
459            sleep_time = minute_limit.retry_after.total_seconds()
460
461        if hour_limit.limited:
462            sleep_time = max(sleep_time, hour_limit.retry_after.total_seconds())
463
464        if sleep_time > 0:
465            self._ratelimit_wait(sleep_time)
466
467        resp = self.session.get(*args, **kwargs)
468
469        # It's possible that Space-Track will return HTTP status 500 with a
470        # query rate limit violation. This can happen if a script is cancelled
471        # before it has finished sleeping to satisfy the rate limit and it is
472        # started again.
473        #
474        # Let's catch this specific instance and retry once if it happens.
475        if resp.status_code == 500:
476            # Let's only retry if the error page tells us it's a rate limit
477            # violation.
478            if 'violated your query rate limit' in resp.text:
479                # It seems that only the per-minute rate limit causes an HTTP
480                # 500 error. Breaking the per-hour limit seems to result in an
481                # email from Space-Track instead.
482                self._ratelimit_wait(
483                    self._per_minute_throttle.rate.period.total_seconds()
484                )
485                resp = self.session.get(*args, **kwargs)
486
487        return resp
488
489    def _ratelimit_wait(self, duration):
490        until = time.monotonic() + duration
491        t = threading.Thread(target=self._ratelimit_callback, args=(until,))
492        t.daemon = True
493        t.start()
494        time.sleep(duration)
495
496    def __getattr__(self, attr):
497        if attr in self.request_controllers:
498            controller_proxy = self._controller_proxies.get(attr)
499            if controller_proxy is None:
500                controller_proxy = _ControllerProxy(self, attr)
501                self._controller_proxies[attr] = controller_proxy
502            return controller_proxy
503
504        try:
505            controller = self._find_controller(attr)
506        except ValueError:
507            raise AttributeError(
508                f"'{self.__class__.__name__}' object has no attribute '{attr}'")
509
510        # generic_request can resolve the controller itself, but we
511        # pass it because we have to check if the class_ is owned
512        # by a controller here anyway.
513        function = partial(
514            self.generic_request, class_=attr, controller=controller)
515        function.get_predicates = partial(
516            self.get_predicates, class_=attr, controller=controller)
517        return function
518
519    def __dir__(self):
520        """Include request controllers and request classes."""
521        attrs = list(self.__dict__)
522        request_classes = {
523            class_ for classes in self.request_controllers.values()
524            for class_ in classes}
525
526        attrs += list(request_classes)
527        attrs += list(self.request_controllers)
528
529        return sorted(attrs)
530
531    def _find_controller(self, class_):
532        """Find first controller that matches given request class.
533
534        Order is specified by the keys of
535        ``SpaceTrackClient.request_controllers``
536        (:class:`~collections.OrderedDict`)
537        """
538        for controller, classes in self.request_controllers.items():
539            if class_ in classes:
540                return controller
541        else:
542            raise ValueError(f'Unknown request class {class_!r}')
543
544    def _download_predicate_data(self, class_, controller):
545        """Get raw predicate information for given request class, and cache for
546        subsequent calls.
547        """
548        self.authenticate()
549
550        url = f'{self.base_url}{controller}/modeldef/class/{class_}'
551
552        logger.debug(requests.utils.requote_uri(url))
553
554        resp = self._ratelimited_get(url)
555
556        _raise_for_status(resp)
557
558        return resp.json()['data']
559
560    def get_predicates(self, class_, controller=None):
561        """Get full predicate information for given request class, and cache
562        for subsequent calls.
563        """
564        if class_ not in self._predicates:
565            if controller is None:
566                controller = self._find_controller(class_)
567            else:
568                classes = self.request_controllers.get(controller, None)
569                if classes is None:
570                    raise ValueError(
571                        f'Unknown request controller {controller!r}')
572                if class_ not in classes:
573                    raise ValueError(
574                        f'Unknown request class {class_!r}')
575
576            predicates_data = self._download_predicate_data(class_, controller)
577            predicate_objects = self._parse_predicates_data(predicates_data)
578            self._predicates[class_] = predicate_objects
579
580        return self._predicates[class_]
581
582    def _parse_predicates_data(self, predicates_data):
583        predicate_objects = []
584        for field in predicates_data:
585            full_type = field['Type']
586            type_match = type_re.match(full_type)
587            if not type_match:
588                raise ValueError(
589                    f"Couldn't parse field type '{full_type}'")
590
591            type_name = type_match.group(1)
592            field_name = field['Field'].lower()
593            nullable = (field['Null'] == 'YES')
594            default = field['Default']
595
596            types = {
597                # Strings
598                'char': 'str',
599                'varchar': 'str',
600                'longtext': 'str',
601                'text': 'str',
602                # varbinary only used for 'file' request class, for the
603                # 'file_link' predicate.
604                'varbinary': 'str',
605                # Integers
606                'bigint': 'int',
607                'int': 'int',
608                'tinyint': 'int',
609                'smallint': 'int',
610                'mediumint': 'int',
611                # Floats
612                'decimal': 'float',
613                'float': 'float',
614                'double': 'float',
615                # Date/Times
616                'date': 'date',
617                'timestamp': 'datetime',
618                'datetime': 'datetime',
619                # Enum
620                'enum': 'enum',
621                # Bytes
622                'longblob': 'bytes',
623            }
624
625            if type_name not in types:
626                warnings.warn(
627                    f'Unknown predicate type {type_name!r}',
628                    UnknownPredicateTypeWarning,
629                )
630
631            predicate = Predicate(
632                name=field_name,
633                type_=types.get(type_name, type_name),
634                nullable=nullable,
635                default=default)
636
637            if type_name == 'enum':
638                enum_match = enum_re.match(full_type)
639                if not enum_match:
640                    raise ValueError(
641                        f"Couldn't parse enum type '{full_type}'")
642
643                # match.groups() doesn't work for repeating groups, use findall
644                predicate.values = tuple(re.findall(r"'(\w+)'", full_type))
645
646            predicate_objects.append(predicate)
647
648        return predicate_objects
649
650    def __repr__(self):
651        r = ReprHelper(self)
652        r.parantheses = ('<', '>')
653        r.keyword_from_attr('identity')
654        return str(r)
655
656
657class _ControllerProxy:
658    """Proxies request class methods with a preset request controller."""
659    def __init__(self, client, controller):
660        # The client will cache _ControllerProxy instances, so only store
661        # a weak reference to it.
662        self.client = weakref.proxy(client)
663        self.controller = controller
664
665    def __getattr__(self, attr):
666        if attr not in self.client.request_controllers[self.controller]:
667            raise AttributeError(f"'{self!r}' object has no attribute '{attr}'")
668
669        function = partial(
670            self.client.generic_request, class_=attr,
671            controller=self.controller)
672        function.get_predicates = partial(
673            self.client.get_predicates, class_=attr,
674            controller=self.controller)
675
676        return function
677
678    def __repr__(self):
679        r = ReprHelper(self)
680        r.parantheses = ('<', '>')
681        r.keyword_from_attr('controller')
682        return str(r)
683
684    def get_predicates(self, class_):
685        """Proxy ``get_predicates`` to client with stored request
686        controller.
687        """
688        return self.client.get_predicates(
689            class_=class_, controller=self.controller)
690
691
692def _iter_content_generator(response, decode_unicode):
693    """Generator used to yield 100 KiB chunks for a given response."""
694    for chunk in response.iter_content(100 * 1024, decode_unicode=decode_unicode):
695        if decode_unicode:
696            # Replace CRLF newlines with LF, Python will handle
697            # platform specific newlines if written to file.
698            chunk = chunk.replace('\r\n', '\n')
699            # Chunk could be ['...\r', '\n...'], stril trailing \r
700            chunk = chunk.rstrip('\r')
701        yield chunk
702
703
def _iter_lines_generator(response, decode_unicode):
    """Yield the response data one line at a time.

    With ``stream=True`` set on the request, this avoids loading the whole
    body into memory for large responses.

    The logic follows :meth:`requests.models.Response.iter_lines`, except
    that chunks come from :func:`~spacetrack.base._iter_content_generator`.
    Space-Track uses CRLF newlines, and :meth:`str.splitlines` on raw chunks
    could produce blank lines when one chunk ends with CR and the next one
    starts with LF.

    .. note:: This method is not reentrant safe.
    """
    carry = None
    chunks = _iter_content_generator(response, decode_unicode=decode_unicode)

    for piece in chunks:
        # Glue the unfinished line from the previous chunk to the front.
        text = piece if carry is None else carry + piece

        parts = text.splitlines()

        # If the last line ends on the very character the text ends on, no
        # line terminator followed it: the line may continue in the next
        # chunk and is held back.
        unterminated = (
            parts and parts[-1] and text and parts[-1][-1] == text[-1])
        carry = parts.pop() if unterminated else None

        yield from parts

    if carry is not None:
        yield carry
735
736
737def _raise_for_status(response):
738    """Raises stored :class:`HTTPError`, if one occurred.
739
740    This is the :meth:`requests.models.Response.raise_for_status` method,
741    modified to add the response from Space-Track, if given.
742    """
743
744    http_error_msg = ''
745
746    if 400 <= response.status_code < 500:
747        http_error_msg = (
748            f'{response.status_code} Client Error: {response.reason} '
749            f'for url: {response.url}'
750        )
751
752    elif 500 <= response.status_code < 600:
753        http_error_msg = (
754            f'{response.status_code} Server Error: {response.reason} '
755            f'for url: {response.url}'
756        )
757
758    if http_error_msg:
759        spacetrack_error_msg = None
760
761        try:
762            json = response.json()
763            if isinstance(json, Mapping):
764                spacetrack_error_msg = json['error']
765        except (ValueError, KeyError):
766            pass
767
768        if not spacetrack_error_msg:
769            spacetrack_error_msg = response.text
770
771        if spacetrack_error_msg:
772            http_error_msg += '\nSpace-Track response:\n' + spacetrack_error_msg
773
774        raise requests.HTTPError(http_error_msg, response=response)
775