1from __future__ import absolute_import
2from __future__ import division
3from __future__ import print_function
4from __future__ import unicode_literals
5
6import io
7import os
8import json
9import six
10import requests
11from .exceptions import RegistryError
12
13
14# Module API
15
class Registry(object):
    '''Allow loading Data Package profiles from a registry.

    The registry is a JSON document containing a list of profile metadata
    objects, each one identified by its "id" key. Profiles themselves are
    loaded lazily by `get()` and cached per instance.

    Args:
        registry_path_or_url (str): Path or URL to the registry's JSON file.
            It defaults to the local registry cache path.

    Raises:
        RegistryError: If there was some problem opening the registry file or
            its format was incorrect.
    '''

    # Public

    DEFAULT_REGISTRY_URL = 'https://specs.frictionlessdata.io/schemas/registry.json'
    DEFAULT_REGISTRY_PATH = os.path.join(
        os.path.dirname(os.path.abspath(__file__)),
        'profiles',
        'registry.json'
    )

    def __init__(self, registry_path_or_url=DEFAULT_REGISTRY_PATH):
        # Only a registry backed by an existing local file gets a base path;
        # it is later used to resolve the profiles' relative "schema_path".
        if os.path.isfile(registry_path_or_url):
            self._BASE_PATH = os.path.dirname(
                os.path.abspath(registry_path_or_url)
            )
        self._profiles = {}
        try:
            self._registry = self._get_registry(registry_path_or_url)
        except (IOError, ValueError) as e:
            six.raise_from(RegistryError(e), e)

    @property
    def available_profiles(self):
        '''dict: The available profiles' metadata keyed by their ids.'''
        return self._registry

    @property
    def base_path(self):
        '''str: The base path of this Registry (None if it's remote).'''
        return getattr(self, '_BASE_PATH', None)

    def get(self, profile_id):
        '''Returns the profile with the received ID as a dict

        If a local copy of the profile exists, it'll be returned. If not, it'll
        be downloaded from the web. The results are cached, so any subsequent
        calls won't hit the filesystem or the web.

        Args:
            profile_id (str): The ID of the profile you want.

        Returns:
            dict: The profile contents, or None if the registry has no
                profile with the received ID.

        Raises:
            RegistryError: If there was some problem opening the profile file
                or its format was incorrect.
        '''
        if profile_id not in self._profiles:
            try:
                self._profiles[profile_id] = self._get_profile(profile_id)
            except (ValueError, IOError) as e:
                six.raise_from(RegistryError(e), e)
        return self._profiles[profile_id]

    # Internal

    def _get_profile(self, profile_id):
        '''dict: Return the profile with the received ID as a dict (None if it
        doesn't exist).

        The local file at the metadata's "schema_path" (resolved against the
        base path) is tried first; the "schema" URL is used when there is no
        local path or when reading the local file fails with an IOError.

        Raises:
            IOError: If the profile couldn't be loaded from any of its
                locations.
            ValueError: If the loaded contents aren't valid JSON.
        '''
        profile_metadata = self._registry.get(profile_id)
        if not profile_metadata:
            return None

        path = self._get_absolute_path(profile_metadata.get('schema_path'))
        url = profile_metadata.get('schema')
        if not path:
            return self._load_json_url(url) if url else None

        try:
            return self._load_json_file(path)
        except IOError as local_exc:
            if not url:
                # No remote fallback available: surface the local error as is
                raise

            try:
                return self._load_json_url(url)
            except IOError:
                msg = (
                    'Error loading profile locally at "{path}" '
                    'and remotely at "{url}".'
                ).format(path=path, url=url)
                six.raise_from(IOError(msg), local_exc)

    def _get_registry(self, registry_path_or_url):
        '''dict: Return the registry as dict with profiles keyed by id.

        Raises:
            IOError: If the registry couldn't be read.
            ValueError: If the registry isn't valid JSON or isn't a list of
                objects that all have an "id" key.
        '''
        # Match the full scheme so local paths that merely begin with "http"
        # (e.g. "http_registry.json") are still read from the filesystem.
        if registry_path_or_url.startswith(('http://', 'https://')):
            profiles = self._load_json_url(registry_path_or_url)
        else:
            profiles = self._load_json_file(registry_path_or_url)
        try:
            return {profile['id']: profile for profile in profiles}
        except (KeyError, TypeError) as e:
            # TypeError covers registries that aren't a list of objects
            # (e.g. a JSON object, a list of strings or null).
            msg = (
                'Registry at "{path}" has no "id" column.'
            ).format(path=registry_path_or_url)
            six.raise_from(ValueError(msg), e)

    def _get_absolute_path(self, relative_path):
        '''str: Return the received relative_path joined with the base path
        (None if there were some error).'''
        try:
            return os.path.join(self.base_path, relative_path)
        except (AttributeError, TypeError):
            # Either there's no base path (remote registry) or the profile
            # has no usable relative path.
            return None

    def _load_json_file(self, path):
        '''dict: Return the JSON in the UTF-8 encoded file at path.'''
        with io.open(path, 'r', encoding='utf-8') as f:
            return json.load(f)

    def _load_json_url(self, url):
        '''dict: Return the JSON at the URL as a dict.'''
        res = requests.get(url)
        # Turn HTTP error statuses into exceptions instead of parsing the body
        res.raise_for_status()

        return res.json()
147