1from __future__ import absolute_import 2from __future__ import division 3from __future__ import print_function 4from __future__ import unicode_literals 5 6import io 7import os 8import json 9import six 10import requests 11from .exceptions import RegistryError 12 13 14# Module API 15 16class Registry(object): 17 '''Allow loading Data Package profiles from a registry. 18 19 Args: 20 registry_path_or_url (str): Path or URL to the registry's CSV file. It 21 defaults to the local registry cache path. 22 23 Raises: 24 RegistryError: If there was some problem opening the registry file or 25 its format was incorrect. 26 ''' 27 28 # Public 29 30 DEFAULT_REGISTRY_URL = 'https://specs.frictionlessdata.io/schemas/registry.json' 31 DEFAULT_REGISTRY_PATH = os.path.join( 32 os.path.dirname(os.path.abspath(__file__)), 33 'profiles', 34 'registry.json' 35 ) 36 37 def __init__(self, registry_path_or_url=DEFAULT_REGISTRY_PATH): 38 if os.path.isfile(registry_path_or_url): 39 self._BASE_PATH = os.path.dirname( 40 os.path.abspath(registry_path_or_url) 41 ) 42 try: 43 self._profiles = {} 44 self._registry = self._get_registry(registry_path_or_url) 45 except (IOError, ValueError) as e: 46 six.raise_from(RegistryError(e), e) 47 48 @property 49 def available_profiles(self): 50 '''dict: The available profiles' metadata keyed by their ids.''' 51 return self._registry 52 53 @property 54 def base_path(self): 55 '''str: The base path of this Registry (None if it's remote).''' 56 try: 57 return self._BASE_PATH 58 except AttributeError: 59 pass 60 61 def get(self, profile_id): 62 '''Returns the profile with the received ID as a dict 63 64 If a local copy of the profile exists, it'll be returned. If not, it'll 65 be downloaded from the web. The results are cached, so any subsequent 66 calls won't hit the filesystem or the web. 67 68 Args: 69 profile_id (str): The ID of the profile you want. 70 71 Raises: 72 RegistryError: If there was some problem opening the profile file 73 or its format was incorrect. 74 ''' 75 if profile_id not in self._profiles: 76 try: 77 self._profiles[profile_id] = self._get_profile(profile_id) 78 except (ValueError, 79 IOError) as e: 80 six.raise_from(RegistryError(e), e) 81 return self._profiles[profile_id] 82 83 # Internal 84 85 def _get_profile(self, profile_id): 86 '''dict: Return the profile with the received ID as a dict (None if it 87 doesn't exist).''' 88 profile_metadata = self._registry.get(profile_id) 89 if not profile_metadata: 90 return 91 92 path = self._get_absolute_path(profile_metadata.get('schema_path')) 93 url = profile_metadata.get('schema') 94 if path: 95 try: 96 return self._load_json_file(path) 97 except IOError as local_exc: 98 if not url: 99 raise local_exc 100 101 try: 102 return self._load_json_url(url) 103 except IOError: 104 msg = ( 105 'Error loading profile locally at "{path}" ' 106 'and remotely at "{url}".' 107 ).format(path=path, url=url) 108 six.raise_from(IOError(msg), local_exc) 109 elif url: 110 return self._load_json_url(url) 111 112 def _get_registry(self, registry_path_or_url): 113 '''dict: Return the registry as dict with profiles keyed by id.''' 114 if registry_path_or_url.startswith('http'): 115 profiles = self._load_json_url(registry_path_or_url) 116 else: 117 profiles = self._load_json_file(registry_path_or_url) 118 try: 119 registry = {} 120 for profile in profiles: 121 registry[profile['id']] = profile 122 return registry 123 except KeyError as e: 124 msg = ( 125 'Registry at "{path}" has no "id" column.' 126 ).format(path=registry_path_or_url) 127 six.raise_from(ValueError(msg), e) 128 129 def _get_absolute_path(self, relative_path): 130 '''str: Return the received relative_path joined with the base path 131 (None if there were some error).''' 132 try: 133 return os.path.join(self.base_path, relative_path) 134 except (AttributeError, TypeError): 135 pass 136 137 def _load_json_file(self, path): 138 with io.open(path, 'r', encoding='utf-8') as f: 139 return json.load(f) 140 141 def _load_json_url(self, url): 142 '''dict: Return the JSON at the local path or URL as a dict.''' 143 res = requests.get(url) 144 res.raise_for_status() 145 146 return res.json() 147