"""Manages dvc remotes that user can use with push/pull/status commands."""

from __future__ import unicode_literals

import dvc.logger as logger
from dvc.config import Config, ConfigError

from dvc.remote import Remote
from dvc.remote.s3 import RemoteS3
from dvc.remote.gs import RemoteGS
from dvc.remote.azure import RemoteAzure
from dvc.remote.ssh import RemoteSSH
from dvc.remote.hdfs import RemoteHDFS
from dvc.remote.local import RemoteLOCAL
from dvc.remote.http import RemoteHTTP


class DataCloud(object):
    """Class that manages dvc remotes.

    Args:
        repo (dvc.repo.Repo): repo instance that belongs to the repo that
            we are working on.
        config (dict): config of the repo.

    Raises:
        config.ConfigError: thrown when config has invalid format.
    """

    # Maps the legacy `core.cloud` names to the remote class that serves them.
    CLOUD_MAP = {
        "aws": RemoteS3,
        "gcp": RemoteGS,
        "azure": RemoteAzure,
        "ssh": RemoteSSH,
        "hdfs": RemoteHDFS,
        "local": RemoteLOCAL,
        "http": RemoteHTTP,
        "https": RemoteHTTP,
    }

    def __init__(self, repo, config=None):
        # NOTE: despite the `None` default, `config` is indexed right away,
        # so omitting it raises TypeError. Callers are expected to pass the
        # repo config.
        self.repo = repo
        self._config = config
        self._core = self._config[Config.SECTION_CORE]

    @property
    def _cloud(self):
        """Build the default remote from the core section of the config.

        The `core.remote` option takes precedence; the legacy `core.cloud`
        option is used as a fallback and triggers a warning.

        A new remote instance is built on every access, so callers should
        read this property once and keep the result.

        Returns:
            The default remote instance, or None if neither option is set.

        Raises:
            ConfigError: if the referenced remote/cloud section is missing
                or invalid.
        """
        remote = self._core.get(Config.SECTION_CORE_REMOTE, "")
        if remote != "":
            return self._init_remote(remote)

        if self._core.get(Config.SECTION_CORE_CLOUD, None):
            # backward compatibility
            msg = "using obsoleted config format. Consider updating."
            logger.warning(msg)
            return self._init_compat()

        return None

    def _init_remote(self, remote):
        """Instantiate the remote described by the named config section.

        Args:
            remote (str): name of the remote; it is formatted into the
                lower-cased remote section name that is looked up in the
                config.

        Raises:
            ConfigError: if the section is missing or empty.
        """
        section = Config.SECTION_REMOTE_FMT.format(remote).lower()
        cloud_config = self._config.get(section, None)
        if not cloud_config:
            msg = "can't find remote section '{}' in config"
            raise ConfigError(msg.format(section))

        return Remote(self.repo, cloud_config)

    def _init_compat(self):
        """Instantiate a remote from the legacy `core.cloud` config format.

        Returns:
            The remote instance, or None if the cloud name is blank.

        Raises:
            ConfigError: if the cloud name is unknown, its config section is
                missing, or the remote class reports it is unsupported.
        """
        name = self._core.get(Config.SECTION_CORE_CLOUD, "").strip().lower()
        if name == "":
            # `_cloud` is a read-only property, so it must not be assigned
            # to here (that would raise AttributeError); a blank name simply
            # means there is no default cloud.
            return None

        cloud_type = self.CLOUD_MAP.get(name, None)
        if not cloud_type:
            msg = "wrong cloud type '{}' specified".format(name)
            raise ConfigError(msg)

        cloud_config = self._config.get(name, None)
        if not cloud_config:
            msg = "can't find cloud section '{}' in config".format(name)
            raise ConfigError(msg)

        # NOTE: check if the class itself has everything needed for operation.
        # E.g. all the imported packages.
        if not cloud_type.supported(cloud_type.compat_config(cloud_config)):
            raise ConfigError("unsupported cloud '{}'".format(name))

        return self._init_cloud(cloud_config, cloud_type)

    def _init_cloud(self, cloud_config, cloud_type):
        """Instantiate `cloud_type` for this repo with `cloud_config`.

        Warns when the legacy `core.storagepath` option is set.
        """
        global_storage_path = self._core.get(Config.SECTION_CORE_STORAGEPATH)
        if global_storage_path:
            logger.warning("using obsoleted config format. Consider updating.")

        cloud = cloud_type(self.repo, cloud_config)
        return cloud

    def _get_cloud(self, remote, cmd):
        """Resolve the remote to operate on.

        Args:
            remote (str): optional remote name; when falsy, the default
                remote from the config is used.
            cmd (str): name of the dvc command, used in the error message.

        Raises:
            ConfigError: if no remote name is given and no default remote
                is configured.
        """
        if remote:
            return self._init_remote(remote)

        # Read the property once: every access builds a new remote and, for
        # the legacy config format, logs the deprecation warning again.
        cloud = self._cloud
        if cloud:
            return cloud

        raise ConfigError(
            "No remote repository specified. Setup default repository with\n"
            " dvc config core.remote <name>\n"
            "or use:\n"
            " dvc {} -r <name>\n".format(cmd)
        )

    def push(self, targets, jobs=None, remote=None, show_checksums=False):
        """Push data items in a cloud-agnostic way.

        Args:
            targets (list): list of targets to push to the cloud.
            jobs (int): number of jobs that can be running simultaneously.
            remote (str): optional name of the remote to push to.
                By default remote from core.remote config option is used.
            show_checksums (bool): show checksums instead of file names in
                information messages.
        """
        return self.repo.cache.local.push(
            targets,
            jobs=jobs,
            remote=self._get_cloud(remote, "push"),
            show_checksums=show_checksums,
        )

    def pull(self, targets, jobs=None, remote=None, show_checksums=False):
        """Pull data items in a cloud-agnostic way.

        Args:
            targets (list): list of targets to pull from the cloud.
            jobs (int): number of jobs that can be running simultaneously.
            remote (str): optional name of the remote to pull from.
                By default remote from core.remote config option is used.
            show_checksums (bool): show checksums instead of file names in
                information messages.
        """
        return self.repo.cache.local.pull(
            targets,
            jobs=jobs,
            remote=self._get_cloud(remote, "pull"),
            show_checksums=show_checksums,
        )

    def status(self, targets, jobs=None, remote=None, show_checksums=False):
        """Check status of data items in a cloud-agnostic way.

        Args:
            targets (list): list of targets to check status for.
            jobs (int): number of jobs that can be running simultaneously.
            remote (str): optional name of the remote to compare
                targets to. By default remote from core.remote config option
                is used.
            show_checksums (bool): show checksums instead of file names in
                information messages.
        """
        cloud = self._get_cloud(remote, "status")
        return self.repo.cache.local.status(
            targets, jobs=jobs, remote=cloud, show_checksums=show_checksums
        )