1# -*- coding: utf-8 -*- # 2# Copyright 2015 Google LLC. All Rights Reserved. 3# 4# Licensed under the Apache License, Version 2.0 (the "License"); 5# you may not use this file except in compliance with the License. 6# You may obtain a copy of the License at 7# 8# http://www.apache.org/licenses/LICENSE-2.0 9# 10# Unless required by applicable law or agreed to in writing, software 11# distributed under the License is distributed on an "AS IS" BASIS, 12# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13# See the License for the specific language governing permissions and 14# limitations under the License. 15 16"""Utilities for interacting with Google Cloud Storage.""" 17 18from __future__ import absolute_import 19from __future__ import division 20from __future__ import unicode_literals 21 22import argparse 23import os 24import re 25import string 26 27from googlecloudsdk.api_lib.util import apis as core_apis 28from googlecloudsdk.core import config 29from googlecloudsdk.core import exceptions 30from googlecloudsdk.core import execution_utils 31from googlecloudsdk.core import log 32from googlecloudsdk.core import resources 33from googlecloudsdk.core.util import files as file_utils 34from googlecloudsdk.core.util import platforms 35import six 36 37 38GSUTIL_BUCKET_PREFIX = 'gs://' 39 40 41class Error(exceptions.Error): 42 """Base class for exceptions in this module.""" 43 44 45class GsutilError(Error): 46 """Exception raised when gsutil cannot be found.""" 47 48 49class InvalidNameError(ValueError): 50 """Error indicating that a given name is invalid.""" 51 52 def __init__(self, name, reason, type_name, url): 53 super(InvalidNameError, self).__init__( 54 ('Invalid {type} name [{name}]: {reason}\n\n' 55 'See {url} for details.').format(name=name, reason=reason, 56 type=type_name, url=url)) 57 58 59class InvalidBucketNameError(InvalidNameError): 60 """Error indicating that a given bucket name is invalid.""" 61 TYPE = 'bucket' 62 URL = 'https://cloud.google.com/storage/docs/naming#requirements' 63 64 def __init__(self, name, reason): 65 super(InvalidBucketNameError, self).__init__( 66 name, reason, self.TYPE, self.URL) 67 68 69class InvalidObjectNameError(InvalidNameError): 70 """Error indicating that a given object name is invalid.""" 71 TYPE = 'object' 72 URL = 'https://cloud.google.com/storage/docs/naming#objectnames' 73 74 def __init__(self, name, reason): 75 super(InvalidObjectNameError, self).__init__( 76 name, reason, self.TYPE, self.URL) 77 78 79VALID_BUCKET_CHARS_MESSAGE = """\ 80Bucket names must contain only lowercase letters, numbers, dashes (-), \ 81underscores (_), and dots (.).""" 82VALID_BUCKET_START_END_MESSAGE = """\ 83Bucket names must start and end with a number or letter.""" 84VALID_BUCKET_LENGTH_MESSAGE = """\ 85Bucket names must contain 3 to 63 characters. \ 86Names containing dots can contain up to 222 characters, but each \ 87dot-separated component can be no longer than 63 characters.""" 88VALID_BUCKET_DOTTED_DECIMAL_MESSAGE = """\ 89Bucket names cannot be represented as an IP address in dotted-decimal \ 90notation (for example, 192.168.5.4).""" 91 92 93VALID_OBJECT_LENGTH_MESSAGE = """\ 94Object names can contain any sequence of valid Unicode characters, \ 95of length 1-1024 bytes when UTF-8 encoded.""" 96VALID_OBJECT_CHARS_MESSAGE = """\ 97Object names must not contain Carriage Return or Line Feed characters.""" 98 99 100def _ValidateBucketName(name): 101 """Validate the given bucket name according to the naming requirements. 102 103 See https://cloud.google.com/storage/docs/naming#requirements 104 105 Args: 106 name: the name of the bucket, not including 'gs://' 107 108 Raises: 109 InvalidBucketNameError: if the given bucket name is invalid 110 """ 111 components = name.split('.') 112 if not (3 <= len(name) <= 222) or any(len(c) > 63 for c in components): 113 raise InvalidBucketNameError(name, VALID_BUCKET_LENGTH_MESSAGE) 114 115 if set(name) - set(string.ascii_lowercase + string.digits + '-_.'): 116 raise InvalidBucketNameError(name, VALID_BUCKET_CHARS_MESSAGE) 117 118 if set(name[0] + name[-1]) - set(string.ascii_lowercase + string.digits): 119 raise InvalidBucketNameError(name, VALID_BUCKET_START_END_MESSAGE) 120 121 if len(components) == 4 and ''.join(components).isdigit(): 122 raise InvalidBucketNameError(name, VALID_BUCKET_DOTTED_DECIMAL_MESSAGE) 123 124 # Not validating the following guidelines, since Google can create such 125 # buckets and they may be read from: 126 # - Bucket names cannot begin with the "goog" prefix. 127 # - Bucket names cannot contain "google" or close misspellings of "google". 128 129 # Not validating the following guideline, because it seems to be a guideline 130 # and not a requirement: 131 # - Also, for DNS compliance and future compatibility, you should not use 132 # underscores (_) or have a period adjacent to another period or dash. For 133 # example, ".." or "-." or ".-" are not valid in DNS names. 134 135 136def ValidateBucketUrl(url): 137 # These are things that cause unhelpful error messages during parsing, so we 138 # check for them here. 139 if url.startswith(GSUTIL_BUCKET_PREFIX): 140 name = url[len(GSUTIL_BUCKET_PREFIX):] 141 else: 142 name = url 143 _ValidateBucketName(name.rstrip('/')) 144 145 146class BucketReference(object): 147 """A wrapper class to make working with GCS bucket names easier.""" 148 149 def __init__(self, bucket): 150 """Creates a BucketReference. 151 152 Args: 153 bucket: str, The bucket name 154 """ 155 self.bucket = bucket 156 157 @classmethod 158 def FromMessage(cls, bucket): 159 """Create a bucket reference from a bucket message from the API.""" 160 return cls(bucket.name) 161 162 @classmethod 163 def FromUrl(cls, url): 164 """Parse a bucket URL ('gs://' optional) into a BucketReference.""" 165 return cls(resources.REGISTRY.Parse(url, collection='storage.buckets') 166 .bucket) 167 168 @classmethod 169 def FromArgument(cls, value, require_prefix=True): 170 """Validates that the argument is a reference to a Cloud Storage bucket.""" 171 if require_prefix and not value.startswith(GSUTIL_BUCKET_PREFIX): 172 raise argparse.ArgumentTypeError( 173 'Must be a valid Google Cloud Storage bucket of the form ' 174 '[gs://somebucket]') 175 176 try: 177 ValidateBucketUrl(value) 178 except InvalidBucketNameError as err: 179 raise argparse.ArgumentTypeError(six.text_type(err)) 180 181 return cls.FromUrl(value) 182 183 def ToUrl(self): 184 return 'gs://{}'.format(self.bucket) 185 186 def GetPublicUrl(self): 187 return 'https://storage.googleapis.com/{0}'.format(self.bucket) 188 189 def __eq__(self, other): 190 return self.bucket == other.bucket 191 192 def __ne__(self, other): 193 return not self.__eq__(other) 194 195 def __hash__(self): 196 return hash(self.bucket) 197 198 199class ObjectReference(object): 200 """Wrapper class to make working with Cloud Storage bucket/objects easier.""" 201 202 GSUTIL_OBJECT_REGEX = r'^gs://(?P<bucket>[^/]+)/(?P<object>.+)' 203 GSUTIL_BUCKET_REGEX = r'^gs://(?P<bucket>[^/]+)/?' 204 205 def __init__(self, bucket, name): 206 self.bucket = bucket 207 self.name = name 208 209 @property 210 def object(self): 211 """Emulates the object field on the object core/resource ref.""" 212 return self.name 213 214 @property 215 def bucket_ref(self): 216 """Gets a bucket reference for the bucket this object is in.""" 217 return BucketReference(self.bucket) 218 219 @classmethod 220 def FromMessage(cls, obj): 221 """Create an object reference from an object message from the API.""" 222 return cls(obj.bucket, obj.name) 223 224 @classmethod 225 def FromName(cls, bucket, name): 226 """Create an object reference after ensuring the name is valid.""" 227 _ValidateBucketName(bucket) 228 # TODO(b/118379726): Fully implement the object naming requirement checks. 229 # See https://cloud.google.com/storage/docs/naming#objectnames 230 if not 0 <= len(name.encode('utf-8')) <= 1024: 231 raise InvalidObjectNameError(name, VALID_OBJECT_LENGTH_MESSAGE) 232 if '\r' in name or '\n' in name: 233 raise InvalidObjectNameError(name, VALID_OBJECT_CHARS_MESSAGE) 234 return cls(bucket, name) 235 236 @classmethod 237 def FromBucketRef(cls, bucket_ref, name): 238 """Create an object reference from a bucket reference and a name.""" 239 return cls.FromName(bucket_ref.bucket, name) 240 241 @classmethod 242 def FromUrl(cls, url, allow_empty_object=False): 243 """Parse an object URL ('gs://' required) into an ObjectReference.""" 244 match = re.match(cls.GSUTIL_OBJECT_REGEX, url, re.DOTALL) 245 if match: 246 return cls.FromName(match.group('bucket'), match.group('object')) 247 match = re.match(cls.GSUTIL_BUCKET_REGEX, url, re.DOTALL) 248 if match: 249 if allow_empty_object: 250 return cls(match.group('bucket'), '') 251 else: 252 raise InvalidObjectNameError('', 'Empty object name is not allowed') 253 raise ValueError('Must be of form gs://bucket/object') 254 255 @classmethod 256 def FromArgument(cls, url, allow_empty_object=False): 257 try: 258 return cls.FromUrl(url, allow_empty_object=allow_empty_object) 259 except (InvalidObjectNameError, ValueError) as err: 260 raise argparse.ArgumentTypeError(six.text_type(err)) 261 262 @classmethod 263 def IsStorageUrl(cls, path): 264 try: 265 cls.FromUrl(path) 266 except ValueError: 267 return False 268 return True 269 270 def ToUrl(self): 271 return 'gs://{}/{}'.format(self.bucket, self.name) 272 273 def GetPublicUrl(self): 274 return 'https://storage.googleapis.com/{}/{}'.format(self.bucket, self.name) 275 276 def __eq__(self, other): 277 return self.ToUrl() == other.ToUrl() 278 279 def __ne__(self, other): 280 return not self.__eq__(other) 281 282 def __hash__(self): 283 return hash(self.ToUrl()) 284 285 286def GetMessages(): 287 """Import and return the appropriate storage messages module.""" 288 return core_apis.GetMessagesModule('storage', 'v1') 289 290 291def GetClient(): 292 """Import and return the appropriate storage client.""" 293 return core_apis.GetClientInstance('storage', 'v1') 294 295 296def _GetGsutilPath(): 297 """Determines the path to the gsutil binary.""" 298 sdk_bin_path = config.Paths().sdk_bin_path 299 if not sdk_bin_path: 300 # Check if gsutil is located on the PATH. 301 gsutil_path = file_utils.FindExecutableOnPath('gsutil') 302 if gsutil_path: 303 log.debug('Using gsutil found at [{path}]'.format(path=gsutil_path)) 304 return gsutil_path 305 else: 306 raise GsutilError('A path to the storage client `gsutil` could not be ' 307 'found. Please check your SDK installation.') 308 return os.path.join(sdk_bin_path, 'gsutil') 309 310 311def RunGsutilCommand(command_name, 312 command_args=None, 313 run_concurrent=False, 314 out_func=log.file_only_logger.debug, 315 err_func=log.file_only_logger.debug): 316 """Runs the specified gsutil command and returns the command's exit code. 317 318 WARNING: This is not compatible with python 3 and should no longer be used. 319 320 Args: 321 command_name: The gsutil command to run. 322 command_args: List of arguments to pass to the command. 323 run_concurrent: Whether concurrent uploads should be enabled while running 324 the command. 325 out_func: str->None, a function to call with the stdout of the gsutil 326 command. 327 err_func: str->None, a function to call with the stderr of the gsutil 328 command. 329 330 Returns: 331 The exit code of the call to the gsutil command. 332 """ 333 command_path = _GetGsutilPath() 334 335 args = ['-m', command_name] if run_concurrent else [command_name] 336 if command_args is not None: 337 args += command_args 338 339 if platforms.OperatingSystem.Current() == platforms.OperatingSystem.WINDOWS: 340 gsutil_args = execution_utils.ArgsForCMDTool(command_path + '.cmd', *args) 341 else: 342 gsutil_args = execution_utils.ArgsForExecutableTool(command_path, *args) 343 log.debug('Running command: [{args}]]'.format(args=' '.join(gsutil_args))) 344 return execution_utils.Exec(gsutil_args, no_exit=True, 345 out_func=out_func, 346 err_func=err_func) 347