1# -*- coding: utf-8 -*- #
2# Copyright 2015 Google LLC. All Rights Reserved.
3#
4# Licensed under the Apache License, Version 2.0 (the "License");
5# you may not use this file except in compliance with the License.
6# You may obtain a copy of the License at
7#
8#    http://www.apache.org/licenses/LICENSE-2.0
9#
10# Unless required by applicable law or agreed to in writing, software
11# distributed under the License is distributed on an "AS IS" BASIS,
12# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13# See the License for the specific language governing permissions and
14# limitations under the License.
15
16"""Utilities for interacting with Google Cloud Storage."""
17
18from __future__ import absolute_import
19from __future__ import division
20from __future__ import unicode_literals
21
22import argparse
23import os
24import re
25import string
26
27from googlecloudsdk.api_lib.util import apis as core_apis
28from googlecloudsdk.core import config
29from googlecloudsdk.core import exceptions
30from googlecloudsdk.core import execution_utils
31from googlecloudsdk.core import log
32from googlecloudsdk.core import resources
33from googlecloudsdk.core.util import files as file_utils
34from googlecloudsdk.core.util import platforms
35import six
36
37
# URL scheme prefix that marks a Cloud Storage ("gsutil-style") URL.
GSUTIL_BUCKET_PREFIX = 'gs://'
39
40
class Error(exceptions.Error):
  """Base class for exceptions in this module.

  Derives from the core SDK `exceptions.Error`.
  """
43
44
class GsutilError(Error):
  """Exception raised when gsutil cannot be found.

  Raised while resolving the gsutil path when there is no SDK bin path and no
  `gsutil` executable on the PATH.
  """
47
48
class InvalidNameError(ValueError):
  """Raised when a Cloud Storage resource name fails validation.

  The exception message states the kind of resource, echoes the offending
  name, gives the reason it was rejected, and points at a documentation URL.
  """

  def __init__(self, name, reason, type_name, url):
    """Builds the error message.

    Args:
      name: the rejected name.
      reason: human-readable explanation of why the name is invalid.
      type_name: kind of resource being named (e.g. 'bucket').
      url: documentation link included in the message.
    """
    template = ('Invalid {type} name [{name}]: {reason}\n\n'
                'See {url} for details.')
    message = template.format(
        type=type_name, name=name, reason=reason, url=url)
    super(InvalidNameError, self).__init__(message)
57
58
class InvalidBucketNameError(InvalidNameError):
  """Raised when a bucket name does not meet the naming requirements."""

  # Resource kind inserted into the error message.
  TYPE = 'bucket'
  # Documentation link inserted into the error message.
  URL = 'https://cloud.google.com/storage/docs/naming#requirements'

  def __init__(self, name, reason):
    """Creates the error for the bucket `name`, rejected for `reason`."""
    super(InvalidBucketNameError, self).__init__(
        name=name, reason=reason, type_name=self.TYPE, url=self.URL)
67
68
class InvalidObjectNameError(InvalidNameError):
  """Raised when an object name does not meet the naming requirements."""

  # Resource kind inserted into the error message.
  TYPE = 'object'
  # Documentation link inserted into the error message.
  URL = 'https://cloud.google.com/storage/docs/naming#objectnames'

  def __init__(self, name, reason):
    """Creates the error for the object `name`, rejected for `reason`."""
    super(InvalidObjectNameError, self).__init__(
        name=name, reason=reason, type_name=self.TYPE, url=self.URL)
77
78
# Reasons reported when a bucket name is rejected.
VALID_BUCKET_CHARS_MESSAGE = (
    'Bucket names must contain only lowercase letters, numbers, dashes (-), '
    'underscores (_), and dots (.).')
VALID_BUCKET_START_END_MESSAGE = (
    'Bucket names must start and end with a number or letter.')
VALID_BUCKET_LENGTH_MESSAGE = (
    'Bucket names must contain 3 to 63 characters. '
    'Names containing dots can contain up to 222 characters, but each '
    'dot-separated component can be no longer than 63 characters.')
VALID_BUCKET_DOTTED_DECIMAL_MESSAGE = (
    'Bucket names cannot be represented as an IP address in dotted-decimal '
    'notation (for example, 192.168.5.4).')


# Reasons reported when an object name is rejected.
VALID_OBJECT_LENGTH_MESSAGE = (
    'Object names can contain any sequence of valid Unicode characters, '
    'of length 1-1024 bytes when UTF-8 encoded.')
VALID_OBJECT_CHARS_MESSAGE = (
    'Object names must not contain Carriage Return or Line Feed characters.')
98
99
def _ValidateBucketName(name):
  """Checks a bucket name against the Cloud Storage naming requirements.

  The checks run in a fixed order (length, allowed characters, first/last
  character, dotted-decimal form) and the first failure is reported.

  See https://cloud.google.com/storage/docs/naming#requirements

  Args:
    name: the name of the bucket, not including 'gs://'

  Raises:
    InvalidBucketNameError: if the given bucket name is invalid
  """
  alphanumerics = set(string.ascii_lowercase + string.digits)
  dot_parts = name.split('.')

  # Overall length limit, plus the per-component cap for dotted names.
  # `split` always yields at least one part, so `max` is safe here.
  total_length_ok = 3 <= len(name) <= 222
  if not total_length_ok or max(len(part) for part in dot_parts) > 63:
    raise InvalidBucketNameError(name, VALID_BUCKET_LENGTH_MESSAGE)

  if not set(name).issubset(alphanumerics | set('-_.')):
    raise InvalidBucketNameError(name, VALID_BUCKET_CHARS_MESSAGE)

  if name[0] not in alphanumerics or name[-1] not in alphanumerics:
    raise InvalidBucketNameError(name, VALID_BUCKET_START_END_MESSAGE)

  # Four dot-separated parts made up solely of digits look like an IP address.
  if len(dot_parts) == 4 and name.replace('.', '').isdigit():
    raise InvalidBucketNameError(name, VALID_BUCKET_DOTTED_DECIMAL_MESSAGE)

  # Not validating the following guidelines, since Google can create such
  # buckets and they may be read from:
  # - Bucket names cannot begin with the "goog" prefix.
  # - Bucket names cannot contain "google" or close misspellings of "google".

  # Not validating the following guideline, because it seems to be a guideline
  # and not a requirement:
  # - Also, for DNS compliance and future compatibility, you should not use
  #   underscores (_) or have a period adjacent to another period or dash. For
  #   example, ".." or "-." or ".-" are not valid in DNS names.
134
135
def ValidateBucketUrl(url):
  """Validates the bucket name contained in a bucket URL.

  Args:
    url: str, a bucket URL or bare bucket name. A leading 'gs://' and any
      trailing slashes are removed before the name is checked.

  Raises:
    InvalidBucketNameError: if the bucket name is invalid.
  """
  # Malformed names produce unhelpful error messages during parsing, so they
  # are caught here first.
  has_prefix = url.startswith(GSUTIL_BUCKET_PREFIX)
  name_start = len(GSUTIL_BUCKET_PREFIX) if has_prefix else 0
  _ValidateBucketName(url[name_start:].rstrip('/'))
144
145
class BucketReference(object):
  """A wrapper class to make working with GCS bucket names easier."""

  def __init__(self, bucket):
    """Creates a BucketReference.

    Args:
      bucket: str, The bucket name
    """
    self.bucket = bucket

  @classmethod
  def FromMessage(cls, bucket):
    """Create a bucket reference from a bucket message from the API.

    Args:
      bucket: a bucket message; only its `name` attribute is read.

    Returns:
      BucketReference for the named bucket.
    """
    return cls(bucket.name)

  @classmethod
  def FromUrl(cls, url):
    """Parse a bucket URL ('gs://' optional) into a BucketReference."""
    return cls(resources.REGISTRY.Parse(url, collection='storage.buckets')
               .bucket)

  @classmethod
  def FromArgument(cls, value, require_prefix=True):
    """Validates that the argument is a reference to a Cloud Storage bucket.

    Args:
      value: str, the command-line argument value.
      require_prefix: bool, whether `value` must start with 'gs://'.

    Returns:
      BucketReference parsed from `value`.

    Raises:
      argparse.ArgumentTypeError: if the prefix is required but missing, or
        the bucket name is invalid.
    """
    if require_prefix and not value.startswith(GSUTIL_BUCKET_PREFIX):
      raise argparse.ArgumentTypeError(
          'Must be a valid Google Cloud Storage bucket of the form '
          '[gs://somebucket]')

    try:
      ValidateBucketUrl(value)
    except InvalidBucketNameError as err:
      raise argparse.ArgumentTypeError(six.text_type(err))

    return cls.FromUrl(value)

  def ToUrl(self):
    """Returns the 'gs://' URL of the bucket."""
    return 'gs://{}'.format(self.bucket)

  def GetPublicUrl(self):
    """Returns the https://storage.googleapis.com URL of the bucket."""
    return 'https://storage.googleapis.com/{0}'.format(self.bucket)

  def __eq__(self, other):
    """Compares by bucket name; defers to `other` if it has no `bucket`."""
    try:
      other_bucket = other.bucket
    except AttributeError:
      # Comparing against None or an unrelated type must not raise; let Python
      # fall back to its default comparison instead.
      return NotImplemented
    return self.bucket == other_bucket

  def __ne__(self, other):
    """Inverse of __eq__ (defined explicitly for Python 2)."""
    result = self.__eq__(other)
    if result is NotImplemented:
      return result
    return not result

  def __hash__(self):
    """Hashes the bucket name, so equal references hash equally."""
    return hash(self.bucket)
197
198
class ObjectReference(object):
  """Wrapper class to make working with Cloud Storage bucket/objects easier."""

  # Matches 'gs://<bucket>/<object>' with a non-empty object part.
  GSUTIL_OBJECT_REGEX = r'^gs://(?P<bucket>[^/]+)/(?P<object>.+)'
  # Matches 'gs://<bucket>' with an optional trailing slash.
  GSUTIL_BUCKET_REGEX = r'^gs://(?P<bucket>[^/]+)/?'

  def __init__(self, bucket, name):
    """Creates an ObjectReference without validating its parts.

    Args:
      bucket: str, the bucket name.
      name: str, the object name within the bucket.
    """
    self.bucket = bucket
    self.name = name

  @property
  def object(self):
    """Emulates the object field on the object core/resource ref."""
    return self.name

  @property
  def bucket_ref(self):
    """Gets a bucket reference for the bucket this object is in."""
    return BucketReference(self.bucket)

  @classmethod
  def FromMessage(cls, obj):
    """Create an object reference from an object message from the API.

    Args:
      obj: an object message; its `bucket` and `name` attributes are read.

    Returns:
      ObjectReference for that bucket/name pair.
    """
    return cls(obj.bucket, obj.name)

  @classmethod
  def FromName(cls, bucket, name):
    """Create an object reference after ensuring the name is valid.

    Args:
      bucket: str, the bucket name (without 'gs://').
      name: str, the object name.

    Returns:
      ObjectReference for the validated bucket/name pair.

    Raises:
      InvalidBucketNameError: if the bucket name is invalid.
      InvalidObjectNameError: if the object name is too long or contains a
        carriage return or line feed.
    """
    _ValidateBucketName(bucket)
    # TODO(b/118379726): Fully implement the object naming requirement checks.
    # See https://cloud.google.com/storage/docs/naming#objectnames
    # NOTE(review): the lower bound of 0 lets an empty name through here even
    # though the message says 1-1024 bytes -- confirm whether that is intended.
    if not 0 <= len(name.encode('utf-8')) <= 1024:
      raise InvalidObjectNameError(name, VALID_OBJECT_LENGTH_MESSAGE)
    if '\r' in name or '\n' in name:
      raise InvalidObjectNameError(name, VALID_OBJECT_CHARS_MESSAGE)
    return cls(bucket, name)

  @classmethod
  def FromBucketRef(cls, bucket_ref, name):
    """Create an object reference from a bucket reference and a name."""
    return cls.FromName(bucket_ref.bucket, name)

  @classmethod
  def FromUrl(cls, url, allow_empty_object=False):
    """Parse an object URL ('gs://' required) into an ObjectReference.

    Args:
      url: str, a URL of the form gs://bucket/object.
      allow_empty_object: bool, whether a bucket-only URL (gs://bucket or
        gs://bucket/) is accepted, yielding an empty object name.

    Returns:
      ObjectReference parsed from the URL.

    Raises:
      InvalidObjectNameError: if the object name is invalid, or is empty and
        `allow_empty_object` is False.
      ValueError: if the URL is not a gs:// URL or the bucket name is invalid.
    """
    # DOTALL lets '.' match newlines so that such names reach FromName and get
    # the specific "invalid characters" error rather than failing to match.
    match = re.match(cls.GSUTIL_OBJECT_REGEX, url, re.DOTALL)
    if match:
      return cls.FromName(match.group('bucket'), match.group('object'))
    match = re.match(cls.GSUTIL_BUCKET_REGEX, url, re.DOTALL)
    if match:
      if allow_empty_object:
        return cls(match.group('bucket'), '')
      else:
        raise InvalidObjectNameError('', 'Empty object name is not allowed')
    raise ValueError('Must be of form gs://bucket/object')

  @classmethod
  def FromArgument(cls, url, allow_empty_object=False):
    """Parses a command-line argument into an ObjectReference.

    Args:
      url: str, the argument value, expected to be a gs:// object URL.
      allow_empty_object: bool, forwarded to FromUrl.

    Returns:
      ObjectReference parsed from `url`.

    Raises:
      argparse.ArgumentTypeError: if the URL cannot be parsed or is invalid.
    """
    try:
      return cls.FromUrl(url, allow_empty_object=allow_empty_object)
    except (InvalidObjectNameError, ValueError) as err:
      raise argparse.ArgumentTypeError(six.text_type(err))

  @classmethod
  def IsStorageUrl(cls, path):
    """Returns True if `path` parses as a valid gs:// object URL."""
    try:
      cls.FromUrl(path)
    except ValueError:
      return False
    return True

  def ToUrl(self):
    """Returns the 'gs://bucket/object' URL of the object."""
    return 'gs://{}/{}'.format(self.bucket, self.name)

  def GetPublicUrl(self):
    """Returns the https://storage.googleapis.com URL of the object."""
    return 'https://storage.googleapis.com/{}/{}'.format(self.bucket, self.name)

  def __eq__(self, other):
    """Compares by gs:// URL; defers to `other` if it has no `ToUrl`."""
    other_to_url = getattr(other, 'ToUrl', None)
    if other_to_url is None:
      # Comparing against None or an unrelated type must not raise; let Python
      # fall back to its default comparison instead.
      return NotImplemented
    return self.ToUrl() == other_to_url()

  def __ne__(self, other):
    """Inverse of __eq__ (defined explicitly for Python 2)."""
    result = self.__eq__(other)
    if result is NotImplemented:
      return result
    return not result

  def __hash__(self):
    """Hashes the gs:// URL, so equal references hash equally."""
    return hash(self.ToUrl())
284
285
def GetMessages():
  """Returns the messages module for the 'storage' API at version 'v1'."""
  api_name, api_version = 'storage', 'v1'
  return core_apis.GetMessagesModule(api_name, api_version)
289
290
def GetClient():
  """Returns a client instance for the 'storage' API at version 'v1'."""
  api_name, api_version = 'storage', 'v1'
  return core_apis.GetClientInstance(api_name, api_version)
294
295
def _GetGsutilPath():
  """Locates the gsutil binary.

  The SDK bin directory is preferred; when it is not set, the PATH is
  searched instead.

  Returns:
    str, path to gsutil.

  Raises:
    GsutilError: if there is no SDK bin path and gsutil is not on the PATH.
  """
  sdk_bin_path = config.Paths().sdk_bin_path
  if sdk_bin_path:
    return os.path.join(sdk_bin_path, 'gsutil')

  # No SDK bin directory: check if gsutil is located on the PATH.
  gsutil_path = file_utils.FindExecutableOnPath('gsutil')
  if not gsutil_path:
    raise GsutilError('A path to the storage client `gsutil` could not be '
                      'found. Please check your SDK installation.')
  log.debug('Using gsutil found at [{path}]'.format(path=gsutil_path))
  return gsutil_path
309
310
def RunGsutilCommand(command_name,
                     command_args=None,
                     run_concurrent=False,
                     out_func=log.file_only_logger.debug,
                     err_func=log.file_only_logger.debug):
  """Runs the specified gsutil command and returns the command's exit code.

  WARNING: This is not compatible with python 3 and should no longer be used.

  Args:
    command_name: The gsutil command to run.
    command_args: List of arguments to pass to the command.
    run_concurrent: Whether concurrent uploads should be enabled while running
      the command.
    out_func: str->None, a function to call with the stdout of the gsutil
        command.
    err_func: str->None, a function to call with the stderr of the gsutil
        command.

  Returns:
    The exit code of the call to the gsutil command.

  Raises:
    GsutilError: if the gsutil binary cannot be located.
  """
  command_path = _GetGsutilPath()

  # '-m' is passed ahead of the command name when concurrency is requested.
  args = ['-m', command_name] if run_concurrent else [command_name]
  if command_args is not None:
    # `args` is a fresh list, so extending it leaves the caller's list intact.
    args += command_args

  if platforms.OperatingSystem.Current() == platforms.OperatingSystem.WINDOWS:
    # On Windows the '.cmd' variant of the gsutil path is invoked.
    gsutil_args = execution_utils.ArgsForCMDTool(command_path + '.cmd', *args)
  else:
    gsutil_args = execution_utils.ArgsForExecutableTool(command_path, *args)
  log.debug('Running command: [{args}]'.format(args=' '.join(gsutil_args)))
  return execution_utils.Exec(gsutil_args, no_exit=True,
                              out_func=out_func,
                              err_func=err_func)
347