1# -*- coding: utf-8 -*-
2# Copyright 2013 Google Inc. All Rights Reserved.
3#
4# Licensed under the Apache License, Version 2.0 (the "License");
5# you may not use this file except in compliance with the License.
6# You may obtain a copy of the License at
7#
8#     http://www.apache.org/licenses/LICENSE-2.0
9#
10# Unless required by applicable law or agreed to in writing, software
11# distributed under the License is distributed on an "AS IS" BASIS,
12# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13# See the License for the specific language governing permissions and
14# limitations under the License.
15"""Implementation of compose command for Google Cloud Storage."""
16
17from __future__ import absolute_import
18
19from gslib.bucket_listing_ref import BucketListingObject
20from gslib.command import Command
21from gslib.command_argument import CommandArgument
22from gslib.cs_api_map import ApiSelector
23from gslib.encryption_helper import GetEncryptionTuple
24from gslib.exception import CommandException
25from gslib.storage_url import ContainsWildcard
26from gslib.storage_url import StorageUrlFromString
27from gslib.third_party.storage_apitools import storage_v1_messages as apitools_messages
28from gslib.translation_helper import PreconditionsFromHeaders
29
30MAX_COMPONENT_COUNT = 1024
31MAX_COMPOSE_ARITY = 32
32MAX_COMPONENT_RATE = 200
33
34_SYNOPSIS = """
35  gsutil compose gs://bucket/obj1 [gs://bucket/obj2 ...] gs://bucket/composite
36"""
37
38_DETAILED_HELP_TEXT = ("""
39<B>SYNOPSIS</B>
40""" + _SYNOPSIS + """
41
42
43<B>DESCRIPTION</B>
44  The compose command creates a new object whose content is the concatenation
45  of a given sequence of component objects under the same bucket. gsutil uses
46  the content type of the first source object to determine the destination
47  object's content type. For more information, please see:
48  https://cloud.google.com/storage/docs/composite-objects
49
50  Note also that the gsutil cp command can automatically split uploads for
51  large files into multiple component objects, upload them in parallel, and
52  compose them into a final object (which will be subject to the component
53  count limit). This will still perform all uploads from a single machine. For
54  extremely large files and/or very low per-machine bandwidth, you may want to
55  split the file and upload it from multiple machines, and later compose these
56  parts of the file manually. See the 'PARALLEL COMPOSITE UPLOADS' section under
57  'gsutil help cp' for details.
58
59  Appending simply entails uploading your new data to a temporary object,
60  composing it with the growing append-target, and deleting the temporary
61  object:
62
63    $ echo 'new data' | gsutil cp - gs://bucket/data-to-append
64    $ gsutil compose gs://bucket/append-target gs://bucket/data-to-append \\
65        gs://bucket/append-target
66    $ gsutil rm gs://bucket/data-to-append
67
68  Note that there is a limit (currently %d) to the number of components that can
69  be composed in a single operation.
70
71  There is a limit (currently %d) to the total number of components
72  for a given composite object. This means you can append to each object at most
73  %d times.
74
75  There is a per-project rate limit (currently %d) to the number of components
76  you can compose per second. This rate counts both the components being
77  appended to a composite object as well as the components being copied when
78  the composite object of which they are a part is copied.
79""" % (MAX_COMPOSE_ARITY, MAX_COMPONENT_COUNT, MAX_COMPONENT_COUNT - 1, MAX_COMPONENT_RATE))
80
81
class ComposeCommand(Command):
  """Implementation of gsutil compose command.

  The last command-line argument names the destination object; every argument
  before it names a source component (wildcards are expanded). All URLs must
  use the gs scheme and refer to the destination's bucket.
  """

  # Command specification. See base class for documentation.
  command_spec = Command.CreateCommandSpec(
      'compose',
      command_name_aliases=['concat'],
      usage_synopsis=_SYNOPSIS,
      min_args=1,
      # Up to MAX_COMPOSE_ARITY source URLs plus the destination URL.
      max_args=MAX_COMPOSE_ARITY + 1,
      supported_sub_args='',
      # Local file URLs are not accepted; RunCommand additionally requires
      # every URL to use the gs scheme (see CheckProvider).
      file_url_ok=False,
      provider_url_ok=False,
      urls_start_arg=1,
      gs_api_support=[ApiSelector.XML, ApiSelector.JSON],
      gs_default_api=ApiSelector.JSON,
      argparse_arguments=[
          CommandArgument.MakeZeroOrMoreCloudURLsArgument()
      ]
  )
  # Help specification. See help_provider.py for documentation.
  help_spec = Command.HelpSpec(
      help_name='compose',
      help_name_aliases=['concat'],
      help_type='command_help',
      help_one_line_summary=(
          'Concatenate a sequence of objects into a new composite object.'),
      help_text=_DETAILED_HELP_TEXT,
      subcommand_help_text={},
  )

  def CheckProvider(self, url):
    """Verifies that the given URL uses the 'gs' scheme.

    Args:
      url: URL object (as produced by StorageUrlFromString) to validate.

    Raises:
      CommandException: if url.scheme is anything other than 'gs'.
    """
    if url.scheme != 'gs':
      raise CommandException(
          '"compose" called on URL with unsupported provider (%s).' % str(url))

  # Command entry point.
  def RunCommand(self):
    """Command entry point for the compose command.

    Splits the destination (last argument) from the sources, expands any
    wildcard sources, builds the list of components and issues a single
    compose request. The destination's content type is copied from the first
    source object.

    Raises:
      CommandException: if any URL does not use the gs scheme, the destination
          URL is version-specific, a source is in a different bucket than the
          destination, the sources expand to more than MAX_COMPOSE_ARITY
          components, or no components are given.
    """
    # The destination is the final argument; what remains are the sources.
    target_url_str = self.args[-1]
    self.args = self.args[:-1]
    target_url = StorageUrlFromString(target_url_str)
    self.CheckProvider(target_url)
    if target_url.HasGeneration():
      raise CommandException('A version-specific URL (%s) cannot be '
                             'the destination for gsutil compose - abort.'
                             % target_url)

    dst_obj_metadata = apitools_messages.Object(name=target_url.object_name,
                                                bucket=target_url.bucket_name)

    components = []
    # Remember the first source object so we can get its content type.
    first_src_url = None
    for src_url_str in self.args:
      if ContainsWildcard(src_url_str):
        src_url_iter = self.WildcardIterator(src_url_str).IterObjects()
      else:
        src_url_iter = [BucketListingObject(StorageUrlFromString(src_url_str))]
      for blr in src_url_iter:
        src_url = blr.storage_url
        self.CheckProvider(src_url)

        if src_url.bucket_name != target_url.bucket_name:
          raise CommandException(
              'GCS does not support inter-bucket composing.')

        if first_src_url is None:
          first_src_url = src_url
        src_obj_metadata = (
            apitools_messages.ComposeRequest.SourceObjectsValueListEntry(
                name=src_url.object_name))
        if src_url.HasGeneration():
          # NOTE(review): the request's generation field is presumably an
          # integer field while the URL is assumed to carry the generation as
          # parsed text; int() is a no-op if it is already an integer. Only gs
          # URLs reach this point (CheckProvider above).
          src_obj_metadata.generation = int(src_url.generation)
        components.append(src_obj_metadata)
        # Avoid expanding too many components, and sanity check each name
        # expansion result. Checking inside the loop stops a broad wildcard
        # from being fully listed before the limit is reported.
        if len(components) > MAX_COMPOSE_ARITY:
          raise CommandException('"compose" called with too many component '
                                 'objects. Limit is %d.' % MAX_COMPOSE_ARITY)

    if not components:
      raise CommandException('"compose" requires at least 1 component object.')

    # Read the content type from the exact version of the first source that
    # will be composed: if the URL pins a generation, look that generation up
    # rather than whatever the live version happens to be.
    first_src_generation = None
    if first_src_url.HasGeneration():
      first_src_generation = first_src_url.generation
    dst_obj_metadata.contentType = self.gsutil_api.GetObjectMetadata(
        first_src_url.bucket_name, first_src_url.object_name,
        generation=first_src_generation,
        provider=first_src_url.scheme, fields=['contentType']).contentType

    # Preconditions are derived from the command's request headers, if any.
    preconditions = PreconditionsFromHeaders(self.headers or {})

    self.logger.info(
        'Composing %s from %d component object(s).',
        target_url, len(components))
    self.gsutil_api.ComposeObject(
        components, dst_obj_metadata, preconditions=preconditions,
        provider=target_url.scheme, encryption_tuple=GetEncryptionTuple())
179