# -*- coding: utf-8 -*-
# Copyright 2013 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Implementation of compose command for Google Cloud Storage."""

from __future__ import absolute_import

from gslib.bucket_listing_ref import BucketListingObject
from gslib.command import Command
from gslib.command_argument import CommandArgument
from gslib.cs_api_map import ApiSelector
from gslib.encryption_helper import GetEncryptionTuple
from gslib.exception import CommandException
from gslib.storage_url import ContainsWildcard
from gslib.storage_url import StorageUrlFromString
from gslib.third_party.storage_apitools import storage_v1_messages as apitools_messages
from gslib.translation_helper import PreconditionsFromHeaders

# Limits quoted in the help text below; MAX_COMPOSE_ARITY is also enforced
# while source URLs are expanded.
MAX_COMPONENT_COUNT = 1024
MAX_COMPOSE_ARITY = 32
MAX_COMPONENT_RATE = 200

_SYNOPSIS = """
  gsutil compose gs://bucket/obj1 [gs://bucket/obj2 ...] gs://bucket/composite
"""

_DETAILED_HELP_TEXT = ("""
<B>SYNOPSIS</B>
""" + _SYNOPSIS + """


<B>DESCRIPTION</B>
  The compose command creates a new object whose content is the concatenation
  of a given sequence of component objects under the same bucket. gsutil uses
  the content type of the first source object to determine the destination
  object's content type. For more information, please see:
  https://cloud.google.com/storage/docs/composite-objects

  Note also that the gsutil cp command can automatically split uploads for
  large files into multiple component objects, upload them in parallel, and
  compose them into a final object (which will be subject to the component
  count limit). This will still perform all uploads from a single machine. For
  extremely large files and/or very low per-machine bandwidth, you may want to
  split the file and upload it from multiple machines, and later compose these
  parts of the file manually. See the 'PARALLEL COMPOSITE UPLOADS' section under
  'gsutil help cp' for details.

  Appending simply entails uploading your new data to a temporary object,
  composing it with the growing append-target, and deleting the temporary
  object:

    $ echo 'new data' | gsutil cp - gs://bucket/data-to-append
    $ gsutil compose gs://bucket/append-target gs://bucket/data-to-append \\
        gs://bucket/append-target
    $ gsutil rm gs://bucket/data-to-append

  Note that there is a limit (currently %d) to the number of components that can
  be composed in a single operation.

  There is a limit (currently %d) to the total number of components
  for a given composite object. This means you can append to each object at most
  %d times.

  There is a per-project rate limit (currently %d) to the number of components
  you can compose per second. This rate counts both the components being
  appended to a composite object as well as the components being copied when
  the composite object of which they are a part is copied.
""" % (MAX_COMPOSE_ARITY, MAX_COMPONENT_COUNT, MAX_COMPONENT_COUNT - 1,
       MAX_COMPONENT_RATE))


class ComposeCommand(Command):
  """Implementation of gsutil compose command."""

  # Command specification. See base class for documentation.
  command_spec = Command.CreateCommandSpec(
      'compose',
      command_name_aliases=['concat'],
      usage_synopsis=_SYNOPSIS,
      min_args=1,
      # Up to MAX_COMPOSE_ARITY sources plus the trailing destination URL.
      max_args=MAX_COMPOSE_ARITY + 1,
      supported_sub_args='',
      # Not files, just object names without gs:// prefix.
      file_url_ok=False,
      provider_url_ok=False,
      urls_start_arg=1,
      gs_api_support=[ApiSelector.XML, ApiSelector.JSON],
      gs_default_api=ApiSelector.JSON,
      argparse_arguments=[
          CommandArgument.MakeZeroOrMoreCloudURLsArgument()
      ]
  )
  # Help specification. See help_provider.py for documentation.
  help_spec = Command.HelpSpec(
      help_name='compose',
      help_name_aliases=['concat'],
      help_type='command_help',
      help_one_line_summary=(
          'Concatenate a sequence of objects into a new composite object.'),
      help_text=_DETAILED_HELP_TEXT,
      subcommand_help_text={},
  )

  def CheckProvider(self, url):
    """Rejects URLs whose scheme is anything other than 'gs'.

    Args:
      url: Storage URL object exposing a `scheme` attribute.

    Raises:
      CommandException: if url.scheme is not 'gs'.
    """
    if url.scheme != 'gs':
      raise CommandException(
          '"compose" called on URL with unsupported provider (%s).' % str(url))

  def _IterSourceListings(self, src_url_str):
    """Returns the bucket listing refs that one source argument stands for.

    Args:
      src_url_str: A single source URL string from the command line.

    Returns:
      The wildcard iterator's object listing when the string contains a
      wildcard, otherwise a one-element list wrapping the parsed URL.
    """
    if ContainsWildcard(src_url_str):
      return self.WildcardIterator(src_url_str).IterObjects()
    return [BucketListingObject(StorageUrlFromString(src_url_str))]

  def _CollectComponents(self, src_url_strs, target_url):
    """Expands and validates the source arguments into compose components.

    Args:
      src_url_strs: Source URL strings, in the order given by the user.
      target_url: Parsed destination URL; every source must share its bucket.

    Returns:
      (components, first_src_url): the list of SourceObjectsValueListEntry
      messages in composition order, and the URL of the first source object.

    Raises:
      CommandException: if a source is not a gs:// URL, lives in a different
          bucket than the target, expansion exceeds MAX_COMPOSE_ARITY, or no
          source objects were found at all.
    """
    components = []
    # Remember the first source object so we can get its content type.
    first_src_url = None
    for src_url_str in src_url_strs:
      for blr in self._IterSourceListings(src_url_str):
        src_url = blr.storage_url
        self.CheckProvider(src_url)

        if src_url.bucket_name != target_url.bucket_name:
          raise CommandException(
              'GCS does not support inter-bucket composing.')

        if first_src_url is None:
          first_src_url = src_url
        src_obj_metadata = (
            apitools_messages.ComposeRequest.SourceObjectsValueListEntry(
                name=src_url.object_name))
        if src_url.HasGeneration():
          # NOTE(review): the compose API defines generation as an integer;
          # coerce in case the URL parser kept the digits as text. This is a
          # no-op when the value is already numeric.
          src_obj_metadata.generation = int(src_url.generation)
        components.append(src_obj_metadata)
        # Avoid expanding too many components, and sanity check each name
        # expansion result.
        if len(components) > MAX_COMPOSE_ARITY:
          raise CommandException('"compose" called with too many component '
                                 'objects. Limit is %d.' % MAX_COMPOSE_ARITY)

    if not components:
      raise CommandException('"compose" requires at least 1 component object.')
    return components, first_src_url

  # Command entry point.
  def RunCommand(self):
    """Command entry point for the compose command.

    The last argument is the destination; all preceding arguments are the
    sources, composed in the order given.

    Raises:
      CommandException: if the destination is not a gs:// URL or carries a
          generation, or if the sources fail validation (see
          _CollectComponents).
    """
    target_url_str = self.args[-1]
    self.args = self.args[:-1]
    target_url = StorageUrlFromString(target_url_str)
    self.CheckProvider(target_url)
    if target_url.HasGeneration():
      raise CommandException('A version-specific URL (%s) cannot be '
                             'the destination for gsutil compose - abort.'
                             % target_url)

    dst_obj_metadata = apitools_messages.Object(name=target_url.object_name,
                                                bucket=target_url.bucket_name)

    components, first_src_url = self._CollectComponents(self.args, target_url)

    # Look up the content type of the exact source version being composed.
    # Without the generation the lookup reads the live object, which may
    # differ from (or not exist alongside) a version-specific first source.
    # NOTE(review): relies on GetObjectMetadata accepting a `generation`
    # keyword, as other gsutil commands pass it - confirm against cloud_api.
    dst_obj_metadata.contentType = self.gsutil_api.GetObjectMetadata(
        first_src_url.bucket_name, first_src_url.object_name,
        generation=first_src_url.generation,
        provider=first_src_url.scheme, fields=['contentType']).contentType

    preconditions = PreconditionsFromHeaders(self.headers or {})

    self.logger.info(
        'Composing %s from %d component object(s).',
        target_url, len(components))
    self.gsutil_api.ComposeObject(
        components, dst_obj_metadata, preconditions=preconditions,
        provider=target_url.scheme, encryption_tuple=GetEncryptionTuple())