1# -*- coding: utf-8 -*- 2# Copyright 2012 Google Inc. All Rights Reserved. 3# 4# Licensed under the Apache License, Version 2.0 (the "License"); 5# you may not use this file except in compliance with the License. 6# You may obtain a copy of the License at 7# 8# http://www.apache.org/licenses/LICENSE-2.0 9# 10# Unless required by applicable law or agreed to in writing, software 11# distributed under the License is distributed on an "AS IS" BASIS, 12# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13# See the License for the specific language governing permissions and 14# limitations under the License. 15"""Name expansion iterator and result classes. 16 17Name expansion support for the various ways gsutil lets users refer to 18collections of data (via explicit wildcarding as well as directory, 19bucket, and bucket subdir implicit wildcarding). This class encapsulates 20the various rules for determining how these expansions are done. 21""" 22 23from __future__ import absolute_import 24from __future__ import print_function 25from __future__ import division 26from __future__ import unicode_literals 27 28import collections 29import logging 30import os 31import sys 32 33import six 34 35from apitools.base.py import encoding 36import gslib 37from gslib.exception import CommandException 38from gslib.exception import NO_URLS_MATCHED_GENERIC 39from gslib.exception import NO_URLS_MATCHED_TARGET 40from gslib.plurality_checkable_iterator import PluralityCheckableIterator 41from gslib.seek_ahead_thread import SeekAheadResult 42from gslib.third_party.storage_apitools import storage_v1_messages as apitools_messages 43import gslib.wildcard_iterator 44from gslib.wildcard_iterator import StorageUrlFromString 45 46 47class NameExpansionResult(object): 48 """Holds one fully expanded result from iterating over NameExpansionIterator. 49 50 The member data in this class need to be pickleable because 51 NameExpansionResult instances are passed through Multiprocessing.Queue. In 52 particular, don't include any boto state like StorageUri, since that pulls 53 in a big tree of objects, some of which aren't pickleable (and even if 54 they were, pickling/unpickling such a large object tree would result in 55 significant overhead). 56 57 The state held in this object is needed for handling the various naming cases 58 (e.g., copying from a single source URL to a directory generates different 59 dest URL names than copying multiple URLs to a directory, to be consistent 60 with naming rules used by the Unix cp command). For more details see comments 61 in _NameExpansionIterator. 62 """ 63 64 def __init__(self, source_storage_url, is_multi_source_request, 65 names_container, expanded_storage_url, expanded_result): 66 """Instantiates a result from name expansion. 67 68 Args: 69 source_storage_url: StorageUrl that was being expanded. 70 is_multi_source_request: bool indicator whether src_url_str expanded to 71 more than one BucketListingRef. 72 names_container: Bool indicator whether src_url names a container. 73 expanded_storage_url: StorageUrl that was expanded. 74 expanded_result: cloud object metadata in MessageToJson form (for 75 pickleability), if any was iterated; None otherwise. 76 Consumers must call JsonToMessage to get an apitools Object. 77 """ 78 self.source_storage_url = source_storage_url 79 self.is_multi_source_request = is_multi_source_request 80 self.names_container = names_container 81 self.expanded_storage_url = expanded_storage_url 82 self.expanded_result = encoding.MessageToJson( 83 expanded_result) if expanded_result else None 84 85 def __repr__(self): 86 return '%s' % self.expanded_storage_url 87 88 89class _NameExpansionIterator(object): 90 """Class that iterates over all source URLs passed to the iterator. 91 92 See details in __iter__ function doc. 93 """ 94 95 def __init__(self, 96 command_name, 97 debug, 98 logger, 99 gsutil_api, 100 url_strs, 101 recursion_requested, 102 all_versions=False, 103 cmd_supports_recursion=True, 104 project_id=None, 105 ignore_symlinks=False, 106 continue_on_error=False, 107 bucket_listing_fields=None): 108 """Creates a NameExpansionIterator. 109 110 Args: 111 command_name: name of command being run. 112 debug: Debug level to pass to underlying iterators (range 0..3). 113 logger: logging.Logger object. 114 gsutil_api: Cloud storage interface. Settable for testing/mocking. 115 url_strs: PluralityCheckableIterator of URL strings needing expansion. 116 recursion_requested: True if -r specified on command-line. If so, 117 listings will be flattened so mapped-to results contain objects 118 spanning subdirectories. 119 all_versions: Bool indicating whether to iterate over all object versions. 120 cmd_supports_recursion: Bool indicating whether this command supports a 121 '-r' flag. Useful for printing helpful error messages. 122 project_id: Project id to use for bucket retrieval. 123 ignore_symlinks: If True, ignore symlinks during iteration. 124 continue_on_error: If true, yield no-match exceptions encountered during 125 iteration instead of raising them. 126 bucket_listing_fields: Iterable fields to include in expanded results. 127 Ex. ['name', 'acl']. Underyling iterator is responsible for converting 128 these to list-style format ['items/name', 'items/acl']. If this is 129 None, only the object name is included in the result. 130 131 Examples of _NameExpansionIterator with recursion_requested=True: 132 - Calling with one of the url_strs being 'gs://bucket' will enumerate all 133 top-level objects, as will 'gs://bucket/' and 'gs://bucket/*'. 134 - 'gs://bucket/**' will enumerate all objects in the bucket. 135 - 'gs://bucket/abc' will enumerate either the single object abc or, if 136 abc is a subdirectory, all objects under abc and any of its 137 subdirectories. 138 - 'gs://bucket/abc/**' will enumerate all objects under abc or any of its 139 subdirectories. 140 - 'file:///tmp' will enumerate all files under /tmp, as will 141 'file:///tmp/*' 142 - 'file:///tmp/**' will enumerate all files under /tmp or any of its 143 subdirectories. 144 145 Example if recursion_requested=False: 146 calling with gs://bucket/abc/* lists matching objects 147 or subdirs, but not sub-subdirs or objects beneath subdirs. 148 149 Note: In step-by-step comments below we give examples assuming there's a 150 gs://bucket with object paths: 151 abcd/o1.txt 152 abcd/o2.txt 153 xyz/o1.txt 154 xyz/o2.txt 155 and a directory file://dir with file paths: 156 dir/a.txt 157 dir/b.txt 158 dir/c/ 159 """ 160 self.command_name = command_name 161 self.debug = debug 162 self.logger = logger 163 self.gsutil_api = gsutil_api 164 self.url_strs = url_strs 165 self.recursion_requested = recursion_requested 166 self.all_versions = all_versions 167 # Check self.url_strs.HasPlurality() at start because its value can change 168 # if url_strs is itself an iterator. 169 self.url_strs.has_plurality = self.url_strs.HasPlurality() 170 self.cmd_supports_recursion = cmd_supports_recursion 171 self.project_id = project_id 172 self.ignore_symlinks = ignore_symlinks 173 self.continue_on_error = continue_on_error 174 self.bucket_listing_fields = (set(['name']) if not bucket_listing_fields 175 else bucket_listing_fields) 176 177 # Map holding wildcard strings to use for flat vs subdir-by-subdir listings. 178 # (A flat listing means show all objects expanded all the way down.) 179 self._flatness_wildcard = {True: '**', False: '*'} 180 181 def __iter__(self): 182 """Iterates over all source URLs passed to the iterator. 183 184 For each src url, expands wildcards, object-less bucket names, 185 subdir bucket names, and directory names, and generates a flat listing of 186 all the matching objects/files. 187 188 You should instantiate this object using the static factory function 189 NameExpansionIterator, because consumers of this iterator need the 190 PluralityCheckableIterator wrapper built by that function. 191 192 Yields: 193 gslib.name_expansion.NameExpansionResult. 194 195 Raises: 196 CommandException: if errors encountered. 197 """ 198 for url_str in self.url_strs: 199 storage_url = StorageUrlFromString(url_str) 200 201 if (storage_url.IsFileUrl() and 202 (storage_url.IsStream() or storage_url.IsFifo())): 203 if self.url_strs.has_plurality: 204 raise CommandException('Multiple URL strings are not supported ' 205 'with streaming ("-") URLs or named pipes.') 206 yield NameExpansionResult(storage_url, False, False, storage_url, None) 207 continue 208 209 # Step 1: Expand any explicitly specified wildcards. The output from this 210 # step is an iterator of BucketListingRef. 211 # Starting with gs://buck*/abc* this step would expand to gs://bucket/abcd 212 213 src_names_bucket = False 214 if (storage_url.IsCloudUrl() and storage_url.IsBucket() and 215 not self.recursion_requested): 216 # UNIX commands like rm and cp will omit directory references. 217 # If url_str refers only to buckets and we are not recursing, 218 # then produce references of type BUCKET, because they are guaranteed 219 # to pass through Step 2 and be omitted in Step 3. 220 post_step1_iter = PluralityCheckableIterator( 221 self.WildcardIterator(url_str).IterBuckets(bucket_fields=['id'])) 222 else: 223 # Get a list of objects and prefixes, expanding the top level for 224 # any listed buckets. If our source is a bucket, however, we need 225 # to treat all of the top level expansions as names_container=True. 226 post_step1_iter = PluralityCheckableIterator( 227 self.WildcardIterator(url_str).IterAll( 228 bucket_listing_fields=self.bucket_listing_fields, 229 expand_top_level_buckets=True)) 230 if storage_url.IsCloudUrl() and storage_url.IsBucket(): 231 src_names_bucket = True 232 233 # Step 2: Expand bucket subdirs. The output from this 234 # step is an iterator of (names_container, BucketListingRef). 235 # Starting with gs://bucket/abcd this step would expand to: 236 # iter([(True, abcd/o1.txt), (True, abcd/o2.txt)]). 237 subdir_exp_wildcard = self._flatness_wildcard[self.recursion_requested] 238 if self.recursion_requested: 239 post_step2_iter = _ImplicitBucketSubdirIterator( 240 self, post_step1_iter, subdir_exp_wildcard, 241 self.bucket_listing_fields) 242 else: 243 post_step2_iter = _NonContainerTuplifyIterator(post_step1_iter) 244 post_step2_iter = PluralityCheckableIterator(post_step2_iter) 245 246 # Because we actually perform and check object listings here, this will 247 # raise if url_args includes a non-existent object. However, 248 # plurality_checkable_iterator will buffer the exception for us, not 249 # raising it until the iterator is actually asked to yield the first 250 # result. 251 if post_step2_iter.IsEmpty(): 252 if self.continue_on_error: 253 try: 254 raise CommandException(NO_URLS_MATCHED_TARGET % url_str) 255 except CommandException as e: 256 # Yield a specialized tuple of (exception, stack_trace) to 257 # the wrapping PluralityCheckableIterator. 258 yield (e, sys.exc_info()[2]) 259 else: 260 raise CommandException(NO_URLS_MATCHED_TARGET % url_str) 261 262 # Step 3. Omit any directories, buckets, or bucket subdirectories for 263 # non-recursive expansions. 264 post_step3_iter = PluralityCheckableIterator( 265 _OmitNonRecursiveIterator(post_step2_iter, self.recursion_requested, 266 self.command_name, 267 self.cmd_supports_recursion, self.logger)) 268 269 src_url_expands_to_multi = post_step3_iter.HasPlurality() 270 is_multi_source_request = (self.url_strs.has_plurality or 271 src_url_expands_to_multi) 272 273 # Step 4. Expand directories and buckets. This step yields the iterated 274 # values. Starting with gs://bucket this step would expand to: 275 # [abcd/o1.txt, abcd/o2.txt, xyz/o1.txt, xyz/o2.txt] 276 # Starting with file://dir this step would expand to: 277 # [dir/a.txt, dir/b.txt, dir/c/] 278 for (names_container, blr) in post_step3_iter: 279 src_names_container = src_names_bucket or names_container 280 281 if blr.IsObject(): 282 yield NameExpansionResult(storage_url, is_multi_source_request, 283 src_names_container, blr.storage_url, 284 blr.root_object) 285 else: 286 # Use implicit wildcarding to do the enumeration. 287 # At this point we are guaranteed that: 288 # - Recursion has been requested because non-object entries are 289 # filtered in step 3 otherwise. 290 # - This is a prefix or bucket subdirectory because only 291 # non-recursive iterations product bucket references. 292 expanded_url = StorageUrlFromString(blr.url_string) 293 if expanded_url.IsFileUrl(): 294 # Convert dir to implicit recursive wildcard. 295 url_to_iterate = '%s%s%s' % (blr, os.sep, subdir_exp_wildcard) 296 else: 297 # Convert subdir to implicit recursive wildcard. 298 url_to_iterate = expanded_url.CreatePrefixUrl( 299 wildcard_suffix=subdir_exp_wildcard) 300 301 wc_iter = PluralityCheckableIterator( 302 self.WildcardIterator(url_to_iterate).IterObjects( 303 bucket_listing_fields=self.bucket_listing_fields)) 304 src_url_expands_to_multi = (src_url_expands_to_multi or 305 wc_iter.HasPlurality()) 306 is_multi_source_request = (self.url_strs.has_plurality or 307 src_url_expands_to_multi) 308 # This will be a flattened listing of all underlying objects in the 309 # subdir. 310 for blr in wc_iter: 311 yield NameExpansionResult(storage_url, is_multi_source_request, 312 True, blr.storage_url, blr.root_object) 313 314 def WildcardIterator(self, url_string): 315 """Helper to instantiate gslib.WildcardIterator. 316 317 Args are same as gslib.WildcardIterator interface, but this method fills 318 in most of the values from instance state. 319 320 Args: 321 url_string: URL string naming wildcard objects to iterate. 322 323 Returns: 324 Wildcard iterator over URL string. 325 """ 326 return gslib.wildcard_iterator.CreateWildcardIterator( 327 url_string, 328 self.gsutil_api, 329 all_versions=self.all_versions, 330 project_id=self.project_id, 331 ignore_symlinks=self.ignore_symlinks, 332 logger=self.logger) 333 334 335class SeekAheadNameExpansionIterator(object): 336 """Creates and wraps a _NameExpansionIterator and yields SeekAheadResults. 337 338 Unlike the NameExpansionIterator, which can make API calls upon __init__ 339 to check for plurality, this iterator does no work until the first iteration 340 occurs. 341 """ 342 343 def __init__(self, 344 command_name, 345 debug, 346 gsutil_api, 347 url_strs, 348 recursion_requested, 349 all_versions=False, 350 cmd_supports_recursion=True, 351 project_id=None, 352 ignore_symlinks=False): 353 """Initializes a _NameExpansionIterator with the inputs.""" 354 355 # Count data bytes only will be transferred/rewritten. 356 # Note that the rsync command uses a different iterator, thus it is not 357 # included here. 358 self.count_data_bytes = command_name in ('cp', 'mv', 'rewrite') 359 360 # Only query the file size if we are counting data bytes, as this may 361 # result in stat'ing files, which is more expensive. 362 bucket_listing_fields = ['size'] if self.count_data_bytes else None 363 364 self.name_expansion_iterator = _NameExpansionIterator( 365 command_name, 366 debug, 367 logging.getLogger('dummy'), 368 gsutil_api, 369 PluralityCheckableIterator(url_strs), 370 recursion_requested, 371 all_versions=all_versions, 372 cmd_supports_recursion=cmd_supports_recursion, 373 project_id=project_id, 374 ignore_symlinks=ignore_symlinks, 375 continue_on_error=True, 376 bucket_listing_fields=bucket_listing_fields) 377 378 def __iter__(self): 379 for name_expansion_result in self.name_expansion_iterator: 380 if self.count_data_bytes and name_expansion_result.expanded_result: 381 iterated_metadata = encoding.JsonToMessage( 382 apitools_messages.Object, name_expansion_result.expanded_result) 383 iterated_size = iterated_metadata.size or 0 384 yield SeekAheadResult(data_bytes=iterated_size) 385 else: 386 yield SeekAheadResult() 387 388 389def NameExpansionIterator(command_name, 390 debug, 391 logger, 392 gsutil_api, 393 url_strs, 394 recursion_requested, 395 all_versions=False, 396 cmd_supports_recursion=True, 397 project_id=None, 398 ignore_symlinks=False, 399 continue_on_error=False, 400 bucket_listing_fields=None): 401 """Static factory function for instantiating _NameExpansionIterator. 402 403 This wraps the resulting iterator in a PluralityCheckableIterator and checks 404 that it is non-empty. Also, allows url_strs to be either an array or an 405 iterator. 406 407 Args: 408 command_name: name of command being run. 409 debug: Debug level to pass to underlying iterators (range 0..3). 410 logger: logging.Logger object. 411 gsutil_api: Cloud storage interface. Settable for testing/mocking. 412 url_strs: Iterable URL strings needing expansion. 413 recursion_requested: True if -r specified on command-line. If so, 414 listings will be flattened so mapped-to results contain objects 415 spanning subdirectories. 416 all_versions: Bool indicating whether to iterate over all object versions. 417 cmd_supports_recursion: Bool indicating whether this command supports a '-r' 418 flag. Useful for printing helpful error messages. 419 project_id: Project id to use for the current command. 420 ignore_symlinks: If True, ignore symlinks during iteration. 421 continue_on_error: If true, yield no-match exceptions encountered during 422 iteration instead of raising them. 423 bucket_listing_fields: Iterable fields to include in expanded results. 424 Ex. ['name', 'acl']. Underyling iterator is responsible for converting 425 these to list-style format ['items/name', 'items/acl']. If this is 426 None, only the object name is included in the result. 427 428 Raises: 429 CommandException if underlying iterator is empty. 430 431 Returns: 432 Name expansion iterator instance. 433 434 For example semantics, see comments in NameExpansionIterator.__init__. 435 """ 436 url_strs = PluralityCheckableIterator(url_strs) 437 name_expansion_iterator = _NameExpansionIterator( 438 command_name, 439 debug, 440 logger, 441 gsutil_api, 442 url_strs, 443 recursion_requested, 444 all_versions=all_versions, 445 cmd_supports_recursion=cmd_supports_recursion, 446 project_id=project_id, 447 ignore_symlinks=ignore_symlinks, 448 continue_on_error=continue_on_error, 449 bucket_listing_fields=bucket_listing_fields) 450 name_expansion_iterator = PluralityCheckableIterator(name_expansion_iterator) 451 if name_expansion_iterator.IsEmpty(): 452 raise CommandException(NO_URLS_MATCHED_GENERIC) 453 return name_expansion_iterator 454 455 456class _NonContainerTuplifyIterator(object): 457 """Iterator that produces the tuple (False, blr) for each iterated value. 458 459 Used for cases where blr_iter iterates over a set of 460 BucketListingRefs known not to name containers. 461 """ 462 463 def __init__(self, blr_iter): 464 """Instantiates iterator. 465 466 Args: 467 blr_iter: iterator of BucketListingRef. 468 """ 469 self.blr_iter = blr_iter 470 471 def __iter__(self): 472 for blr in self.blr_iter: 473 yield (False, blr) 474 475 476class _OmitNonRecursiveIterator(object): 477 """Iterator wrapper for that omits certain values for non-recursive requests. 478 479 This iterates over tuples of (names_container, BucketListingReference) and 480 omits directories, prefixes, and buckets from non-recurisve requests 481 so that we can properly calculate whether the source URL expands to multiple 482 URLs. 483 484 For example, if we have a bucket containing two objects: bucket/foo and 485 bucket/foo/bar and we do a non-recursive iteration, only bucket/foo will be 486 yielded. 487 """ 488 489 def __init__(self, tuple_iter, recursion_requested, command_name, 490 cmd_supports_recursion, logger): 491 """Instanties the iterator. 492 493 Args: 494 tuple_iter: Iterator over names_container, BucketListingReference 495 from step 2 in the NameExpansionIterator 496 recursion_requested: If false, omit buckets, dirs, and subdirs 497 command_name: Command name for user messages 498 cmd_supports_recursion: Command recursion support for user messages 499 logger: Log object for user messages 500 """ 501 self.tuple_iter = tuple_iter 502 self.recursion_requested = recursion_requested 503 self.command_name = command_name 504 self.cmd_supports_recursion = cmd_supports_recursion 505 self.logger = logger 506 507 def __iter__(self): 508 for (names_container, blr) in self.tuple_iter: 509 if not self.recursion_requested and not blr.IsObject(): 510 # At this point we either have a bucket or a prefix, 511 # so if recursion is not requested, we're going to omit it. 512 expanded_url = StorageUrlFromString(blr.url_string) 513 if expanded_url.IsFileUrl(): 514 desc = 'directory' 515 else: 516 desc = blr.type_name 517 if self.cmd_supports_recursion: 518 self.logger.info('Omitting %s "%s". (Did you mean to do %s -r?)', 519 desc, blr.url_string, self.command_name) 520 else: 521 self.logger.info('Omitting %s "%s".', desc, blr.url_string) 522 else: 523 yield (names_container, blr) 524 525 526class _ImplicitBucketSubdirIterator(object): 527 """Iterator wrapper that performs implicit bucket subdir expansion. 528 529 Each iteration yields tuple (names_container, expanded BucketListingRefs) 530 where names_container is true if URL names a directory, bucket, 531 or bucket subdir. 532 533 For example, iterating over [BucketListingRef("gs://abc")] would expand to: 534 [BucketListingRef("gs://abc/o1"), BucketListingRef("gs://abc/o2")] 535 if those subdir objects exist, and [BucketListingRef("gs://abc") otherwise. 536 """ 537 538 def __init__(self, name_exp_instance, blr_iter, subdir_exp_wildcard, 539 bucket_listing_fields): 540 """Instantiates the iterator. 541 542 Args: 543 name_exp_instance: calling instance of NameExpansion class. 544 blr_iter: iterator over BucketListingRef prefixes and objects. 545 subdir_exp_wildcard: wildcard for expanding subdirectories; 546 expected values are ** if the mapped-to results should contain 547 objects spanning subdirectories, or * if only one level should 548 be listed. 549 bucket_listing_fields: Fields requested in enumerated results. 550 """ 551 self.blr_iter = blr_iter 552 self.name_exp_instance = name_exp_instance 553 self.subdir_exp_wildcard = subdir_exp_wildcard 554 self.bucket_listing_fields = bucket_listing_fields 555 556 def __iter__(self): 557 for blr in self.blr_iter: 558 if blr.IsPrefix(): 559 # This is a bucket subdirectory, list objects according to the wildcard. 560 prefix_url = StorageUrlFromString(blr.url_string).CreatePrefixUrl( 561 wildcard_suffix=self.subdir_exp_wildcard) 562 implicit_subdir_iterator = PluralityCheckableIterator( 563 self.name_exp_instance.WildcardIterator(prefix_url).IterAll( 564 bucket_listing_fields=self.bucket_listing_fields)) 565 if not implicit_subdir_iterator.IsEmpty(): 566 for exp_blr in implicit_subdir_iterator: 567 yield (True, exp_blr) 568 else: 569 # Prefix that contains no objects, for example in the $folder$ case 570 # or an empty filesystem directory. 571 yield (False, blr) 572 elif blr.IsObject(): 573 yield (False, blr) 574 else: 575 raise CommandException( 576 '_ImplicitBucketSubdirIterator got a bucket reference %s' % blr) 577 578 579class CopyObjectInfo(object): 580 """Represents the information needed for copying a single object. 581 """ 582 583 def __init__(self, name_expansion_result, exp_dst_url, 584 have_existing_dst_container): 585 """Instantiates the object info from name expansion result and destination. 586 587 Args: 588 name_expansion_result: StorageUrl that was being expanded. 589 exp_dst_url: StorageUrl of the destination. 590 have_existing_dst_container: Whether exp_url names an existing directory, 591 bucket, or bucket subdirectory. 592 """ 593 self.source_storage_url = name_expansion_result.source_storage_url 594 self.is_multi_source_request = name_expansion_result.is_multi_source_request 595 self.names_container = name_expansion_result.names_container 596 self.expanded_storage_url = name_expansion_result.expanded_storage_url 597 self.expanded_result = name_expansion_result.expanded_result 598 599 self.exp_dst_url = exp_dst_url 600 self.have_existing_dst_container = have_existing_dst_container 601 602 603# Describes the destination information resulted from ExpandUrlToSingleBlr. 604DestinationInfo = collections.namedtuple( 605 'DestinationInfo', 606 [ 607 # The expanded destination StorageURL. 608 'exp_dst_url', 609 # Bool indicating whether the expanded destination names an existing 610 # directory, bucket, or bucket subdirectory. 611 'have_existing_dst_container', 612 ]) 613 614# Describes (NameExpansionIterator, DestinationInfo) tuple. 615NameExpansionIteratorDestinationTuple = collections.namedtuple( 616 'NameExpansionIteratorDestinationTuple', [ 617 'name_expansion_iter', 618 'destination', 619 ]) 620 621 622class CopyObjectsIterator(six.Iterator): 623 """Iterator wrapper for copying objects and keeping track of source URL types. 624 625 This is used in the cp command for copying from multiple source to multiple 626 destinations. It takes a list of NameExpansionIteratorDestinationTuple. It 627 wraps them and return CopyObjectInfo objects that wraps NameExpansionResult 628 with the destination. It's used also for collecting analytics 629 PerformanceSummary info, because there may be multiple source URLs and we want 630 to know if any of them are file URLs, if any of them are cloud URLs, if any of 631 them require daisy chain operations, and if any use different providers. The 632 source URL type information will be aggregated at the end of _SequentialApply 633 or _ParallelApply. 634 """ 635 636 def __init__(self, name_expansion_dest_iter, is_daisy_chain): 637 """Instantiates the iterator. 638 639 Args: 640 name_expansion_dest_iter: NameExpansionIteratorDestinationTuple iterator. 641 is_daisy_chain: The -D option in cp might have already been specified, in 642 which case we do not need to check again for daisy chain operations. 643 """ 644 self.is_daisy_chain = is_daisy_chain 645 self.has_file_src = False 646 self.has_cloud_src = False 647 self.provider_types = [] 648 649 self.name_expansion_dest_iter = name_expansion_dest_iter 650 name_expansion_dest_tuple = next(self.name_expansion_dest_iter) 651 self.current_expansion_iter = name_expansion_dest_tuple.name_expansion_iter 652 self.current_destination = name_expansion_dest_tuple.destination 653 654 def __iter__(self): 655 return self 656 657 def __next__(self): 658 """Keeps track of URL types as the command iterates over arguments.""" 659 try: 660 name_expansion_result = next(self.current_expansion_iter) 661 except StopIteration: 662 name_expansion_dest_tuple = next(self.name_expansion_dest_iter) 663 self.current_expansion_iter = ( 664 name_expansion_dest_tuple.name_expansion_iter) 665 self.current_destination = name_expansion_dest_tuple.destination 666 return self.__next__() 667 668 elt = CopyObjectInfo(name_expansion_result, 669 self.current_destination.exp_dst_url, 670 self.current_destination.have_existing_dst_container) 671 672 # Check if we've seen a file source. 673 if not self.has_file_src and elt.source_storage_url.IsFileUrl(): 674 self.has_file_src = True 675 # Check if we've seen a cloud source. 676 if not self.has_cloud_src and elt.source_storage_url.IsCloudUrl(): 677 self.has_cloud_src = True 678 679 # Check if we've seen a daisy-chain condition. 680 if self.current_destination.exp_dst_url.IsCloudUrl(): 681 dst_url_scheme = self.current_destination.exp_dst_url.scheme 682 else: 683 dst_url_scheme = None 684 685 if (not self.is_daisy_chain and dst_url_scheme is not None and 686 elt.source_storage_url.IsCloudUrl() and 687 elt.source_storage_url.scheme != dst_url_scheme): 688 self.is_daisy_chain = True 689 # Check if we've seen a new provider type. 690 if elt.source_storage_url.scheme not in self.provider_types: 691 self.provider_types.append(elt.source_storage_url.scheme) 692 693 return elt 694