1# -*- coding: utf-8 -*-
2# Copyright 2012 Google Inc. All Rights Reserved.
3#
4# Licensed under the Apache License, Version 2.0 (the "License");
5# you may not use this file except in compliance with the License.
6# You may obtain a copy of the License at
7#
8#     http://www.apache.org/licenses/LICENSE-2.0
9#
10# Unless required by applicable law or agreed to in writing, software
11# distributed under the License is distributed on an "AS IS" BASIS,
12# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13# See the License for the specific language governing permissions and
14# limitations under the License.
15"""Name expansion iterator and result classes.
16
17Name expansion support for the various ways gsutil lets users refer to
18collections of data (via explicit wildcarding as well as directory,
19bucket, and bucket subdir implicit wildcarding). This class encapsulates
20the various rules for determining how these expansions are done.
21"""
22
23from __future__ import absolute_import
24from __future__ import print_function
25from __future__ import division
26from __future__ import unicode_literals
27
28import collections
29import logging
30import os
31import sys
32
33import six
34
35from apitools.base.py import encoding
36import gslib
37from gslib.exception import CommandException
38from gslib.exception import NO_URLS_MATCHED_GENERIC
39from gslib.exception import NO_URLS_MATCHED_TARGET
40from gslib.plurality_checkable_iterator import PluralityCheckableIterator
41from gslib.seek_ahead_thread import SeekAheadResult
42from gslib.third_party.storage_apitools import storage_v1_messages as apitools_messages
43import gslib.wildcard_iterator
44from gslib.wildcard_iterator import StorageUrlFromString
45
46
class NameExpansionResult(object):
  """One fully expanded result yielded while iterating a NameExpansionIterator.

  Everything stored here must be pickleable because NameExpansionResult
  instances travel through Multiprocessing.Queue. In particular, avoid boto
  state such as StorageUri: it drags in a large object tree, parts of which
  are unpicklable, and even a picklable tree that large would add significant
  serialization overhead.

  The state captured here drives the various naming cases (e.g., copying one
  source URL into a directory produces different destination names than
  copying several URLs into a directory, mirroring Unix cp semantics). See
  the comments in _NameExpansionIterator for details.
  """

  def __init__(self, source_storage_url, is_multi_source_request,
               names_container, expanded_storage_url, expanded_result):
    """Builds a single name-expansion result.

    Args:
      source_storage_url: StorageUrl that was being expanded.
      is_multi_source_request: bool indicator whether src_url_str expanded to
          more than one BucketListingRef.
      names_container: Bool indicator whether src_url names a container.
      expanded_storage_url: StorageUrl that was expanded.
      expanded_result: cloud object metadata in MessageToJson form (for
          pickleability), if any was iterated; None otherwise.
          Consumers must call JsonToMessage to get an apitools Object.
    """
    self.source_storage_url = source_storage_url
    self.is_multi_source_request = is_multi_source_request
    self.names_container = names_container
    self.expanded_storage_url = expanded_storage_url
    if expanded_result:
      # Store JSON text rather than the apitools message so the instance
      # remains cheap to pickle.
      self.expanded_result = encoding.MessageToJson(expanded_result)
    else:
      self.expanded_result = None

  def __repr__(self):
    return str(self.expanded_storage_url)
87
88
class _NameExpansionIterator(object):
  """Class that iterates over all source URLs passed to the iterator.

  See details in __iter__ function doc.
  """

  def __init__(self,
               command_name,
               debug,
               logger,
               gsutil_api,
               url_strs,
               recursion_requested,
               all_versions=False,
               cmd_supports_recursion=True,
               project_id=None,
               ignore_symlinks=False,
               continue_on_error=False,
               bucket_listing_fields=None):
    """Creates a NameExpansionIterator.

    Args:
      command_name: name of command being run.
      debug: Debug level to pass to underlying iterators (range 0..3).
      logger: logging.Logger object.
      gsutil_api: Cloud storage interface.  Settable for testing/mocking.
      url_strs: PluralityCheckableIterator of URL strings needing expansion.
      recursion_requested: True if -r specified on command-line.  If so,
          listings will be flattened so mapped-to results contain objects
          spanning subdirectories.
      all_versions: Bool indicating whether to iterate over all object versions.
      cmd_supports_recursion: Bool indicating whether this command supports a
          '-r' flag. Useful for printing helpful error messages.
      project_id: Project id to use for bucket retrieval.
      ignore_symlinks: If True, ignore symlinks during iteration.
      continue_on_error: If true, yield no-match exceptions encountered during
                         iteration instead of raising them.
      bucket_listing_fields: Iterable fields to include in expanded results.
          Ex. ['name', 'acl']. Underlying iterator is responsible for converting
          these to list-style format ['items/name', 'items/acl']. If this is
          None, only the object name is included in the result.

    Examples of _NameExpansionIterator with recursion_requested=True:
      - Calling with one of the url_strs being 'gs://bucket' will enumerate all
        top-level objects, as will 'gs://bucket/' and 'gs://bucket/*'.
      - 'gs://bucket/**' will enumerate all objects in the bucket.
      - 'gs://bucket/abc' will enumerate either the single object abc or, if
         abc is a subdirectory, all objects under abc and any of its
         subdirectories.
      - 'gs://bucket/abc/**' will enumerate all objects under abc or any of its
        subdirectories.
      - 'file:///tmp' will enumerate all files under /tmp, as will
        'file:///tmp/*'
      - 'file:///tmp/**' will enumerate all files under /tmp or any of its
        subdirectories.

    Example if recursion_requested=False:
      calling with gs://bucket/abc/* lists matching objects
      or subdirs, but not sub-subdirs or objects beneath subdirs.

    Note: In step-by-step comments below we give examples assuming there's a
    gs://bucket with object paths:
      abcd/o1.txt
      abcd/o2.txt
      xyz/o1.txt
      xyz/o2.txt
    and a directory file://dir with file paths:
      dir/a.txt
      dir/b.txt
      dir/c/
    """
    self.command_name = command_name
    self.debug = debug
    self.logger = logger
    self.gsutil_api = gsutil_api
    self.url_strs = url_strs
    self.recursion_requested = recursion_requested
    self.all_versions = all_versions
    # Check self.url_strs.HasPlurality() at start because its value can change
    # if url_strs is itself an iterator.
    self.url_strs.has_plurality = self.url_strs.HasPlurality()
    self.cmd_supports_recursion = cmd_supports_recursion
    self.project_id = project_id
    self.ignore_symlinks = ignore_symlinks
    self.continue_on_error = continue_on_error
    # Default to name-only listings when the caller didn't request fields,
    # since the object name is the minimum needed for expansion.
    self.bucket_listing_fields = (set(['name']) if not bucket_listing_fields
                                  else bucket_listing_fields)

    # Map holding wildcard strings to use for flat vs subdir-by-subdir listings.
    # (A flat listing means show all objects expanded all the way down.)
    self._flatness_wildcard = {True: '**', False: '*'}

  def __iter__(self):
    """Iterates over all source URLs passed to the iterator.

    For each src url, expands wildcards, object-less bucket names,
    subdir bucket names, and directory names, and generates a flat listing of
    all the matching objects/files.

    You should instantiate this object using the static factory function
    NameExpansionIterator, because consumers of this iterator need the
    PluralityCheckableIterator wrapper built by that function.

    Yields:
      gslib.name_expansion.NameExpansionResult.

    Raises:
      CommandException: if errors encountered.
    """
    for url_str in self.url_strs:
      storage_url = StorageUrlFromString(url_str)

      # Streams and named pipes cannot be listed or wildcard-expanded, so
      # yield them directly (and only allow a single such URL per request).
      if (storage_url.IsFileUrl() and
          (storage_url.IsStream() or storage_url.IsFifo())):
        if self.url_strs.has_plurality:
          raise CommandException('Multiple URL strings are not supported '
                                 'with streaming ("-") URLs or named pipes.')
        yield NameExpansionResult(storage_url, False, False, storage_url, None)
        continue

      # Step 1: Expand any explicitly specified wildcards. The output from this
      # step is an iterator of BucketListingRef.
      # Starting with gs://buck*/abc* this step would expand to gs://bucket/abcd

      src_names_bucket = False
      if (storage_url.IsCloudUrl() and storage_url.IsBucket() and
          not self.recursion_requested):
        # UNIX commands like rm and cp will omit directory references.
        # If url_str refers only to buckets and we are not recursing,
        # then produce references of type BUCKET, because they are guaranteed
        # to pass through Step 2 and be omitted in Step 3.
        post_step1_iter = PluralityCheckableIterator(
            self.WildcardIterator(url_str).IterBuckets(bucket_fields=['id']))
      else:
        # Get a list of objects and prefixes, expanding the top level for
        # any listed buckets.  If our source is a bucket, however, we need
        # to treat all of the top level expansions as names_container=True.
        post_step1_iter = PluralityCheckableIterator(
            self.WildcardIterator(url_str).IterAll(
                bucket_listing_fields=self.bucket_listing_fields,
                expand_top_level_buckets=True))
        if storage_url.IsCloudUrl() and storage_url.IsBucket():
          src_names_bucket = True

      # Step 2: Expand bucket subdirs. The output from this
      # step is an iterator of (names_container, BucketListingRef).
      # Starting with gs://bucket/abcd this step would expand to:
      #   iter([(True, abcd/o1.txt), (True, abcd/o2.txt)]).
      subdir_exp_wildcard = self._flatness_wildcard[self.recursion_requested]
      if self.recursion_requested:
        post_step2_iter = _ImplicitBucketSubdirIterator(
            self, post_step1_iter, subdir_exp_wildcard,
            self.bucket_listing_fields)
      else:
        post_step2_iter = _NonContainerTuplifyIterator(post_step1_iter)
      post_step2_iter = PluralityCheckableIterator(post_step2_iter)

      # Because we actually perform and check object listings here, this will
      # raise if url_args includes a non-existent object.  However,
      # plurality_checkable_iterator will buffer the exception for us, not
      # raising it until the iterator is actually asked to yield the first
      # result.
      if post_step2_iter.IsEmpty():
        if self.continue_on_error:
          try:
            raise CommandException(NO_URLS_MATCHED_TARGET % url_str)
          except CommandException as e:
            # Yield a specialized tuple of (exception, stack_trace) to
            # the wrapping PluralityCheckableIterator.
            yield (e, sys.exc_info()[2])
        else:
          raise CommandException(NO_URLS_MATCHED_TARGET % url_str)

      # Step 3. Omit any directories, buckets, or bucket subdirectories for
      # non-recursive expansions.
      post_step3_iter = PluralityCheckableIterator(
          _OmitNonRecursiveIterator(post_step2_iter, self.recursion_requested,
                                    self.command_name,
                                    self.cmd_supports_recursion, self.logger))

      src_url_expands_to_multi = post_step3_iter.HasPlurality()
      is_multi_source_request = (self.url_strs.has_plurality or
                                 src_url_expands_to_multi)

      # Step 4. Expand directories and buckets. This step yields the iterated
      # values. Starting with gs://bucket this step would expand to:
      #  [abcd/o1.txt, abcd/o2.txt, xyz/o1.txt, xyz/o2.txt]
      # Starting with file://dir this step would expand to:
      #  [dir/a.txt, dir/b.txt, dir/c/]
      for (names_container, blr) in post_step3_iter:
        src_names_container = src_names_bucket or names_container

        if blr.IsObject():
          yield NameExpansionResult(storage_url, is_multi_source_request,
                                    src_names_container, blr.storage_url,
                                    blr.root_object)
        else:
          # Use implicit wildcarding to do the enumeration.
          # At this point we are guaranteed that:
          # - Recursion has been requested because non-object entries are
          #   filtered in step 3 otherwise.
          # - This is a prefix or bucket subdirectory because only
          #   non-recursive iterations produce bucket references.
          expanded_url = StorageUrlFromString(blr.url_string)
          if expanded_url.IsFileUrl():
            # Convert dir to implicit recursive wildcard.
            url_to_iterate = '%s%s%s' % (blr, os.sep, subdir_exp_wildcard)
          else:
            # Convert subdir to implicit recursive wildcard.
            url_to_iterate = expanded_url.CreatePrefixUrl(
                wildcard_suffix=subdir_exp_wildcard)

          wc_iter = PluralityCheckableIterator(
              self.WildcardIterator(url_to_iterate).IterObjects(
                  bucket_listing_fields=self.bucket_listing_fields))
          # Recompute plurality: the implicit expansion may have turned a
          # single source URL into multiple results.
          src_url_expands_to_multi = (src_url_expands_to_multi or
                                      wc_iter.HasPlurality())
          is_multi_source_request = (self.url_strs.has_plurality or
                                     src_url_expands_to_multi)
          # This will be a flattened listing of all underlying objects in the
          # subdir.
          for blr in wc_iter:
            yield NameExpansionResult(storage_url, is_multi_source_request,
                                      True, blr.storage_url, blr.root_object)

  def WildcardIterator(self, url_string):
    """Helper to instantiate gslib.WildcardIterator.

    Args are same as gslib.WildcardIterator interface, but this method fills
    in most of the values from instance state.

    Args:
      url_string: URL string naming wildcard objects to iterate.

    Returns:
      Wildcard iterator over URL string.
    """
    return gslib.wildcard_iterator.CreateWildcardIterator(
        url_string,
        self.gsutil_api,
        all_versions=self.all_versions,
        project_id=self.project_id,
        ignore_symlinks=self.ignore_symlinks,
        logger=self.logger)
333
334
class SeekAheadNameExpansionIterator(object):
  """Wraps a _NameExpansionIterator and yields SeekAheadResults.

  Unlike the NameExpansionIterator, which can make API calls upon __init__
  to check for plurality, this iterator does no work until the first iteration
  occurs.
  """

  def __init__(self,
               command_name,
               debug,
               gsutil_api,
               url_strs,
               recursion_requested,
               all_versions=False,
               cmd_supports_recursion=True,
               project_id=None,
               ignore_symlinks=False):
    """Initializes a _NameExpansionIterator with the inputs."""

    # Only data-transferring commands need byte counts. The rsync command
    # uses a different iterator, so it is not listed here.
    self.count_data_bytes = command_name in ('cp', 'mv', 'rewrite')

    # Stat'ing files to learn their size is relatively expensive, so only
    # request the size field when we actually count data bytes.
    if self.count_data_bytes:
      bucket_listing_fields = ['size']
    else:
      bucket_listing_fields = None

    self.name_expansion_iterator = _NameExpansionIterator(
        command_name,
        debug,
        logging.getLogger('dummy'),
        gsutil_api,
        PluralityCheckableIterator(url_strs),
        recursion_requested,
        all_versions=all_versions,
        cmd_supports_recursion=cmd_supports_recursion,
        project_id=project_id,
        ignore_symlinks=ignore_symlinks,
        continue_on_error=True,
        bucket_listing_fields=bucket_listing_fields)

  def __iter__(self):
    for expansion_result in self.name_expansion_iterator:
      if not (self.count_data_bytes and expansion_result.expanded_result):
        yield SeekAheadResult()
        continue
      # Deserialize the JSON metadata to read the object's size.
      object_metadata = encoding.JsonToMessage(apitools_messages.Object,
                                               expansion_result.expanded_result)
      yield SeekAheadResult(data_bytes=object_metadata.size or 0)
387
388
def NameExpansionIterator(command_name,
                          debug,
                          logger,
                          gsutil_api,
                          url_strs,
                          recursion_requested,
                          all_versions=False,
                          cmd_supports_recursion=True,
                          project_id=None,
                          ignore_symlinks=False,
                          continue_on_error=False,
                          bucket_listing_fields=None):
  """Static factory function for instantiating _NameExpansionIterator.

  Wraps the resulting iterator in a PluralityCheckableIterator and verifies
  that it is non-empty. url_strs may be either an array or an iterator.

  Args:
    command_name: name of command being run.
    debug: Debug level to pass to underlying iterators (range 0..3).
    logger: logging.Logger object.
    gsutil_api: Cloud storage interface.  Settable for testing/mocking.
    url_strs: Iterable URL strings needing expansion.
    recursion_requested: True if -r specified on command-line.  If so,
        listings will be flattened so mapped-to results contain objects
        spanning subdirectories.
    all_versions: Bool indicating whether to iterate over all object versions.
    cmd_supports_recursion: Bool indicating whether this command supports a '-r'
        flag. Useful for printing helpful error messages.
    project_id: Project id to use for the current command.
    ignore_symlinks: If True, ignore symlinks during iteration.
    continue_on_error: If true, yield no-match exceptions encountered during
                       iteration instead of raising them.
    bucket_listing_fields: Iterable fields to include in expanded results.
        Ex. ['name', 'acl']. Underlying iterator is responsible for converting
        these to list-style format ['items/name', 'items/acl']. If this is
        None, only the object name is included in the result.

  Raises:
    CommandException if underlying iterator is empty.

  Returns:
    Name expansion iterator instance.

  For example semantics, see comments in NameExpansionIterator.__init__.
  """
  plural_url_strs = PluralityCheckableIterator(url_strs)
  expansion_iterator = PluralityCheckableIterator(
      _NameExpansionIterator(command_name,
                             debug,
                             logger,
                             gsutil_api,
                             plural_url_strs,
                             recursion_requested,
                             all_versions=all_versions,
                             cmd_supports_recursion=cmd_supports_recursion,
                             project_id=project_id,
                             ignore_symlinks=ignore_symlinks,
                             continue_on_error=continue_on_error,
                             bucket_listing_fields=bucket_listing_fields))
  # Fail fast if no URL matched anything at all.
  if expansion_iterator.IsEmpty():
    raise CommandException(NO_URLS_MATCHED_GENERIC)
  return expansion_iterator
454
455
456class _NonContainerTuplifyIterator(object):
457  """Iterator that produces the tuple (False, blr) for each iterated value.
458
459  Used for cases where blr_iter iterates over a set of
460  BucketListingRefs known not to name containers.
461  """
462
463  def __init__(self, blr_iter):
464    """Instantiates iterator.
465
466    Args:
467      blr_iter: iterator of BucketListingRef.
468    """
469    self.blr_iter = blr_iter
470
471  def __iter__(self):
472    for blr in self.blr_iter:
473      yield (False, blr)
474
475
476class _OmitNonRecursiveIterator(object):
477  """Iterator wrapper for that omits certain values for non-recursive requests.
478
479  This iterates over tuples of (names_container, BucketListingReference) and
480  omits directories, prefixes, and buckets from non-recurisve requests
481  so that we can properly calculate whether the source URL expands to multiple
482  URLs.
483
484  For example, if we have a bucket containing two objects: bucket/foo and
485  bucket/foo/bar and we do a non-recursive iteration, only bucket/foo will be
486  yielded.
487  """
488
489  def __init__(self, tuple_iter, recursion_requested, command_name,
490               cmd_supports_recursion, logger):
491    """Instanties the iterator.
492
493    Args:
494      tuple_iter: Iterator over names_container, BucketListingReference
495                  from step 2 in the NameExpansionIterator
496      recursion_requested: If false, omit buckets, dirs, and subdirs
497      command_name: Command name for user messages
498      cmd_supports_recursion: Command recursion support for user messages
499      logger: Log object for user messages
500    """
501    self.tuple_iter = tuple_iter
502    self.recursion_requested = recursion_requested
503    self.command_name = command_name
504    self.cmd_supports_recursion = cmd_supports_recursion
505    self.logger = logger
506
507  def __iter__(self):
508    for (names_container, blr) in self.tuple_iter:
509      if not self.recursion_requested and not blr.IsObject():
510        # At this point we either have a bucket or a prefix,
511        # so if recursion is not requested, we're going to omit it.
512        expanded_url = StorageUrlFromString(blr.url_string)
513        if expanded_url.IsFileUrl():
514          desc = 'directory'
515        else:
516          desc = blr.type_name
517        if self.cmd_supports_recursion:
518          self.logger.info('Omitting %s "%s". (Did you mean to do %s -r?)',
519                           desc, blr.url_string, self.command_name)
520        else:
521          self.logger.info('Omitting %s "%s".', desc, blr.url_string)
522      else:
523        yield (names_container, blr)
524
525
class _ImplicitBucketSubdirIterator(object):
  """Iterator wrapper that performs implicit bucket subdir expansion.

  Each iteration yields the tuple (names_container, expanded BucketListingRef)
  where names_container is true if the URL names a directory, bucket, or
  bucket subdir.

  For example, iterating over [BucketListingRef("gs://abc")] expands to
  [BucketListingRef("gs://abc/o1"), BucketListingRef("gs://abc/o2")] when
  those subdir objects exist, and to [BucketListingRef("gs://abc")] otherwise.
  """

  def __init__(self, name_exp_instance, blr_iter, subdir_exp_wildcard,
               bucket_listing_fields):
    """Instantiates the iterator.

    Args:
      name_exp_instance: calling instance of NameExpansion class.
      blr_iter: iterator over BucketListingRef prefixes and objects.
      subdir_exp_wildcard: wildcard for expanding subdirectories;
          expected values are ** if the mapped-to results should contain
          objects spanning subdirectories, or * if only one level should
          be listed.
      bucket_listing_fields: Fields requested in enumerated results.
    """
    self.blr_iter = blr_iter
    self.name_exp_instance = name_exp_instance
    self.subdir_exp_wildcard = subdir_exp_wildcard
    self.bucket_listing_fields = bucket_listing_fields

  def __iter__(self):
    for blr in self.blr_iter:
      if blr.IsPrefix():
        # Bucket subdirectory: enumerate its contents per the wildcard.
        prefix_url = StorageUrlFromString(blr.url_string).CreatePrefixUrl(
            wildcard_suffix=self.subdir_exp_wildcard)
        expanded_iterator = PluralityCheckableIterator(
            self.name_exp_instance.WildcardIterator(prefix_url).IterAll(
                bucket_listing_fields=self.bucket_listing_fields))
        if expanded_iterator.IsEmpty():
          # Prefix with no objects beneath it, e.g. the $folder$ case or an
          # empty filesystem directory.
          yield (False, blr)
        else:
          for expanded_blr in expanded_iterator:
            yield (True, expanded_blr)
      elif blr.IsObject():
        yield (False, blr)
      else:
        raise CommandException(
            '_ImplicitBucketSubdirIterator got a bucket reference %s' % blr)
577
578
class CopyObjectInfo(object):
  """Holds the information needed to copy a single object."""

  def __init__(self, name_expansion_result, exp_dst_url,
               have_existing_dst_container):
    """Builds the object info from a name expansion result and a destination.

    Args:
      name_expansion_result: StorageUrl that was being expanded.
      exp_dst_url: StorageUrl of the destination.
      have_existing_dst_container: Whether exp_url names an existing directory,
          bucket, or bucket subdirectory.
    """
    # Copy the expansion result's fields so this object is self-contained.
    expansion = name_expansion_result
    self.source_storage_url = expansion.source_storage_url
    self.is_multi_source_request = expansion.is_multi_source_request
    self.names_container = expansion.names_container
    self.expanded_storage_url = expansion.expanded_storage_url
    self.expanded_result = expansion.expanded_result

    # Destination information for this copy.
    self.exp_dst_url = exp_dst_url
    self.have_existing_dst_container = have_existing_dst_container
601
602
# Destination information resulting from ExpandUrlToSingleBlr:
#   exp_dst_url: the expanded destination StorageURL.
#   have_existing_dst_container: bool; whether the expanded destination names
#       an existing directory, bucket, or bucket subdirectory.
DestinationInfo = collections.namedtuple(
    'DestinationInfo', ['exp_dst_url', 'have_existing_dst_container'])

# Pairs a NameExpansionIterator with the DestinationInfo its results copy to.
NameExpansionIteratorDestinationTuple = collections.namedtuple(
    'NameExpansionIteratorDestinationTuple',
    ['name_expansion_iter', 'destination'])
620
621
class CopyObjectsIterator(six.Iterator):
  """Iterator wrapper for copying objects and keeping track of source URL types.

  This is used in the cp command for copying from multiple source to multiple
  destinations. It takes a list of NameExpansionIteratorDestinationTuple. It
  wraps them and return CopyObjectInfo objects that wraps NameExpansionResult
  with the destination. It's used also for collecting analytics
  PerformanceSummary info, because there may be multiple source URLs and we want
  to know if any of them are file URLs, if any of them are cloud URLs, if any of
  them require daisy chain operations, and if any use different providers. The
  source URL type information will be aggregated at the end of _SequentialApply
  or _ParallelApply.
  """

  def __init__(self, name_expansion_dest_iter, is_daisy_chain):
    """Instantiates the iterator.

    Args:
      name_expansion_dest_iter: NameExpansionIteratorDestinationTuple iterator.
      is_daisy_chain: The -D option in cp might have already been specified, in
          which case we do not need to check again for daisy chain operations.
    """
    self.is_daisy_chain = is_daisy_chain
    self.has_file_src = False
    self.has_cloud_src = False
    self.provider_types = []

    self.name_expansion_dest_iter = name_expansion_dest_iter
    name_expansion_dest_tuple = next(self.name_expansion_dest_iter)
    self.current_expansion_iter = name_expansion_dest_tuple.name_expansion_iter
    self.current_destination = name_expansion_dest_tuple.destination

  def __iter__(self):
    return self

  def __next__(self):
    """Keeps track of URL types as the command iterates over arguments.

    Returns:
      CopyObjectInfo wrapping the next expansion result and its destination.

    Raises:
      StopIteration: when all expansion iterators are exhausted.
    """
    # Advance with a loop rather than recursion (the original implementation
    # called self.__next__() recursively) so that a long run of exhausted
    # expansion iterators cannot exceed the interpreter's recursion limit.
    while True:
      try:
        name_expansion_result = next(self.current_expansion_iter)
        break
      except StopIteration:
        # Current expansion iterator is exhausted; move on to the next
        # (NameExpansionIterator, DestinationInfo) tuple. StopIteration from
        # name_expansion_dest_iter propagates and ends this iterator.
        name_expansion_dest_tuple = next(self.name_expansion_dest_iter)
        self.current_expansion_iter = (
            name_expansion_dest_tuple.name_expansion_iter)
        self.current_destination = name_expansion_dest_tuple.destination

    elt = CopyObjectInfo(name_expansion_result,
                         self.current_destination.exp_dst_url,
                         self.current_destination.have_existing_dst_container)

    # Check if we've seen a file source.
    if not self.has_file_src and elt.source_storage_url.IsFileUrl():
      self.has_file_src = True
    # Check if we've seen a cloud source.
    if not self.has_cloud_src and elt.source_storage_url.IsCloudUrl():
      self.has_cloud_src = True

    # Check if we've seen a daisy-chain condition (cloud-to-cloud copy
    # across different providers).
    if self.current_destination.exp_dst_url.IsCloudUrl():
      dst_url_scheme = self.current_destination.exp_dst_url.scheme
    else:
      dst_url_scheme = None

    if (not self.is_daisy_chain and dst_url_scheme is not None and
        elt.source_storage_url.IsCloudUrl() and
        elt.source_storage_url.scheme != dst_url_scheme):
      self.is_daisy_chain = True
    # Check if we've seen a new provider type.
    if elt.source_storage_url.scheme not in self.provider_types:
      self.provider_types.append(elt.source_storage_url.scheme)

    return elt
694