# third_party/gsutil/gslib/name_expansion.py - depot_tools - Git at Google

 # Copyright 2012 Google Inc. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.

 import copy
 import threading
 import wildcard_iterator

 from bucket_listing_ref import BucketListingRef
 from gslib.exception import CommandException
 from gslib.plurality_checkable_iterator import PluralityCheckableIterator
 from gslib.storage_uri_builder import StorageUriBuilder
 from wildcard_iterator import ContainsWildcard

 """
 Name expansion support for the various ways gsutil lets users refer to
 collections of data (via explicit wildcarding as well as directory,
 bucket, and bucket subdir implicit wildcarding). This class encapsulates
 the various rules for determining how these expansions are done.
 """


 class NameExpansionResult(object):
   """
   Holds one fully expanded result from iterating over NameExpansionIterator.

   The member data in this class need to be pickleable because
   NameExpansionResult instances are passed through Multiprocessing.Queue. In
   particular, don't include any boto state like StorageUri, since that pulls
   in a big tree of objects, some of which aren't pickleable (and even if
   they were, pickling/unpickling such a large object tree would result in
   significant overhead).

   The state held in this object is needed for handling the various naming cases
   (e.g., copying from a single source URI to a directory generates different
   dest URI names than copying multiple URIs to a directory, to be consistent
   with naming rules used by the Unix cp command). For more details see comments
   in _NameExpansionIterator.
   """

   def __init__(self, src_uri_str, is_multi_src_request,
                src_uri_expands_to_multi, names_container, expanded_uri_str,
                have_existing_dst_container=None, is_latest=False):
     """
     Args:
       src_uri_str: string representation of StorageUri that was expanded.
       is_multi_src_request: bool indicator whether the request as a whole
           covers more than 1 source (several source URIs were given, or
           src_uri_str expanded to more than 1 BucketListingRef).
       src_uri_expands_to_multi: bool indicator whether the current src_uri
           expanded to more than 1 BucketListingRef.
       names_container: Bool indicator whether src_uri names a container.
       expanded_uri_str: string representation of StorageUri to which src_uri_str
           expands.
       have_existing_dst_container: bool indicator whether this is a copy
           request to an existing bucket, bucket subdir, or directory. Default
           None value should be used in cases where this is not needed (commands
           other than cp).
       is_latest: Bool indicating that the result represents the object's current
           version.
     """
     # Only plain strings/bools are stored so that instances stay pickleable.
     self.src_uri_str = src_uri_str
     self.is_multi_src_request = is_multi_src_request
     self.src_uri_expands_to_multi = src_uri_expands_to_multi
     self.names_container = names_container
     self.expanded_uri_str = expanded_uri_str
     self.have_existing_dst_container = have_existing_dst_container
     self.is_latest = is_latest

   def __repr__(self):
     return '%s' % self.expanded_uri_str

   def IsEmpty(self):
     """Returns True if name expansion yielded no matches."""
     # The expansion is stored as expanded_uri_str; this class never sets an
     # "expanded_blr" attribute, so testing that name raised AttributeError.
     return self.expanded_uri_str is None

   def GetSrcUriStr(self):
     """Returns the string representation of the StorageUri that was expanded."""
     return self.src_uri_str

   def IsMultiSrcRequest(self):
     """
     Returns bool indicator whether the request covers more than 1 source
     (the is_multi_src_request value passed to the constructor).
     """
     return self.is_multi_src_request

   def SrcUriExpandsToMulti(self):
     """
     Returns bool indicator whether the current src_uri expanded to more than
     1 BucketListingRef.
     """
     return self.src_uri_expands_to_multi

   def NamesContainer(self):
     """
     Returns bool indicator of whether src_uri names a directory, bucket, or
     bucket subdir.
     """
     return self.names_container

   def GetExpandedUriStr(self):
     """
     Returns the string representation of StorageUri to which src_uri_str
     expands.
     """
     return self.expanded_uri_str

   def HaveExistingDstContainer(self):
     """
     Returns bool indicator whether this is a copy request to an existing
     bucket, bucket subdir, or directory, or None if not relevant.
     """
     return self.have_existing_dst_container


 class _NameExpansionIterator(object):
   """
   Iterates over all src_uris, expanding wildcards, object-less bucket names,
   subdir bucket names, and directory names, generating a flat listing of all
   the matching objects/files.

   You should instantiate this object using the static factory function
   NameExpansionIterator, because consumers of this iterator need the
   PluralityCheckableIterator wrapper built by that function.

   Yields:
     gslib.name_expansion.NameExpansionResult.

   Raises:
     CommandException: if errors encountered.
   """

   def __init__(self, command_name, proj_id_handler, headers, debug,
                bucket_storage_uri_class, uri_strs, recursion_requested,
                have_existing_dst_container=None, flat=True,
                all_versions=False, for_all_version_delete=False):
     """
     Args:
       command_name: name of command being run.
       proj_id_handler: ProjectIdHandler to use for current command.
       headers: Dictionary containing optional HTTP headers to pass to boto.
       debug: Debug level to pass in to boto connection (range 0..3).
       bucket_storage_uri_class: Class to instantiate for cloud StorageUris.
           Settable for testing/mocking.
       uri_strs: PluralityCheckableIterator of URI strings needing expansion
           (has_plurality() is called on it while iterating).
       recursion_requested: True if -R specified on command-line.
       have_existing_dst_container: Bool indicator whether this is a copy
           request to an existing bucket, bucket subdir, or directory. Default
           None value should be used in cases where this is not needed (commands
           other than cp).
       flat: Bool indicating whether bucket listings should be flattened, i.e.,
           so the mapped-to results contain objects spanning subdirectories.
       all_versions: Bool indicating whether to iterate over all object versions.
       for_all_version_delete: Bool indicating whether this is for an all-version
           delete. Accepted for interface compatibility; it is neither stored
           nor consulted by this class.

     Examples of _NameExpansionIterator with flat=True:
       - Calling with one of the uri_strs being 'gs://bucket' will enumerate all
         top-level objects, as will 'gs://bucket/' and 'gs://bucket/*'.
       - 'gs://bucket/**' will enumerate all objects in the bucket.
       - 'gs://bucket/abc' will enumerate all next-level objects under directory
         abc (i.e., not including subdirectories of abc) if gs://bucket/abc/*
         matches any objects; otherwise it will enumerate the single name
         gs://bucket/abc
       - 'gs://bucket/abc/**' will enumerate all objects under abc or any of its
         subdirectories.
       - 'file:///tmp' will enumerate all files under /tmp, as will
         'file:///tmp/*'
       - 'file:///tmp/**' will enumerate all files under /tmp or any of its
         subdirectories.

     Example if flat=False: calling with gs://bucket/abc/* lists matching objects
     or subdirs, but not sub-subdirs or objects beneath subdirs.

     Note: In step-by-step comments in __iter__ we give examples assuming there's
     a gs://bucket with object paths:
       abcd/o1.txt
       abcd/o2.txt
       xyz/o1.txt
       xyz/o2.txt
     and a directory file://dir with file paths:
       dir/a.txt
       dir/b.txt
       dir/c/
     """
     self.command_name = command_name
     self.proj_id_handler = proj_id_handler
     self.headers = headers
     self.debug = debug
     self.bucket_storage_uri_class = bucket_storage_uri_class
     self.suri_builder = StorageUriBuilder(debug, bucket_storage_uri_class)
     self.uri_strs = uri_strs
     self.recursion_requested = recursion_requested
     self.have_existing_dst_container = have_existing_dst_container
     self.flat = flat
     self.all_versions = all_versions

     # Map holding wildcard strings to use for flat vs subdir-by-subdir listings.
     # (A flat listing means show all objects expanded all the way down.)
     self._flatness_wildcard = {True: '**', False: '*'}

   def __iter__(self):
     """
     Generates one NameExpansionResult per object/file matched by self.uri_strs.

     Raises:
       CommandException: if a URI string expands to nothing.
     """
     for uri_str in self.uri_strs:
       # Step 1: Expand any explicitly specified wildcards. The output from this
       # step is an iterator of BucketListingRef.
       # Starting with gs://buck*/abc* this step would expand to gs://bucket/abcd
       if ContainsWildcard(uri_str):
         post_step1_iter = self._WildcardIterator(uri_str)
       else:
         suri = self.suri_builder.StorageUri(uri_str)
         post_step1_iter = iter([BucketListingRef(suri)])
       post_step1_iter = PluralityCheckableIterator(post_step1_iter)

       # Step 2: Expand bucket subdirs and versions. The output from this
       # step is an iterator of (names_container, BucketListingRef).
       # Starting with gs://bucket/abcd this step would expand to:
       #   iter([(True, abcd/o1.txt), (True, abcd/o2.txt)]).
       if self.flat and self.recursion_requested:
         post_step2_iter = _ImplicitBucketSubdirIterator(self,
             post_step1_iter, self.flat)
       elif self.all_versions:
         post_step2_iter = _AllVersionIterator(self, post_step1_iter,
                                               headers=self.headers)
       else:
         post_step2_iter = _NonContainerTuplifyIterator(post_step1_iter)
       post_step2_iter = PluralityCheckableIterator(post_step2_iter)

       # Step 3. Expand directories and buckets. This step yields the iterated
       # values. Starting with gs://bucket this step would expand to:
       #  [abcd/o1.txt, abcd/o2.txt, xyz/o1.txt, xyz/o2.txt]
       # Starting with file://dir this step would expand to:
       #  [dir/a.txt, dir/b.txt, dir/c/]
       wc = self._flatness_wildcard[self.flat]
       src_uri_expands_to_multi = (post_step1_iter.has_plurality()
                                   or post_step2_iter.has_plurality())
       is_multi_src_request = (self.uri_strs.has_plurality()
                               or src_uri_expands_to_multi)

       if post_step2_iter.is_empty():
         raise CommandException('No URIs matched: %s' % uri_str)
       for (names_container, blr) in post_step2_iter:
         if (not blr.GetUri().names_container()
             and (self.flat or not blr.HasPrefix())):
           # Plain object/file: emit it as-is.
           yield NameExpansionResult(uri_str, is_multi_src_request,
                                     src_uri_expands_to_multi, names_container,
                                     blr.GetUriString(),
                                     self.have_existing_dst_container,
                                     is_latest=blr.IsLatest())
           continue
         if not self.recursion_requested:
           if blr.GetUri().is_file_uri():
             desc = 'directory'
           else:
             desc = 'bucket'
           # Single parenthesised argument: prints identically as a Python 2
           # print statement and as a Python 3 print() call.
           print('Omitting %s "%s". (Did you mean to do %s -R?)' % (
               desc, blr.GetUri(), self.command_name))
           continue
         if blr.GetUri().is_file_uri():
           # Convert dir to implicit recursive wildcard.
           uri_to_iterate = '%s/%s' % (blr.GetUriString(), wc)
         else:
           # Convert bucket to implicit recursive wildcard.
           uri_to_iterate = blr.GetUri().clone_replace_name(wc)
         wc_iter = PluralityCheckableIterator(
             self._WildcardIterator(uri_to_iterate))
         # Once the container is expanded the plurality flags can only turn
         # from False to True, never back.
         src_uri_expands_to_multi = (src_uri_expands_to_multi
                                     or wc_iter.has_plurality())
         is_multi_src_request = (self.uri_strs.has_plurality()
                                 or src_uri_expands_to_multi)
         # Separate name so the outer loop variable is not rebound.
         for exp_blr in wc_iter:
           yield NameExpansionResult(uri_str, is_multi_src_request,
                                     src_uri_expands_to_multi, True,
                                     exp_blr.GetUriString(),
                                     self.have_existing_dst_container,
                                     is_latest=exp_blr.IsLatest())

   def _WildcardIterator(self, uri_or_str):
     """
     Helper to instantiate gslib.WildcardIterator. Args are same as
     gslib.WildcardIterator interface, but this method fills in most of the
     values from instance state.

     Args:
       uri_or_str: StorageUri or URI string naming wildcard objects to iterate.

     Returns:
       The result of wildcard_iterator.wildcard_iterator() for uri_or_str.
     """
     return wildcard_iterator.wildcard_iterator(
         uri_or_str, self.proj_id_handler,
         bucket_storage_uri_class=self.bucket_storage_uri_class,
         headers=self.headers, debug=self.debug,
         all_versions=self.all_versions)


 def NameExpansionIterator(command_name, proj_id_handler, headers, debug,
                           bucket_storage_uri_class, uri_strs,
                           recursion_requested,
                           have_existing_dst_container=None, flat=True,
                           all_versions=False,
                           for_all_version_delete=False):
   """
   Static factory function for instantiating _NameExpansionIterator.

   Wraps the resulting iterator in a PluralityCheckableIterator and checks
   that it is non-empty. uri_strs may be either an array or an iterator; it
   is wrapped in a PluralityCheckableIterator before being handed on.

   Args:
     command_name: name of command being run.
     proj_id_handler: ProjectIdHandler to use for current command.
     headers: Dictionary containing optional HTTP headers to pass to boto.
     debug: Debug level to pass in to boto connection (range 0..3).
     bucket_storage_uri_class: Class to instantiate for cloud StorageUris.
         Settable for testing/mocking.
     uri_strs: array or iterator of URI strings needing expansion.
     recursion_requested: True if -R specified on command-line.
     have_existing_dst_container: Bool indicator whether this is a copy
         request to an existing bucket, bucket subdir, or directory. Default
         None value should be used in cases where this is not needed (commands
         other than cp).
     flat: Bool indicating whether bucket listings should be flattened, i.e.,
         so the mapped-to results contain objects spanning subdirectories.
     all_versions: Bool indicating whether to iterate over all object versions.
     for_all_version_delete: Bool indicating whether this is for an all-version
         delete.

   Returns:
     PluralityCheckableIterator over the NameExpansionResult values.

   Raises:
     CommandException: if the expansion yields nothing.

   Examples with flat=True:
     - Calling with one of the uri_strs being 'gs://bucket' will enumerate all
       top-level objects, as will 'gs://bucket/' and 'gs://bucket/*'.
     - 'gs://bucket/**' will enumerate all objects in the bucket.
     - 'gs://bucket/abc' will enumerate all next-level objects under directory
       abc (i.e., not including subdirectories of abc) if gs://bucket/abc/*
       matches any objects; otherwise it will enumerate the single name
       gs://bucket/abc
     - 'gs://bucket/abc/**' will enumerate all objects under abc or any of its
       subdirectories.
     - 'file:///tmp' will enumerate all files under /tmp, as will
       'file:///tmp/*'
     - 'file:///tmp/**' will enumerate all files under /tmp or any of its
       subdirectories.

   Example if flat=False: calling with gs://bucket/abc/* lists matching objects
   or subdirs, but not sub-subdirs or objects beneath subdirs.
   """
   src_uris = PluralityCheckableIterator(uri_strs)
   expansion = PluralityCheckableIterator(
       _NameExpansionIterator(
           command_name, proj_id_handler, headers, debug,
           bucket_storage_uri_class, src_uris, recursion_requested,
           have_existing_dst_container, flat, all_versions=all_versions,
           for_all_version_delete=for_all_version_delete))
   if not expansion.is_empty():
     return expansion
   raise CommandException('No URIs matched')


 class NameExpansionIteratorQueue(object):
   """
   Wrapper around NameExpansionIterator that provides a Multiprocessing.Queue
   facade.

   Only a blocking get() function can be called, and the block and timeout
   params on that function are ignored. All other class functions raise
   NotImplementedError.

   get() serializes access to the wrapped iterator with a lock.
   """

   def __init__(self, name_expansion_iterator, final_value):
     """
     Args:
       name_expansion_iterator: iterator to drain; must provide is_empty() and
           support the iterator protocol.
       final_value: value that get() returns once the iterator is exhausted.
     """
     self.name_expansion_iterator = name_expansion_iterator
     self.final_value = final_value
     self.lock = threading.Lock()

   def qsize(self):
     raise NotImplementedError(
         "NameExpansionIteratorQueue.qsize() not implemented")

   def empty(self):
     raise NotImplementedError(
         "NameExpansionIteratorQueue.empty() not implemented")

   def full(self):
     raise NotImplementedError(
         "NameExpansionIteratorQueue.full() not implemented")

   def put(self, obj=None, block=None, timeout=None):
     raise NotImplementedError(
         "NameExpansionIteratorQueue.put() not implemented")

   def put_nowait(self, obj):
     raise NotImplementedError(
         "NameExpansionIteratorQueue.put_nowait() not implemented")

   def get(self, block=None, timeout=None):
     """
     Returns the next value from the wrapped iterator, or final_value once the
     iterator is exhausted. The block and timeout args are ignored.
     """
     # The emptiness check and the fetch must happen under one lock hold so
     # that two callers cannot both pass the check for the last element.
     with self.lock:
       if self.name_expansion_iterator.is_empty():
         return self.final_value
       # Builtin next() dispatches to .next() on Python 2 and to .__next__()
       # on Python 3.
       return next(self.name_expansion_iterator)

   def get_nowait(self):
     raise NotImplementedError(
         "NameExpansionIteratorQueue.get_nowait() not implemented")

   def get_no_wait(self):
     raise NotImplementedError(
         "NameExpansionIteratorQueue.get_no_wait() not implemented")

   def close(self):
     raise NotImplementedError(
         "NameExpansionIteratorQueue.close() not implemented")

   def join_thread(self):
     raise NotImplementedError(
         "NameExpansionIteratorQueue.join_thread() not implemented")

   def cancel_join_thread(self):
     raise NotImplementedError(
         "NameExpansionIteratorQueue.cancel_join_thread() not implemented")


 class _NonContainerTuplifyIterator(object):
   """
   Iterator that produces the tuple (False, blr) for each iteration
   of blr_iter. Used for cases where blr_iter iterates over a set of
   BucketListingRefs known not to name containers.
   """

   def __init__(self, blr_iter):
     """
     Args:
       blr_iter: iterator of BucketListingRef.
     """
     self.blr_iter = blr_iter

   def __iter__(self):
     for blr in self.blr_iter:
       yield (False, blr)


 class _ImplicitBucketSubdirIterator(object):

   """
   Iterator wrapper that iterates over blr_iter, performing implicit bucket
   subdir expansion.

   Each iteration yields tuple (names_container, expanded BucketListingRefs)
     where names_container is true if URI names a directory, bucket,
     or bucket subdir (vs how StorageUri.names_container() doesn't
     handle latter case).

   For example, iterating over [BucketListingRef("gs://abc")] would expand to:
     [BucketListingRef("gs://abc/o1"), BucketListingRef("gs://abc/o2")]
   if those subdir objects exist, and [BucketListingRef("gs://abc") otherwise.
   """

   def __init__(self, name_expansion_instance, blr_iter, flat):
     """
     Args:
       name_expansion_instance: calling instance of NameExpansion class.
       blr_iter: iterator of BucketListingRef.
       flat: bool indicating whether bucket listings should be flattened, i.e.,
           so the mapped-to results contain objects spanning subdirectories.
     """
     self.blr_iter = blr_iter
     self.name_expansion_instance = name_expansion_instance
     self.flat = flat

   def __iter__(self):
     for blr in self.blr_iter:
       uri = blr.GetUri()
       if uri.names_object():
         # URI could be a bucket subdir.
         implicit_subdir_iterator = PluralityCheckableIterator(
             self.name_expansion_instance._WildcardIterator(
                 self.name_expansion_instance.suri_builder.StorageUri(
                     '%s/%s' % (uri.uri.rstrip('/'),
                     self.name_expansion_instance._flatness_wildcard[
                         self.flat]))))
         if not implicit_subdir_iterator.is_empty():
           for exp_blr in implicit_subdir_iterator:
             yield (True, exp_blr)
         else:
           yield (False, blr)
       else:
         yield (False, blr)

 class _AllVersionIterator(object):
   """
   Iterator wrapper that iterates over blr_iter, performing implicit version
   expansion.

   Output behavior is identical to that in _ImplicitBucketSubdirIterator above.

   For example, iterating over [BucketListingRef("gs://abc/o1")] would expand to:
     [BucketListingRef("gs://abc/o1#1234"), BucketListingRef("gs://abc/o1#1235")]
   """

   def __init__(self, name_expansion_instance, blr_iter, headers=None):
     """
     Args:
       name_expansion_instance: calling instance of NameExpansion class.
       blr_iter: iterator of BucketListingRef.
       flat: bool indicating whether bucket listings should be flattened, i.e.,
           so the mapped-to results contain objects spanning subdirectories.
     """
     self.blr_iter = blr_iter
     self.name_expansion_instance = name_expansion_instance
     self.headers = headers

   def __iter__(self):
     empty = True
     for blr in self.blr_iter:
       uri = blr.GetUri()
       if not uri.names_object():
         empty = False
         yield (True, blr)
         break
       for key in uri.list_bucket(
           prefix=uri.object_name, headers=self.headers, all_versions=True):
         if key.name != uri.object_name:
           # The desired entries will be alphabetically first in this listing.
           break
         version_blr = BucketListingRef(uri.clone_replace_key(key), key=key)
         empty = False
         yield (False, version_blr)
       # If no version exists, yield the unversioned blr, and let the consuming
       # operation fail. This mirrors behavior in _ImplicitBucketSubdirIterator.
       if empty:
         yield (False, blr)
	# Copyright 2012 Google Inc. All Rights Reserved.
	#
	# Licensed under the Apache License, Version 2.0 (the "License");
	# you may not use this file except in compliance with the License.
	# You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing, software
	# distributed under the License is distributed on an "AS IS" BASIS,
	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	# See the License for the specific language governing permissions and
	# limitations under the License.

	import copy
	import threading
	import wildcard_iterator

	from bucket_listing_ref import BucketListingRef
	from gslib.exception import CommandException
	from gslib.plurality_checkable_iterator import PluralityCheckableIterator
	from gslib.storage_uri_builder import StorageUriBuilder
	from wildcard_iterator import ContainsWildcard

	"""
	Name expansion support for the various ways gsutil lets users refer to
	collections of data (via explicit wildcarding as well as directory,
	bucket, and bucket subdir implicit wildcarding). This class encapsulates
	the various rules for determining how these expansions are done.
	"""


	class NameExpansionResult(object):
	  """
	  Holds one fully expanded result from iterating over NameExpansionIterator.

	  The member data in this class need to be pickleable because
	  NameExpansionResult instances are passed through Multiprocessing.Queue. In
	  particular, don't include any boto state like StorageUri, since that pulls
	  in a big tree of objects, some of which aren't pickleable (and even if
	  they were, pickling/unpickling such a large object tree would result in
	  significant overhead).

	  The state held in this object is needed for handling the various naming cases
	  (e.g., copying from a single source URI to a directory generates different
	  dest URI names than copying multiple URIs to a directory, to be consistent
	  with naming rules used by the Unix cp command). For more details see comments
	  in _NameExpansionIterator.
	  """

	  def __init__(self, src_uri_str, is_multi_src_request,
	               src_uri_expands_to_multi, names_container, expanded_uri_str,
	               have_existing_dst_container=None, is_latest=False):
	    """
	    Args:
	      src_uri_str: string representation of StorageUri that was expanded.
	      is_multi_src_request: bool indicator whether the request as a whole
	          covers more than 1 source (several source URIs were given, or
	          src_uri_str expanded to more than 1 BucketListingRef).
	      src_uri_expands_to_multi: bool indicator whether the current src_uri
	          expanded to more than 1 BucketListingRef.
	      names_container: Bool indicator whether src_uri names a container.
	      expanded_uri_str: string representation of StorageUri to which src_uri_str
	          expands.
	      have_existing_dst_container: bool indicator whether this is a copy
	          request to an existing bucket, bucket subdir, or directory. Default
	          None value should be used in cases where this is not needed (commands
	          other than cp).
	      is_latest: Bool indicating that the result represents the object's current
	          version.
	    """
	    # Only plain strings/bools are stored so that instances stay pickleable.
	    self.src_uri_str = src_uri_str
	    self.is_multi_src_request = is_multi_src_request
	    self.src_uri_expands_to_multi = src_uri_expands_to_multi
	    self.names_container = names_container
	    self.expanded_uri_str = expanded_uri_str
	    self.have_existing_dst_container = have_existing_dst_container
	    self.is_latest = is_latest

	  def __repr__(self):
	    return '%s' % self.expanded_uri_str

	  def IsEmpty(self):
	    """Returns True if name expansion yielded no matches."""
	    # The expansion is stored as expanded_uri_str; this class never sets an
	    # "expanded_blr" attribute, so testing that name raised AttributeError.
	    return self.expanded_uri_str is None

	  def GetSrcUriStr(self):
	    """Returns the string representation of the StorageUri that was expanded."""
	    return self.src_uri_str

	  def IsMultiSrcRequest(self):
	    """
	    Returns bool indicator whether the request covers more than 1 source
	    (the is_multi_src_request value passed to the constructor).
	    """
	    return self.is_multi_src_request

	  def SrcUriExpandsToMulti(self):
	    """
	    Returns bool indicator whether the current src_uri expanded to more than
	    1 BucketListingRef.
	    """
	    return self.src_uri_expands_to_multi

	  def NamesContainer(self):
	    """
	    Returns bool indicator of whether src_uri names a directory, bucket, or
	    bucket subdir.
	    """
	    return self.names_container

	  def GetExpandedUriStr(self):
	    """
	    Returns the string representation of StorageUri to which src_uri_str
	    expands.
	    """
	    return self.expanded_uri_str

	  def HaveExistingDstContainer(self):
	    """
	    Returns bool indicator whether this is a copy request to an existing
	    bucket, bucket subdir, or directory, or None if not relevant.
	    """
	    return self.have_existing_dst_container


	class _NameExpansionIterator(object):
	"""
	Iterates over all src_uris, expanding wildcards, object-less bucket names,
	subdir bucket names, and directory names, generating a flat listing of all
	the matching objects/files.

	You should instantiate this object using the static factory function
	NameExpansionIterator, because consumers of this iterator need the
	PluralityCheckableIterator wrapper built by that function.

	Yields:
	gslib.name_expansion.NameExpansionResult.

	Raises:
	CommandException: if errors encountered.
	"""

	def __init__(self, command_name, proj_id_handler, headers, debug,
	             bucket_storage_uri_class, uri_strs, recursion_requested,
	             have_existing_dst_container=None, flat=True,
	             all_versions=False, for_all_version_delete=False):
	  """
	  Args:
	    command_name: name of command being run.
	    proj_id_handler: ProjectIdHandler to use for current command.
	    headers: Dictionary containing optional HTTP headers to pass to boto.
	    debug: Debug level to pass in to boto connection (range 0..3).
	    bucket_storage_uri_class: Class to instantiate for cloud StorageUris.
	        Settable for testing/mocking.
	    uri_strs: PluralityCheckableIterator of URI strings needing expansion.
	    recursion_requested: True if -R specified on command-line.
	    have_existing_dst_container: Bool indicator whether this is a copy
	        request to an existing bucket, bucket subdir, or directory. Default
	        None value should be used in cases where this is not needed (commands
	        other than cp).
	    flat: Bool indicating whether bucket listings should be flattened, i.e.,
	        so the mapped-to results contain objects spanning subdirectories.
	    all_versions: Bool indicating whether to iterate over all object versions.
	    for_all_version_delete: Bool indicating whether this is for an all-version
	        delete. Accepted for interface compatibility; it is not stored by
	        this constructor.

	  Examples of _NameExpansionIterator with flat=True:
	    - Calling with one of the uri_strs being 'gs://bucket' will enumerate all
	      top-level objects, as will 'gs://bucket/' and 'gs://bucket/*'.
	    - 'gs://bucket/**' will enumerate all objects in the bucket.
	    - 'gs://bucket/abc' will enumerate all next-level objects under directory
	      abc (i.e., not including subdirectories of abc) if gs://bucket/abc/*
	      matches any objects; otherwise it will enumerate the single name
	      gs://bucket/abc
	    - 'gs://bucket/abc/**' will enumerate all objects under abc or any of its
	      subdirectories.
	    - 'file:///tmp' will enumerate all files under /tmp, as will
	      'file:///tmp/*'
	    - 'file:///tmp/**' will enumerate all files under /tmp or any of its
	      subdirectories.

	  Example if flat=False: calling with gs://bucket/abc/* lists matching objects
	  or subdirs, but not sub-subdirs or objects beneath subdirs.

	  Note: In step-by-step comments in __iter__ we give examples assuming there's
	  a gs://bucket with object paths:
	    abcd/o1.txt
	    abcd/o2.txt
	    xyz/o1.txt
	    xyz/o2.txt
	  and a directory file://dir with file paths:
	    dir/a.txt
	    dir/b.txt
	    dir/c/
	  """
	  self.command_name = command_name
	  self.proj_id_handler = proj_id_handler
	  self.headers = headers
	  self.debug = debug
	  self.bucket_storage_uri_class = bucket_storage_uri_class
	  self.suri_builder = StorageUriBuilder(debug, bucket_storage_uri_class)
	  self.uri_strs = uri_strs
	  self.recursion_requested = recursion_requested
	  self.have_existing_dst_container = have_existing_dst_container
	  self.flat = flat
	  self.all_versions = all_versions

	  # Map holding wildcard strings to use for flat vs subdir-by-subdir listings.
	  # (A flat listing means show all objects expanded all the way down.)
	  # '**' recurses through subdirectories; '*' matches a single level. An
	  # empty pattern for the non-flat case would match nothing at all.
	  self._flatness_wildcard = {True: '**', False: '*'}

	def __iter__(self):
	  """
	  Generates one NameExpansionResult per object/file matched by self.uri_strs.

	  Raises:
	    CommandException: if a URI string expands to nothing.
	  """
	  for uri_str in self.uri_strs:
	    # Step 1: Expand any explicitly specified wildcards. The output from this
	    # step is an iterator of BucketListingRef.
	    # Starting with gs://buck*/abc* this step would expand to gs://bucket/abcd
	    if ContainsWildcard(uri_str):
	      post_step1_iter = self._WildcardIterator(uri_str)
	    else:
	      suri = self.suri_builder.StorageUri(uri_str)
	      post_step1_iter = iter([BucketListingRef(suri)])
	    post_step1_iter = PluralityCheckableIterator(post_step1_iter)

	    # Step 2: Expand bucket subdirs and versions. The output from this
	    # step is an iterator of (names_container, BucketListingRef).
	    # Starting with gs://bucket/abcd this step would expand to:
	    #   iter([(True, abcd/o1.txt), (True, abcd/o2.txt)]).
	    if self.flat and self.recursion_requested:
	      post_step2_iter = _ImplicitBucketSubdirIterator(self,
	          post_step1_iter, self.flat)
	    elif self.all_versions:
	      post_step2_iter = _AllVersionIterator(self, post_step1_iter,
	                                            headers=self.headers)
	    else:
	      post_step2_iter = _NonContainerTuplifyIterator(post_step1_iter)
	    post_step2_iter = PluralityCheckableIterator(post_step2_iter)

	    # Step 3. Expand directories and buckets. This step yields the iterated
	    # values. Starting with gs://bucket this step would expand to:
	    #  [abcd/o1.txt, abcd/o2.txt, xyz/o1.txt, xyz/o2.txt]
	    # Starting with file://dir this step would expand to:
	    #  [dir/a.txt, dir/b.txt, dir/c/]
	    wc = self._flatness_wildcard[self.flat]
	    src_uri_expands_to_multi = (post_step1_iter.has_plurality()
	                                or post_step2_iter.has_plurality())
	    is_multi_src_request = (self.uri_strs.has_plurality()
	                            or src_uri_expands_to_multi)

	    if post_step2_iter.is_empty():
	      raise CommandException('No URIs matched: %s' % uri_str)
	    for (names_container, blr) in post_step2_iter:
	      if (not blr.GetUri().names_container()
	          and (self.flat or not blr.HasPrefix())):
	        # Plain object/file: emit it as-is.
	        yield NameExpansionResult(uri_str, is_multi_src_request,
	                                  src_uri_expands_to_multi, names_container,
	                                  blr.GetUriString(),
	                                  self.have_existing_dst_container,
	                                  is_latest=blr.IsLatest())
	        continue
	      if not self.recursion_requested:
	        if blr.GetUri().is_file_uri():
	          desc = 'directory'
	        else:
	          desc = 'bucket'
	        # Single parenthesised argument: prints identically as a Python 2
	        # print statement and as a Python 3 print() call.
	        print('Omitting %s "%s". (Did you mean to do %s -R?)' % (
	            desc, blr.GetUri(), self.command_name))
	        continue
	      if blr.GetUri().is_file_uri():
	        # Convert dir to implicit recursive wildcard.
	        uri_to_iterate = '%s/%s' % (blr.GetUriString(), wc)
	      else:
	        # Convert bucket to implicit recursive wildcard.
	        uri_to_iterate = blr.GetUri().clone_replace_name(wc)
	      wc_iter = PluralityCheckableIterator(
	          self._WildcardIterator(uri_to_iterate))
	      # Once the container is expanded the plurality flags can only turn
	      # from False to True, never back.
	      src_uri_expands_to_multi = (src_uri_expands_to_multi
	                                  or wc_iter.has_plurality())
	      is_multi_src_request = (self.uri_strs.has_plurality()
	                              or src_uri_expands_to_multi)
	      # Separate name so the outer loop variable is not rebound.
	      for exp_blr in wc_iter:
	        yield NameExpansionResult(uri_str, is_multi_src_request,
	                                  src_uri_expands_to_multi, True,
	                                  exp_blr.GetUriString(),
	                                  self.have_existing_dst_container,
	                                  is_latest=exp_blr.IsLatest())

	def _WildcardIterator(self, uri_or_str):
	  """
	  Builds a wildcard iterator for uri_or_str via
	  wildcard_iterator.wildcard_iterator, supplying the project id handler,
	  storage URI class, headers, debug level and all_versions setting held on
	  this instance.

	  Args:
	    uri_or_str: StorageUri or URI string naming wildcard objects to iterate.

	  Returns:
	    Whatever wildcard_iterator.wildcard_iterator returns for these arguments.
	  """
	  instance_settings = dict(
	      bucket_storage_uri_class=self.bucket_storage_uri_class,
	      headers=self.headers,
	      debug=self.debug,
	      all_versions=self.all_versions)
	  return wildcard_iterator.wildcard_iterator(
	      uri_or_str, self.proj_id_handler, **instance_settings)


	def NameExpansionIterator(command_name, proj_id_handler, headers, debug,
	                          bucket_storage_uri_class, uri_strs,
	                          recursion_requested,
	                          have_existing_dst_container=None, flat=True,
	                          all_versions=False,
	                          for_all_version_delete=False):
	  """
	  Static factory for _NameExpansionIterator.

	  The source URI strings are first wrapped in a PluralityCheckableIterator
	  (so uri_strs may be passed as an array or as an iterator), then handed to
	  _NameExpansionIterator. The resulting iterator is itself wrapped in a
	  PluralityCheckableIterator and checked for emptiness before being returned.

	  Args:
	    command_name: name of command being run.
	    proj_id_handler: ProjectIdHandler to use for current command.
	    headers: Dictionary containing optional HTTP headers to pass to boto.
	    debug: Debug level to pass in to boto connection (range 0..3).
	    bucket_storage_uri_class: Class to instantiate for cloud StorageUris.
	        Settable for testing/mocking.
	    uri_strs: Array or iterator of URI strings needing expansion.
	    recursion_requested: True if -R specified on command-line.
	    have_existing_dst_container: Bool indicator whether this is a copy
	        request to an existing bucket, bucket subdir, or directory. Default
	        None value should be used in cases where this is not needed (commands
	        other than cp).
	    flat: Bool indicating whether bucket listings should be flattened, i.e.,
	        so the mapped-to results contain objects spanning subdirectories.
	    all_versions: Bool indicating whether to iterate over all object versions.
	    for_all_version_delete: Bool indicating whether this is for an all-version
	        delete.

	  Returns:
	    PluralityCheckableIterator wrapping the _NameExpansionIterator.

	  Raises:
	    CommandException: if the expansion produces no results.

	  Examples with flat=True:
	    - Calling with one of the uri_strs being 'gs://bucket' will enumerate all
	      top-level objects, as will 'gs://bucket/' and 'gs://bucket/*'.
	    - 'gs://bucket/**' will enumerate all objects in the bucket.
	    - 'gs://bucket/abc' will enumerate all next-level objects under directory
	      abc (i.e., not including subdirectories of abc) if gs://bucket/abc/*
	      matches any objects; otherwise it will enumerate the single name
	      gs://bucket/abc
	    - 'gs://bucket/abc/**' will enumerate all objects under abc or any of its
	      subdirectories.
	    - 'file:///tmp' will enumerate all files under /tmp, as will
	      'file:///tmp/*'
	    - 'file:///tmp/**' will enumerate all files under /tmp or any of its
	      subdirectories.

	  Example if flat=False: calling with gs://bucket/abc/* lists matching objects
	  or subdirs, but not sub-subdirs or objects beneath subdirs.

	  Note: the step-by-step comments in _NameExpansionIterator.__iter__ give
	  examples assuming there's a gs://bucket with object paths:
	    abcd/o1.txt
	    abcd/o2.txt
	    xyz/o1.txt
	    xyz/o2.txt
	  and a directory file://dir with file paths:
	    dir/a.txt
	    dir/b.txt
	    dir/c/
	  """
	  checkable_uri_strs = PluralityCheckableIterator(uri_strs)
	  expansion = PluralityCheckableIterator(
	      _NameExpansionIterator(
	          command_name, proj_id_handler, headers, debug,
	          bucket_storage_uri_class, checkable_uri_strs, recursion_requested,
	          have_existing_dst_container, flat, all_versions=all_versions,
	          for_all_version_delete=for_all_version_delete))
	  if not expansion.is_empty():
	    return expansion
	  raise CommandException('No URIs matched')


	class NameExpansionIteratorQueue(object):
	  """
	  Wrapper around NameExpansionIterator that provides a Multiprocessing.Queue
	  facade.

	  Only a blocking get() function can be called, and the block and timeout
	  params on that function are ignored. All other class functions raise
	  NotImplementedError.

	  This class is thread safe: get() serializes access to the wrapped iterator
	  with self.lock.
	  """

	  def __init__(self, name_expansion_iterator, final_value):
	    """
	    Args:
	      name_expansion_iterator: iterator to draw results from; it must provide
	          is_empty() in addition to the iterator protocol.
	      final_value: value get() returns once the iterator reports it is empty.
	    """
	    self.name_expansion_iterator = name_expansion_iterator
	    self.final_value = final_value
	    self.lock = threading.Lock()

	  def _Unsupported(self, method_name):
	    """Raises NotImplementedError naming the unsupported queue method."""
	    raise NotImplementedError(
	        "NameExpansionIteratorQueue.%s() not implemented" % method_name)

	  def qsize(self):
	    self._Unsupported('qsize')

	  def empty(self):
	    self._Unsupported('empty')

	  def full(self):
	    self._Unsupported('full')

	  def put(self, obj=None, block=None, timeout=None):
	    self._Unsupported('put')

	  def put_nowait(self, obj):
	    self._Unsupported('put_nowait')

	  def get(self, block=None, timeout=None):
	    """
	    Returns the next result from the wrapped iterator, or final_value once
	    the iterator reports it is empty.

	    Args:
	      block: ignored.
	      timeout: ignored.
	    """
	    # The with-statement releases the lock even if the iterator raises.
	    with self.lock:
	      if self.name_expansion_iterator.is_empty():
	        return self.final_value
	      # Builtin next() dispatches to the Python 2 next() method as well as
	      # to Python 3's __next__().
	      return next(self.name_expansion_iterator)

	  def get_nowait(self):
	    self._Unsupported('get_nowait')

	  def get_no_wait(self):
	    self._Unsupported('get_no_wait')

	  def close(self):
	    self._Unsupported('close')

	  def join_thread(self):
	    self._Unsupported('join_thread')

	  def cancel_join_thread(self):
	    self._Unsupported('cancel_join_thread')


	class _NonContainerTuplifyIterator(object):
	"""
	Iterator that produces the tuple (False, blr) for each iteration
	of blr_iter. Used for cases where blr_iter iterates over a set of
	BucketListingRefs known not to name containers.
	"""

	def __init__(self, blr_iter):
	"""
	Args:
	blr_iter: iterator of BucketListingRef.
	"""
	self.blr_iter = blr_iter

	def __iter__(self):
	for blr in self.blr_iter:
	yield (False, blr)


	class _ImplicitBucketSubdirIterator(object):

	"""
	Iterator wrapper that iterates over blr_iter, performing implicit bucket
	subdir expansion.

	Each iteration yields tuple (names_container, expanded BucketListingRefs)
	where names_container is true if URI names a directory, bucket,
	or bucket subdir (vs how StorageUri.names_container() doesn't
	handle latter case).

	For example, iterating over [BucketListingRef("gs://abc")] would expand to:
	[BucketListingRef("gs://abc/o1"), BucketListingRef("gs://abc/o2")]
	if those subdir objects exist, and [BucketListingRef("gs://abc") otherwise.
	"""

	def __init__(self, name_expansion_instance, blr_iter, flat):
	"""
	Args:
	name_expansion_instance: calling instance of NameExpansion class.
	blr_iter: iterator of BucketListingRef.
	flat: bool indicating whether bucket listings should be flattened, i.e.,
	so the mapped-to results contain objects spanning subdirectories.
	"""
	self.blr_iter = blr_iter
	self.name_expansion_instance = name_expansion_instance
	self.flat = flat

	def __iter__(self):
	for blr in self.blr_iter:
	uri = blr.GetUri()
	if uri.names_object():
	# URI could be a bucket subdir.
	implicit_subdir_iterator = PluralityCheckableIterator(
	self.name_expansion_instance._WildcardIterator(
	self.name_expansion_instance.suri_builder.StorageUri(
	'%s/%s' % (uri.uri.rstrip('/'),
	self.name_expansion_instance._flatness_wildcard[
	self.flat]))))
	if not implicit_subdir_iterator.is_empty():
	for exp_blr in implicit_subdir_iterator:
	yield (True, exp_blr)
	else:
	yield (False, blr)
	else:
	yield (False, blr)

	class _AllVersionIterator(object):
	"""
	Iterator wrapper that iterates over blr_iter, performing implicit version
	expansion.

	Output behavior is identical to that in _ImplicitBucketSubdirIterator above:
	each iteration yields a tuple (names_container, BucketListingRef).

	For example, iterating over [BucketListingRef("gs://abc/o1")] would expand to:
	[BucketListingRef("gs://abc/o1#1234"), BucketListingRef("gs://abc/o1#1235")]
	"""

	def __init__(self, name_expansion_instance, blr_iter, headers=None):
	"""
	Args:
	name_expansion_instance: calling instance of NameExpansion class.
	blr_iter: iterator of BucketListingRef.
	headers: headers passed through to uri.list_bucket() when listing object
	versions (default None).
	"""
	self.blr_iter = blr_iter
	self.name_expansion_instance = name_expansion_instance
	self.headers = headers

	def __iter__(self):
	# NOTE(review): this listing has lost its indentation, so the nesting of
	# the statements below (in particular the final "if empty:") cannot be read
	# off the text; confirm against the upstream file before restructuring.
	#
	# Tracks whether anything has been yielded yet. It is set to True once here
	# and only ever flipped to False afterwards.
	empty = True
	for blr in self.blr_iter:
	uri = blr.GetUri()
	# A URI that does not name an object is passed through flagged as a
	# container, and iteration over blr_iter stops at that point.
	if not uri.names_object():
	empty = False
	yield (True, blr)
	break
	# List keys under the object's name with all_versions=True; each key whose
	# name equals the object name is yielded as its own BucketListingRef.
	for key in uri.list_bucket(
	prefix=uri.object_name, headers=self.headers, all_versions=True):
	if key.name != uri.object_name:
	# The desired entries will be alphabetically first in this listing.
	break
	version_blr = BucketListingRef(uri.clone_replace_key(key), key=key)
	empty = False
	yield (False, version_blr)
	# If no version exists, yield the unversioned blr, and let the consuming
	# operation fail. This mirrors behavior in _ImplicitBucketSubdirIterator.
	# NOTE(review): because "empty" is never reset, this fallback can only fire
	# while nothing at all has been yielded so far -- confirm that is intended
	# when blr_iter produces several entries.
	if empty:
	yield (False, blr)