# -*- coding: utf-8 -*-
# Copyright 2013 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""XML/boto gsutil Cloud API implementation for GCS and Amazon S3."""

from __future__ import absolute_import

import base64
import binascii
import datetime
import errno
import httplib
import json
import multiprocessing
import os
import pickle
import random
import re
import socket
import tempfile
import textwrap
import threading
import time
import xml
from xml.dom.minidom import parseString as XmlParseString
from xml.sax import _exceptions as SaxExceptions

import boto
from boto import handler
from boto.exception import ResumableDownloadException as BotoResumableDownloadException
from boto.exception import ResumableTransferDisposition
from boto.gs.cors import Cors
from boto.gs.lifecycle import LifecycleConfig
from boto.s3.cors import CORSConfiguration as S3Cors
from boto.s3.deletemarker import DeleteMarker
from boto.s3.lifecycle import Lifecycle as S3Lifecycle
from boto.s3.prefix import Prefix

from gslib.boto_resumable_upload import BotoResumableUpload
from gslib.cloud_api import AccessDeniedException
from gslib.cloud_api import ArgumentException
from gslib.cloud_api import BadRequestException
from gslib.cloud_api import CloudApi
from gslib.cloud_api import NotEmptyException
from gslib.cloud_api import NotFoundException
from gslib.cloud_api import PreconditionException
from gslib.cloud_api import ResumableDownloadException
from gslib.cloud_api import ResumableUploadAbortException
from gslib.cloud_api import ResumableUploadException
from gslib.cloud_api import ResumableUploadStartOverException
from gslib.cloud_api import ServiceException
from gslib.cloud_api_helper import ValidateDstObjectMetadata
# Imported for boto AuthHandler purposes.
import gslib.devshell_auth_plugin  # pylint: disable=unused-import
from gslib.exception import CommandException
from gslib.exception import InvalidUrlError
from gslib.hashing_helper import Base64EncodeHash
from gslib.hashing_helper import Base64ToHexHash
from gslib.project_id import GOOG_PROJ_ID_HDR
from gslib.project_id import PopulateProjectId
from gslib.storage_url import StorageUrlFromString
from gslib.third_party.storage_apitools import storage_v1_messages as apitools_messages
from gslib.translation_helper import AclTranslation
from gslib.translation_helper import AddS3MarkerAclToObjectMetadata
from gslib.translation_helper import CorsTranslation
from gslib.translation_helper import CreateBucketNotFoundException
from gslib.translation_helper import CreateNotFoundExceptionForObjectWrite
from gslib.translation_helper import CreateObjectNotFoundException
from gslib.translation_helper import DEFAULT_CONTENT_TYPE
from gslib.translation_helper import EncodeStringAsLong
from gslib.translation_helper import GenerationFromUrlAndString
from gslib.translation_helper import HeadersFromObjectMetadata
from gslib.translation_helper import LifecycleTranslation
from gslib.translation_helper import REMOVE_CORS_CONFIG
from gslib.translation_helper import S3MarkerAclFromObjectMetadata
from gslib.util import ConfigureNoOpAuthIfNeeded
from gslib.util import DEFAULT_FILE_BUFFER_SIZE
from gslib.util import GetMaxRetryDelay
from gslib.util import GetNumRetries
from gslib.util import S3_DELETE_MARKER_GUID
from gslib.util import TWO_MIB
from gslib.util import UnaryDictToXml
from gslib.util import UTF8
from gslib.util import XML_PROGRESS_CALLBACKS

TRANSLATABLE_BOTO_EXCEPTIONS = (boto.exception.BotoServerError,
                                boto.exception.InvalidUriError,
                                boto.exception.ResumableDownloadException,
                                boto.exception.ResumableUploadException,
                                boto.exception.StorageCreateError,
                                boto.exception.StorageResponseError)

# pylint: disable=global-at-module-level
global boto_auth_initialized, boto_auth_initialized_lock
# If multiprocessing is available, these will be overridden to process-safe
# variables in InitializeMultiprocessingVariables.
boto_auth_initialized_lock = threading.Lock()
boto_auth_initialized = False

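# Matches boto error text that refers to a non-existent object (allowing for
# line wrapping within the message).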
NON_EXISTENT_OBJECT_REGEX = re.compile(r'.*non-\s*existent\s*object',
                                       flags=re.DOTALL)
# Determines whether an etag is a valid MD5.
MD5_REGEX = re.compile(r'^"*[a-fA-F0-9]{32}"*$')


def InitializeMultiprocessingVariables():  # pylint: disable=invalid-name
  """Perform necessary initialization for multiprocessing.

    See gslib.command.InitializeMultiprocessingVariables for an explanation
    of why this is necessary.
  """
  # pylint: disable=global-variable-undefined
  global boto_auth_initialized, boto_auth_initialized_lock
  boto_auth_initialized_lock = gslib.util.CreateLock()
  boto_auth_initialized = multiprocessing.Value('i', 0)


class DownloadProxyCallbackHandler(object):
  """Intermediary callback to keep track of the number of bytes downloaded."""

  def __init__(self, start_byte, callback):
    self._start_byte = start_byte
    self._callback = callback

  def call(self, bytes_downloaded, total_size):
    """Saves necessary data and then calls the given Cloud API callback.

    Args:
      bytes_downloaded: Number of bytes processed so far.
      total_size: Total size of the ongoing operation.
    """
    if self._callback:
      self._callback(self._start_byte + bytes_downloaded, total_size)


class BotoTranslation(CloudApi):
  """Boto-based XML translation implementation of gsutil Cloud API.

  This class takes gsutil Cloud API objects, translates them to XML service
  calls, and translates the results back into gsutil Cloud API objects for
  use by the caller.
  """

  def __init__(self, bucket_storage_uri_class, logger, provider=None,
               credentials=None, debug=0, trace_token=None):
    """Performs necessary setup for interacting with the cloud storage provider.

    Args:
      bucket_storage_uri_class: boto storage_uri class, used by APIs that
                                provide boto translation or mocking.
      logger: logging.logger for outputting log messages.
      provider: Provider prefix describing cloud storage provider to connect to.
                'gs' and 's3' are supported. Function implementations ignore
                the provider argument and use this one instead.
      credentials: Unused.
      debug: Debug level for the API implementation (0..3).
      trace_token: Unused in this subclass.
    """
    super(BotoTranslation, self).__init__(bucket_storage_uri_class, logger,
                                          provider=provider, debug=debug)
    _ = credentials
    # pylint: disable=global-variable-undefined, global-variable-not-assigned
    global boto_auth_initialized, boto_auth_initialized_lock
    with boto_auth_initialized_lock:
      ConfigureNoOpAuthIfNeeded()
      if isinstance(boto_auth_initialized, bool):
        boto_auth_initialized = True
      else:
        boto_auth_initialized.value = 1
    self.api_version = boto.config.get_value(
        'GSUtil', 'default_api_version', '1')

  def GetBucket(self, bucket_name, provider=None, fields=None):
    """See CloudApi class for function doc strings."""
    _ = provider
    bucket_uri = self._StorageUriForBucket(bucket_name)
    headers = {}
    self._AddApiVersionToHeaders(headers)
    try:
      return self._BotoBucketToBucket(bucket_uri.get_bucket(validate=True,
                                                            headers=headers),
                                      fields=fields)
    except TRANSLATABLE_BOTO_EXCEPTIONS, e:
      self._TranslateExceptionAndRaise(e, bucket_name=bucket_name)

  def ListBuckets(self, project_id=None, provider=None, fields=None):
    """See CloudApi class for function doc strings."""
    _ = provider
    get_fields = self._ListToGetFields(list_fields=fields)
    headers = {}
    self._AddApiVersionToHeaders(headers)
    if self.provider == 'gs':
      headers[GOOG_PROJ_ID_HDR] = PopulateProjectId(project_id)
    try:
      provider_uri = boto.storage_uri(
          '%s://' % self.provider,
          suppress_consec_slashes=False,
          bucket_storage_uri_class=self.bucket_storage_uri_class,
          debug=self.debug)

      buckets_iter = provider_uri.get_all_buckets(headers=headers)
      for bucket in buckets_iter:
        if self.provider == 's3' and bucket.name.lower() != bucket.name:
          # S3 listings can return buckets with upper-case names, but boto
          # cannot make successful calls against them, so skip those buckets.
          continue
        yield self._BotoBucketToBucket(bucket, fields=get_fields)
    except TRANSLATABLE_BOTO_EXCEPTIONS, e:
      self._TranslateExceptionAndRaise(e)

  def PatchBucket(self, bucket_name, metadata, canned_acl=None,
                  canned_def_acl=None, preconditions=None, provider=None,
                  fields=None):
    """See CloudApi class for function doc strings."""
    _ = provider
    bucket_uri = self._StorageUriForBucket(bucket_name)
    headers = {}
    self._AddApiVersionToHeaders(headers)
    try:
      self._AddPreconditionsToHeaders(preconditions, headers)
      if metadata.acl:
        boto_acl = AclTranslation.BotoAclFromMessage(metadata.acl)
        bucket_uri.set_xml_acl(boto_acl.to_xml(), headers=headers)
      if canned_acl:
        canned_acls = bucket_uri.canned_acls()
        if canned_acl not in canned_acls:
          raise CommandException('Invalid canned ACL "%s".' % canned_acl)
        bucket_uri.set_acl(canned_acl, bucket_uri.object_name)
      if canned_def_acl:
        canned_acls = bucket_uri.canned_acls()
        if canned_def_acl not in canned_acls:
          raise CommandException('Invalid canned ACL "%s".' % canned_def_acl)
        bucket_uri.set_def_acl(canned_def_acl, bucket_uri.object_name)
      if metadata.cors:
        if metadata.cors == REMOVE_CORS_CONFIG:
          metadata.cors = []
        boto_cors = CorsTranslation.BotoCorsFromMessage(metadata.cors)
        bucket_uri.set_cors(boto_cors, False)
      if metadata.defaultObjectAcl:
        boto_acl = AclTranslation.BotoAclFromMessage(
            metadata.defaultObjectAcl)
        bucket_uri.set_def_xml_acl(boto_acl.to_xml(), headers=headers)
      if metadata.lifecycle:
        boto_lifecycle = LifecycleTranslation.BotoLifecycleFromMessage(
            metadata.lifecycle)
        bucket_uri.configure_lifecycle(boto_lifecycle, False)
      if metadata.logging:
        if self.provider == 'gs':
          headers[GOOG_PROJ_ID_HDR] = PopulateProjectId(None)
        if metadata.logging.logBucket and metadata.logging.logObjectPrefix:
          bucket_uri.enable_logging(metadata.logging.logBucket,
                                    metadata.logging.logObjectPrefix,
                                    False, headers)
        else:  # Logging field is present and empty.  Disable logging.
          bucket_uri.disable_logging(False, headers)
      if metadata.versioning:
        bucket_uri.configure_versioning(metadata.versioning.enabled,
                                        headers=headers)
      if metadata.website:
        main_page_suffix = metadata.website.mainPageSuffix
        error_page = metadata.website.notFoundPage
        bucket_uri.set_website_config(main_page_suffix, error_page)
      return self.GetBucket(bucket_name, fields=fields)
    except TRANSLATABLE_BOTO_EXCEPTIONS, e:
      self._TranslateExceptionAndRaise(e, bucket_name=bucket_name)

  def CreateBucket(self, bucket_name, project_id=None, metadata=None,
                   provider=None, fields=None):
    """See CloudApi class for function doc strings."""
    _ = provider
    bucket_uri = self._StorageUriForBucket(bucket_name)
    location = ''
    if metadata and metadata.location:
      location = metadata.location
    # Pass storage_class param only if this is a GCS bucket. (In S3 the
    # storage class is specified on the key object.)
    headers = {}
    if bucket_uri.scheme == 'gs':
      self._AddApiVersionToHeaders(headers)
      headers[GOOG_PROJ_ID_HDR] = PopulateProjectId(project_id)
      storage_class = ''
      if metadata and metadata.storageClass:
        storage_class = metadata.storageClass
      try:
        bucket_uri.create_bucket(headers=headers, location=location,
                                 storage_class=storage_class)
      except TRANSLATABLE_BOTO_EXCEPTIONS, e:
        self._TranslateExceptionAndRaise(e, bucket_name=bucket_name)
    else:
      try:
        bucket_uri.create_bucket(headers=headers, location=location)
      except TRANSLATABLE_BOTO_EXCEPTIONS, e:
        self._TranslateExceptionAndRaise(e, bucket_name=bucket_name)
    return self.GetBucket(bucket_name, fields=fields)

  def DeleteBucket(self, bucket_name, preconditions=None, provider=None):
    """See CloudApi class for function doc strings."""
    _ = provider, preconditions
    bucket_uri = self._StorageUriForBucket(bucket_name)
    headers = {}
    self._AddApiVersionToHeaders(headers)
    try:
      bucket_uri.delete_bucket(headers=headers)
    except TRANSLATABLE_BOTO_EXCEPTIONS, e:
      translated_exception = self._TranslateBotoException(
          e, bucket_name=bucket_name)
      if (translated_exception and
          'BucketNotEmpty' in translated_exception.reason):
        try:
          if bucket_uri.get_versioning_config():
            if self.provider == 's3':
              raise NotEmptyException(
                  'VersionedBucketNotEmpty (%s). Currently, gsutil does not '
                  'support listing or removing S3 DeleteMarkers, so you may '
                  'need to delete these using another tool to successfully '
                  'delete this bucket.' % bucket_name, status=e.status)
            raise NotEmptyException(
                'VersionedBucketNotEmpty (%s)' % bucket_name, status=e.status)
          else:
            raise NotEmptyException('BucketNotEmpty (%s)' % bucket_name,
                                    status=e.status)
        except TRANSLATABLE_BOTO_EXCEPTIONS, e2:
          self._TranslateExceptionAndRaise(e2, bucket_name=bucket_name)
      elif translated_exception and translated_exception.status == 404:
        raise NotFoundException('Bucket %s does not exist.' % bucket_name)
      else:
        self._TranslateExceptionAndRaise(e, bucket_name=bucket_name)

  def ListObjects(self, bucket_name, prefix=None, delimiter=None,
                  all_versions=None, provider=None, fields=None):
    """See CloudApi class for function doc strings."""
    _ = provider
    get_fields = self._ListToGetFields(list_fields=fields)
    bucket_uri = self._StorageUriForBucket(bucket_name)
    headers = {}
    yield_prefixes = fields is None or 'prefixes' in fields
    yield_objects = fields is None or any(
        field.startswith('items/') for field in fields)
    self._AddApiVersionToHeaders(headers)
    try:
      objects_iter = bucket_uri.list_bucket(prefix=prefix or '',
                                            delimiter=delimiter or '',
                                            all_versions=all_versions,
                                            headers=headers)
    except TRANSLATABLE_BOTO_EXCEPTIONS, e:
      self._TranslateExceptionAndRaise(e, bucket_name=bucket_name)

    try:
      for key in objects_iter:
        if yield_prefixes and isinstance(key, Prefix):
          yield CloudApi.CsObjectOrPrefix(key.name,
                                          CloudApi.CsObjectOrPrefixType.PREFIX)
        elif yield_objects:
          key_to_convert = key

          # Listed keys are populated with these fields during bucket listing.
          key_http_fields = set(['bucket', 'etag', 'name', 'updated',
                                 'generation', 'metageneration', 'size'])

          # When fields == None, the caller is requesting all possible fields.
          # If the caller requested any fields that are not populated by bucket
          # listing, we'll need to make a separate HTTP call for each object to
          # get its metadata and populate the remaining fields with the result.
          if not get_fields or (get_fields and not
                                get_fields.issubset(key_http_fields)):

            generation = None
            if getattr(key, 'generation', None):
              generation = key.generation
            if getattr(key, 'version_id', None):
              generation = key.version_id
            key_to_convert = self._GetBotoKey(bucket_name, key.name,
                                              generation=generation)
          return_object = self._BotoKeyToObject(key_to_convert,
                                                fields=get_fields)

          yield CloudApi.CsObjectOrPrefix(return_object,
                                          CloudApi.CsObjectOrPrefixType.OBJECT)
    except TRANSLATABLE_BOTO_EXCEPTIONS, e:
      self._TranslateExceptionAndRaise(e, bucket_name=bucket_name)

  def GetObjectMetadata(self, bucket_name, object_name, generation=None,
                        provider=None, fields=None):
    """See CloudApi class for function doc strings."""
    _ = provider
    try:
      return self._BotoKeyToObject(self._GetBotoKey(bucket_name, object_name,
                                                    generation=generation),
                                   fields=fields)
    except TRANSLATABLE_BOTO_EXCEPTIONS, e:
      self._TranslateExceptionAndRaise(e, bucket_name=bucket_name,
                                       object_name=object_name,
                                       generation=generation)

  def _CurryDigester(self, digester_object):
    """Curries a digester object into a form consumable by boto.

    Key instantiates its own digesters by calling hash_algs[alg]() [note there
    are no arguments to this function].  So in order to pass in our caught-up
    digesters during a resumable download, we need to pass the digester
    object but don't get to look it up based on the algorithm name.  Here we
    use a lambda to make lookup implicit.

    Args:
      digester_object: Input object to be returned by the created function.

    Returns:
      A function which when called will return the input object.
    """
    return lambda: digester_object

  def GetObjectMedia(
      self, bucket_name, object_name, download_stream, provider=None,
      generation=None, object_size=None,
      download_strategy=CloudApi.DownloadStrategy.ONE_SHOT,
      start_byte=0, end_byte=None, progress_callback=None,
      serialization_data=None, digesters=None):
    """See CloudApi class for function doc strings."""
    # If serialization_data is not provided, this implementation makes an extra
    # call to fetch the object metadata first.
    headers = {}
    self._AddApiVersionToHeaders(headers)
    if 'accept-encoding' not in headers:
      headers['accept-encoding'] = 'gzip'
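    # Build the Range header: an explicit byte range when end_byte is given, an
    # open-ended range for a positive start_byte, or a suffix range (the last
    # N bytes) when start_byte is negative.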
    if end_byte is not None:
      headers['range'] = 'bytes=%s-%s' % (start_byte, end_byte)
    elif start_byte > 0:
      headers['range'] = 'bytes=%s-' % (start_byte)
    elif start_byte < 0:
      headers['range'] = 'bytes=%s' % (start_byte)

    # Since in most cases we already made a call to get the object metadata,
    # here we avoid an extra HTTP call by unpickling the key.  This is coupled
    # with the implementation in _BotoKeyToObject.
    if serialization_data:
      serialization_dict = json.loads(serialization_data)
      key = pickle.loads(binascii.a2b_base64(serialization_dict['url']))
    else:
      key = self._GetBotoKey(bucket_name, object_name, generation=generation)

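    # For GCS, hand the caller's digesters to boto (wrapped by _CurryDigester)
    # so hashes are updated on the fly as bytes are downloaded.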
    if digesters and self.provider == 'gs':
      hash_algs = {}
      for alg in digesters:
        hash_algs[alg] = self._CurryDigester(digesters[alg])
    else:
      hash_algs = {}

    total_size = object_size or 0
    if serialization_data:
      total_size = json.loads(serialization_data)['total_size']

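    # Issue roughly one progress callback per TWO_MIB of data, with a floor of
    # XML_PROGRESS_CALLBACKS callbacks when the size is small or unknown.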
    if total_size:
      num_progress_callbacks = max(int(total_size) / TWO_MIB,
                                   XML_PROGRESS_CALLBACKS)
    else:
      num_progress_callbacks = XML_PROGRESS_CALLBACKS

    try:
      if download_strategy is CloudApi.DownloadStrategy.RESUMABLE:
        self._PerformResumableDownload(
            download_stream, start_byte, end_byte, key,
            headers=headers, callback=progress_callback,
            num_callbacks=num_progress_callbacks, hash_algs=hash_algs)
      elif download_strategy is CloudApi.DownloadStrategy.ONE_SHOT:
        self._PerformSimpleDownload(
            download_stream, key, progress_callback=progress_callback,
            num_progress_callbacks=num_progress_callbacks, headers=headers,
            hash_algs=hash_algs)
      else:
        raise ArgumentException('Unsupported DownloadStrategy: %s' %
                                download_strategy)
    except TRANSLATABLE_BOTO_EXCEPTIONS, e:
      self._TranslateExceptionAndRaise(e, bucket_name=bucket_name,
                                       object_name=object_name,
                                       generation=generation)

    if self.provider == 's3':
      if digesters:

        class HashToDigester(object):
          """Wrapper class to expose hash digests.

          boto creates its own digesters in s3's get_file, returning on-the-fly
          hashes only by way of key.local_hashes.  To propagate the digest back
          to the caller, this stub class implements the digest() function.
          """

          def __init__(self, hash_val):
            self.hash_val = hash_val

          def digest(self):  # pylint: disable=invalid-name
            return self.hash_val

        for alg_name in digesters:
          if ((download_strategy == CloudApi.DownloadStrategy.RESUMABLE and
               start_byte != 0) or
              not ((getattr(key, 'local_hashes', None) and
                    alg_name in key.local_hashes))):
            # For resumable downloads, boto does not provide a mechanism to
            # catch up the hash in the case of a partially complete download.
            # In this case or in the case where no digest was successfully
            # calculated, set the digester to None, which indicates that we'll
            # need to manually calculate the hash from the local file once it
            # is complete.
            digesters[alg_name] = None
          else:
            # Use the on-the-fly hash.
            digesters[alg_name] = HashToDigester(key.local_hashes[alg_name])

  def _PerformSimpleDownload(self, download_stream, key, progress_callback=None,
                             num_progress_callbacks=XML_PROGRESS_CALLBACKS,
                             headers=None, hash_algs=None):
    if not headers:
      headers = {}
      self._AddApiVersionToHeaders(headers)
    try:
      key.get_contents_to_file(download_stream, cb=progress_callback,
                               num_cb=num_progress_callbacks, headers=headers,
                               hash_algs=hash_algs)
    except TypeError:  # s3 and mocks do not support hash_algs
      key.get_contents_to_file(download_stream, cb=progress_callback,
                               num_cb=num_progress_callbacks, headers=headers)

  def _PerformResumableDownload(self, fp, start_byte, end_byte, key,
                                headers=None, callback=None,
                                num_callbacks=XML_PROGRESS_CALLBACKS,
                                hash_algs=None):
    """Downloads bytes from key to fp, resuming as needed.

    Args:
      fp: File pointer into which data should be downloaded.
      start_byte: Start byte of the download.
      end_byte: End byte of the download.
      key: Key object from which data is to be downloaded.
      headers: Headers to send when retrieving the file.
      callback: (optional) a callback function that will be called to report
           progress on the download.  The callback should accept two integer
           parameters.  The first integer represents the number of
           bytes that have been successfully transmitted from the service.  The
           second represents the total number of bytes that need to be
           transmitted.
      num_callbacks: (optional) If a callback is specified with the callback
           parameter, this determines the granularity of the callback
           by defining the maximum number of times the callback will be
           called during the file transfer.
      hash_algs: Dict of hash algorithms to apply to downloaded bytes.

    Raises:
      ResumableDownloadException on error.
    """
    if not headers:
      headers = {}
      self._AddApiVersionToHeaders(headers)

    retryable_exceptions = (httplib.HTTPException, IOError, socket.error,
                            socket.gaierror)

    debug = key.bucket.connection.debug

    num_retries = GetNumRetries()
    progress_less_iterations = 0
    last_progress_byte = start_byte

    while True:  # Retry as long as we're making progress.
      try:
        cb_handler = DownloadProxyCallbackHandler(start_byte, callback)
        headers = headers.copy()
        headers['Range'] = 'bytes=%d-%d' % (start_byte, end_byte)

        # Disable AWSAuthConnection-level retry behavior, since that would
        # cause downloads to restart from scratch.
        try:
          key.get_file(fp, headers, cb_handler.call, num_callbacks,
                       override_num_retries=0, hash_algs=hash_algs)
        except TypeError:
          key.get_file(fp, headers, cb_handler.call, num_callbacks,
                       override_num_retries=0)
        fp.flush()
        # Download succeeded.
        return
      except retryable_exceptions, e:
        if debug >= 1:
          self.logger.info('Caught exception (%s)', repr(e))
        if isinstance(e, IOError) and e.errno == errno.EPIPE:
          # Broken pipe error causes httplib to immediately
          # close the socket (http://bugs.python.org/issue5542),
          # so we need to close and reopen the key before resuming
          # the download.
          if self.provider == 's3':
            key.get_file(fp, headers, cb_handler.call, num_callbacks,
                         override_num_retries=0)
          else:  # self.provider == 'gs'
            key.get_file(fp, headers, cb_handler.call, num_callbacks,
                         override_num_retries=0, hash_algs=hash_algs)
      except BotoResumableDownloadException, e:
        if (e.disposition ==
            ResumableTransferDisposition.ABORT_CUR_PROCESS):
          raise ResumableDownloadException(e.message)
        else:
          if debug >= 1:
            self.logger.info('Caught ResumableDownloadException (%s) - will '
                             'retry', e.message)

      # At this point we had a retryable failure; see if we made progress.
      start_byte = fp.tell()
      if start_byte > last_progress_byte:
        last_progress_byte = start_byte
        progress_less_iterations = 0
      else:
        progress_less_iterations += 1

      if progress_less_iterations > num_retries:
        # Don't retry any longer in the current process.
        raise ResumableDownloadException(
            'Too many resumable download attempts failed without '
            'progress. You might try this download again later.')

      # Close the key, in case a previous download died partway
      # through and left data in the underlying key HTTP buffer.
      # Do this within a try/except block in case the connection is
      # closed (since key.close() attempts to do a final read, in which
      # case this read attempt would get an IncompleteRead exception,
      # which we can safely ignore).
      try:
        key.close()
      except httplib.IncompleteRead:
        pass

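      # Back off exponentially with random jitter, capped at the configured
      # maximum retry delay.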
      sleep_time_secs = min(random.random() * (2 ** progress_less_iterations),
                            GetMaxRetryDelay())
      if debug >= 1:
        self.logger.info(
            'Got retryable failure (%d progress-less in a row).\nSleeping %d '
            'seconds before re-trying', progress_less_iterations,
            sleep_time_secs)
      time.sleep(sleep_time_secs)

  def PatchObjectMetadata(self, bucket_name, object_name, metadata,
                          canned_acl=None, generation=None, preconditions=None,
                          provider=None, fields=None):
    """See CloudApi class for function doc strings."""
    _ = provider
    object_uri = self._StorageUriForObject(bucket_name, object_name,
                                           generation=generation)

    headers = {}
    self._AddApiVersionToHeaders(headers)
    meta_headers = HeadersFromObjectMetadata(metadata, self.provider)

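    # Partition the changed metadata into values to set (metadata_plus) and
    # keys to remove (metadata_minus, for headers whose value is None).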
    metadata_plus = {}
    metadata_minus = set()
    metadata_changed = False
    for k, v in meta_headers.iteritems():
      metadata_changed = True
      if v is None:
        metadata_minus.add(k)
      else:
        metadata_plus[k] = v

    self._AddPreconditionsToHeaders(preconditions, headers)

    if metadata_changed:
      try:
        object_uri.set_metadata(metadata_plus, metadata_minus, False,
                                headers=headers)
      except TRANSLATABLE_BOTO_EXCEPTIONS, e:
        self._TranslateExceptionAndRaise(e, bucket_name=bucket_name,
                                         object_name=object_name,
                                         generation=generation)

    if metadata.acl:
      boto_acl = AclTranslation.BotoAclFromMessage(metadata.acl)
      try:
        object_uri.set_xml_acl(boto_acl.to_xml(), key_name=object_name)
      except TRANSLATABLE_BOTO_EXCEPTIONS, e:
        self._TranslateExceptionAndRaise(e, bucket_name=bucket_name,
                                         object_name=object_name,
                                         generation=generation)
    if canned_acl:
      canned_acls = object_uri.canned_acls()
      if canned_acl not in canned_acls:
        raise CommandException('Invalid canned ACL "%s".' % canned_acl)
      object_uri.set_acl(canned_acl, object_uri.object_name)

    return self.GetObjectMetadata(bucket_name, object_name,
                                  generation=generation, fields=fields)

  def _PerformSimpleUpload(self, dst_uri, upload_stream, md5=None,
                           canned_acl=None, progress_callback=None,
                           headers=None):
    dst_uri.set_contents_from_file(upload_stream, md5=md5, policy=canned_acl,
                                   cb=progress_callback, headers=headers)

  def _PerformStreamingUpload(self, dst_uri, upload_stream, canned_acl=None,
                              progress_callback=None, headers=None):
    if dst_uri.get_provider().supports_chunked_transfer():
      dst_uri.set_contents_from_stream(upload_stream, policy=canned_acl,
                                       cb=progress_callback, headers=headers)
    else:
      # Provider doesn't support chunked transfer, so copy to a temporary
      # file.
      (temp_fh, temp_path) = tempfile.mkstemp()
      try:
        with open(temp_path, 'wb') as out_fp:
          stream_bytes = upload_stream.read(DEFAULT_FILE_BUFFER_SIZE)
          while stream_bytes:
            out_fp.write(stream_bytes)
            stream_bytes = upload_stream.read(DEFAULT_FILE_BUFFER_SIZE)
        with open(temp_path, 'rb') as in_fp:
          dst_uri.set_contents_from_file(in_fp, policy=canned_acl,
                                         headers=headers)
      finally:
        os.close(temp_fh)
        os.unlink(temp_path)

  def _PerformResumableUpload(self, key, upload_stream, upload_size,
                              tracker_callback, canned_acl=None,
                              serialization_data=None, progress_callback=None,
                              headers=None):
    resumable_upload = BotoResumableUpload(
        tracker_callback, self.logger, resume_url=serialization_data)
    resumable_upload.SendFile(key, upload_stream, upload_size,
                              canned_acl=canned_acl, cb=progress_callback,
                              headers=headers)

  def _UploadSetup(self, object_metadata, preconditions=None):
    """Shared upload implementation.

    Args:
      object_metadata: Object metadata describing destination object.
      preconditions: Optional gsutil Cloud API preconditions.

    Returns:
      Headers dictionary, StorageUri for upload (based on inputs)
    """
    ValidateDstObjectMetadata(object_metadata)

    headers = HeadersFromObjectMetadata(object_metadata, self.provider)
    self._AddApiVersionToHeaders(headers)

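    # Pass any known hashes in the x-goog-hash header, appending to the header
    # if it already carries a value.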
    if object_metadata.crc32c:
      if 'x-goog-hash' in headers:
        headers['x-goog-hash'] += (
            ',crc32c=%s' % object_metadata.crc32c.rstrip('\n'))
      else:
        headers['x-goog-hash'] = (
            'crc32c=%s' % object_metadata.crc32c.rstrip('\n'))
    if object_metadata.md5Hash:
      if 'x-goog-hash' in headers:
        headers['x-goog-hash'] += (
            ',md5=%s' % object_metadata.md5Hash.rstrip('\n'))
      else:
        headers['x-goog-hash'] = (
            'md5=%s' % object_metadata.md5Hash.rstrip('\n'))

    if 'content-type' in headers and not headers['content-type']:
      headers['content-type'] = 'application/octet-stream'

    self._AddPreconditionsToHeaders(preconditions, headers)

    dst_uri = self._StorageUriForObject(object_metadata.bucket,
                                        object_metadata.name)
    return headers, dst_uri

  def _HandleSuccessfulUpload(self, dst_uri, object_metadata, fields=None):
    """Set ACLs on an uploaded object and return its metadata.

    Args:
      dst_uri: Generation-specific StorageUri describing the object.
      object_metadata: Metadata for the object, including an ACL if applicable.
      fields: If present, return only these Object metadata fields.

    Returns:
      gsutil Cloud API Object metadata.

    Raises:
      CommandException if the object was overwritten / deleted concurrently.
    """
    try:
      # The XML API does not support if-generation-match for GET requests.
      # Therefore, if the object gets overwritten before the ACL and get_key
      # operations, the best we can do is report that it happened.
      self._SetObjectAcl(object_metadata, dst_uri)
      return self._BotoKeyToObject(dst_uri.get_key(), fields=fields)
    except boto.exception.InvalidUriError as e:
      if e.message and NON_EXISTENT_OBJECT_REGEX.match(e.message.encode(UTF8)):
        raise CommandException('\n'.join(textwrap.wrap(
            'Uploaded object (%s) was deleted or overwritten immediately '
            'after it was uploaded. This can happen if you attempt to upload '
            'to the same object multiple times concurrently.' % dst_uri.uri)))
      else:
        raise

  def _SetObjectAcl(self, object_metadata, dst_uri):
    """Sets the ACL (if present in object_metadata) on an uploaded object."""
    if object_metadata.acl:
      boto_acl = AclTranslation.BotoAclFromMessage(object_metadata.acl)
      dst_uri.set_xml_acl(boto_acl.to_xml())
    elif self.provider == 's3':
      s3_acl = S3MarkerAclFromObjectMetadata(object_metadata)
      if s3_acl:
        dst_uri.set_xml_acl(s3_acl)

  def UploadObjectResumable(
      self, upload_stream, object_metadata, canned_acl=None, preconditions=None,
      provider=None, fields=None, size=None, serialization_data=None,
      tracker_callback=None, progress_callback=None):
    """See CloudApi class for function doc strings."""
    if self.provider == 's3':
      # Resumable uploads are not supported for s3.
      return self.UploadObject(
          upload_stream, object_metadata, canned_acl=canned_acl,
          preconditions=preconditions, fields=fields, size=size)
    headers, dst_uri = self._UploadSetup(object_metadata,
                                         preconditions=preconditions)
    if not tracker_callback:
      raise ArgumentException('No tracker callback function set for '
                              'resumable upload of %s' % dst_uri)
    try:
      self._PerformResumableUpload(dst_uri.new_key(headers=headers),
                                   upload_stream, size, tracker_callback,
                                   canned_acl=canned_acl,
                                   serialization_data=serialization_data,
                                   progress_callback=progress_callback,
                                   headers=headers)
      return self._HandleSuccessfulUpload(dst_uri, object_metadata,
                                          fields=fields)
    except TRANSLATABLE_BOTO_EXCEPTIONS, e:
      not_found_exception = CreateNotFoundExceptionForObjectWrite(
          self.provider, object_metadata.bucket)
      self._TranslateExceptionAndRaise(e, bucket_name=object_metadata.bucket,
                                       object_name=object_metadata.name,
                                       not_found_exception=not_found_exception)

  def UploadObjectStreaming(self, upload_stream, object_metadata,
                            canned_acl=None, progress_callback=None,
                            preconditions=None, provider=None, fields=None):
    """See CloudApi class for function doc strings."""
    headers, dst_uri = self._UploadSetup(object_metadata,
                                         preconditions=preconditions)

    try:
      self._PerformStreamingUpload(
          dst_uri, upload_stream, canned_acl=canned_acl,
          progress_callback=progress_callback, headers=headers)
      return self._HandleSuccessfulUpload(dst_uri, object_metadata,
                                          fields=fields)
    except TRANSLATABLE_BOTO_EXCEPTIONS, e:
      not_found_exception = CreateNotFoundExceptionForObjectWrite(
          self.provider, object_metadata.bucket)
      self._TranslateExceptionAndRaise(e, bucket_name=object_metadata.bucket,
                                       object_name=object_metadata.name,
                                       not_found_exception=not_found_exception)

  def UploadObject(self, upload_stream, object_metadata, canned_acl=None,
                   preconditions=None, size=None, progress_callback=None,
                   provider=None, fields=None):
    """See CloudApi class for function doc strings."""
    headers, dst_uri = self._UploadSetup(object_metadata,
                                         preconditions=preconditions)

    try:
      md5 = None
      if object_metadata.md5Hash:
        md5 = []
        # boto expects hex at index 0, base64 at index 1
        md5.append(Base64ToHexHash(object_metadata.md5Hash))
        md5.append(object_metadata.md5Hash.strip('\n"\''))
      self._PerformSimpleUpload(dst_uri, upload_stream, md5=md5,
                                canned_acl=canned_acl,
                                progress_callback=progress_callback,
                                headers=headers)
      return self._HandleSuccessfulUpload(dst_uri, object_metadata,
                                          fields=fields)
    except TRANSLATABLE_BOTO_EXCEPTIONS, e:
      not_found_exception = CreateNotFoundExceptionForObjectWrite(
          self.provider, object_metadata.bucket)
      self._TranslateExceptionAndRaise(e, bucket_name=object_metadata.bucket,
                                       object_name=object_metadata.name,
                                       not_found_exception=not_found_exception)

  def DeleteObject(self, bucket_name, object_name, preconditions=None,
                   generation=None, provider=None):
    """See CloudApi class for function doc strings."""
    _ = provider
    headers = {}
    self._AddApiVersionToHeaders(headers)
    self._AddPreconditionsToHeaders(preconditions, headers)

    uri = self._StorageUriForObject(bucket_name, object_name,
                                    generation=generation)
    try:
      uri.delete_key(validate=False, headers=headers)
    except TRANSLATABLE_BOTO_EXCEPTIONS, e:
      self._TranslateExceptionAndRaise(e, bucket_name=bucket_name,
                                       object_name=object_name,
                                       generation=generation)

  def CopyObject(self, src_obj_metadata, dst_obj_metadata, src_generation=None,
                 canned_acl=None, preconditions=None, progress_callback=None,
                 max_bytes_per_call=None, provider=None, fields=None):
    """See CloudApi class for function doc strings."""
    _ = provider

    if max_bytes_per_call is not None:
      raise NotImplementedError('XML API does not support max_bytes_per_call')
    dst_uri = self._StorageUriForObject(dst_obj_metadata.bucket,
                                        dst_obj_metadata.name)

    # Usually it's okay to treat version_id and generation as
    # the same, but in this case the underlying boto call determines the
    # provider based on the presence of one or the other.
    src_version_id = None
    if self.provider == 's3':
      src_version_id = src_generation
      src_generation = None

    headers = HeadersFromObjectMetadata(dst_obj_metadata, self.provider)
    self._AddApiVersionToHeaders(headers)
    self._AddPreconditionsToHeaders(preconditions, headers)

    if canned_acl:
      headers[dst_uri.get_provider().acl_header] = canned_acl

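    # Ask boto to preserve the source object's ACL on copy when the destination
    # metadata carries an ACL (or, for S3, a marker ACL).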
    preserve_acl = True if dst_obj_metadata.acl else False
    if self.provider == 's3':
      s3_acl = S3MarkerAclFromObjectMetadata(dst_obj_metadata)
      if s3_acl:
        preserve_acl = True

    try:
      new_key = dst_uri.copy_key(
          src_obj_metadata.bucket, src_obj_metadata.name,
          preserve_acl=preserve_acl, headers=headers,
          src_version_id=src_version_id, src_generation=src_generation)

      return self._BotoKeyToObject(new_key, fields=fields)
    except TRANSLATABLE_BOTO_EXCEPTIONS, e:
      not_found_exception = CreateNotFoundExceptionForObjectWrite(
          self.provider, dst_obj_metadata.bucket, src_provider=self.provider,
          src_bucket_name=src_obj_metadata.bucket,
          src_object_name=src_obj_metadata.name, src_generation=src_generation)
      self._TranslateExceptionAndRaise(e, bucket_name=dst_obj_metadata.bucket,
                                       object_name=dst_obj_metadata.name,
                                       not_found_exception=not_found_exception)

  def ComposeObject(self, src_objs_metadata, dst_obj_metadata,
                    preconditions=None, provider=None, fields=None):
    """See CloudApi class for function doc strings."""
    _ = provider
    ValidateDstObjectMetadata(dst_obj_metadata)

    dst_obj_name = dst_obj_metadata.name
    dst_obj_metadata.name = None
    dst_bucket_name = dst_obj_metadata.bucket
    dst_obj_metadata.bucket = None
    headers = HeadersFromObjectMetadata(dst_obj_metadata, self.provider)
    if not dst_obj_metadata.contentType:
      dst_obj_metadata.contentType = DEFAULT_CONTENT_TYPE
      headers['content-type'] = dst_obj_metadata.contentType
    self._AddApiVersionToHeaders(headers)
    self._AddPreconditionsToHeaders(preconditions, headers)

    dst_uri = self._StorageUriForObject(dst_bucket_name, dst_obj_name)

    src_components = []
    for src_obj in src_objs_metadata:
      src_uri = self._StorageUriForObject(dst_bucket_name, src_obj.name,
                                          generation=src_obj.generation)
      src_components.append(src_uri)

    try:
      dst_uri.compose(src_components, headers=headers)

      return self.GetObjectMetadata(dst_bucket_name, dst_obj_name,
                                    fields=fields)
    except TRANSLATABLE_BOTO_EXCEPTIONS, e:
      self._TranslateExceptionAndRaise(e, dst_obj_metadata.bucket,
                                       dst_obj_metadata.name)

  def _AddPreconditionsToHeaders(self, preconditions, headers):
    """Adds preconditions (if any) to headers."""
    if preconditions and self.provider == 'gs':
      if preconditions.gen_match is not None:
        headers['x-goog-if-generation-match'] = str(preconditions.gen_match)
      if preconditions.meta_gen_match is not None:
        headers['x-goog-if-metageneration-match'] = str(
            preconditions.meta_gen_match)

  def _AddApiVersionToHeaders(self, headers):
    if self.provider == 'gs':
      headers['x-goog-api-version'] = self.api_version

  def _GetMD5FromETag(self, src_etag):
    """Returns an MD5 from the etag iff the etag is a valid MD5 hash.

    Args:
      src_etag: Object etag for which to return the MD5.

    Returns:
      MD5 in hex string format, or None.
    """
    if src_etag and MD5_REGEX.search(src_etag):
      return src_etag.strip('"\'').lower()

  def _StorageUriForBucket(self, bucket):
    """Returns a boto storage_uri for the given bucket name.

    Args:
      bucket: Bucket name (string).

    Returns:
      Boto storage_uri for the bucket.
    """
    return boto.storage_uri(
        '%s://%s' % (self.provider, bucket),
        suppress_consec_slashes=False,
        bucket_storage_uri_class=self.bucket_storage_uri_class,
        debug=self.debug)

  def _StorageUriForObject(self, bucket, object_name, generation=None):
    """Returns a boto storage_uri for the given object.

    Args:
      bucket: Bucket name (string).
      object_name: Object name (string).
      generation: Generation or version_id of object.  If None, live version
                  of the object is used.

    Returns:
      Boto storage_uri for the object.
    """
    uri_string = '%s://%s/%s' % (self.provider, bucket, object_name)
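    # Boto storage URIs carry the generation (GCS) or version_id (S3) after a
    # '#' separator.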
    if generation:
      uri_string += '#%s' % generation
    return boto.storage_uri(
        uri_string, suppress_consec_slashes=False,
        bucket_storage_uri_class=self.bucket_storage_uri_class,
        debug=self.debug)

  def _GetBotoKey(self, bucket_name, object_name, generation=None):
    """Gets the boto key for an object.

    Args:
      bucket_name: Bucket containing the object.
      object_name: Object name.
      generation: Generation or version of the object to retrieve.

    Returns:
      Boto key for the object.
    """
    object_uri = self._StorageUriForObject(bucket_name, object_name,
                                           generation=generation)
    try:
      key = object_uri.get_key()
      if not key:
        raise CreateObjectNotFoundException('404', self.provider,
                                            bucket_name, object_name,
                                            generation=generation)
      return key
    except TRANSLATABLE_BOTO_EXCEPTIONS, e:
      self._TranslateExceptionAndRaise(e, bucket_name=bucket_name,
                                       object_name=object_name,
                                       generation=generation)

  def _ListToGetFields(self, list_fields=None):
    """Removes 'items/' from the input fields and converts it to a set.

    This way field sets requested for ListBucket/ListObject can be used in
    _BotoBucketToBucket and _BotoKeyToObject calls.

    Args:
      list_fields: Iterable fields usable in ListBucket/ListObject calls.

    Returns:
      Set of fields usable in GetBucket/GetObject or
      _BotoBucketToBucket/_BotoKeyToObject calls.
    """
    if list_fields:
      get_fields = set()
      for field in list_fields:
        if field in ['kind', 'nextPageToken', 'prefixes']:
          # These are not actually object / bucket metadata fields.
          # They are fields specific to listing, so we don't consider them.
          continue
        get_fields.add(re.sub(r'items/', '', field))
      return get_fields

  # pylint: disable=too-many-statements
  def _BotoBucketToBucket(self, bucket, fields=None):
    """Constructs an apitools Bucket from a boto bucket.

    Args:
      bucket: Boto bucket.
      fields: If present, construct the apitools Bucket with only this set of
              metadata fields.

    Returns:
      apitools Bucket.
    """
    bucket_uri = self._StorageUriForBucket(bucket.name)

    cloud_api_bucket = apitools_messages.Bucket(name=bucket.name,
                                                id=bucket.name)
    headers = {}
    self._AddApiVersionToHeaders(headers)
    if self.provider == 'gs':
      if not fields or 'storageClass' in fields:
        if hasattr(bucket, 'get_storage_class'):
          cloud_api_bucket.storageClass = bucket.get_storage_class()
      if not fields or 'acl' in fields:
        for acl in AclTranslation.BotoBucketAclToMessage(
            bucket.get_acl(headers=headers)):
          try:
            cloud_api_bucket.acl.append(acl)
          except TRANSLATABLE_BOTO_EXCEPTIONS, e:
            translated_exception = self._TranslateBotoException(
                e, bucket_name=bucket.name)
            if (translated_exception and
                isinstance(translated_exception,
                           AccessDeniedException)):
              # JSON API doesn't differentiate between a blank ACL list
              # and an access denied, so this is intentionally left blank.
              pass
            else:
              self._TranslateExceptionAndRaise(e, bucket_name=bucket.name)
      if not fields or 'cors' in fields:
        try:
          boto_cors = bucket_uri.get_cors()
          cloud_api_bucket.cors = CorsTranslation.BotoCorsToMessage(boto_cors)
        except TRANSLATABLE_BOTO_EXCEPTIONS, e:
          self._TranslateExceptionAndRaise(e, bucket_name=bucket.name)
      if not fields or 'defaultObjectAcl' in fields:
        for acl in AclTranslation.BotoObjectAclToMessage(
            bucket.get_def_acl(headers=headers)):
          try:
            cloud_api_bucket.defaultObjectAcl.append(acl)
          except TRANSLATABLE_BOTO_EXCEPTIONS, e:
            translated_exception = self._TranslateBotoException(
                e, bucket_name=bucket.name)
            if (translated_exception and
                isinstance(translated_exception,
                           AccessDeniedException)):
              # JSON API doesn't differentiate between a blank ACL list
              # and an access denied, so this is intentionally left blank.
              pass
            else:
              self._TranslateExceptionAndRaise(e, bucket_name=bucket.name)
      if not fields or 'lifecycle' in fields:
        try:
          boto_lifecycle = bucket_uri.get_lifecycle_config()
          cloud_api_bucket.lifecycle = (
              LifecycleTranslation.BotoLifecycleToMessage(boto_lifecycle))
        except TRANSLATABLE_BOTO_EXCEPTIONS, e:
          self._TranslateExceptionAndRaise(e, bucket_name=bucket.name)
      if not fields or 'logging' in fields:
        try:
          boto_logging = bucket_uri.get_logging_config()
          if boto_logging and 'Logging' in boto_logging:
            logging_config = boto_logging['Logging']
            log_object_prefix_present = 'LogObjectPrefix' in logging_config
            log_bucket_present = 'LogBucket' in logging_config
            if log_object_prefix_present or log_bucket_present:
              cloud_api_bucket.logging = apitools_messages.Bucket.LoggingValue()
              if log_object_prefix_present:
                cloud_api_bucket.logging.logObjectPrefix = (
                    logging_config['LogObjectPrefix'])
              if log_bucket_present:
                cloud_api_bucket.logging.logBucket = logging_config['LogBucket']
        except TRANSLATABLE_BOTO_EXCEPTIONS, e:
          self._TranslateExceptionAndRaise(e, bucket_name=bucket.name)
      if not fields or 'website' in fields:
        try:
          boto_website = bucket_uri.get_website_config()
          if boto_website and 'WebsiteConfiguration' in boto_website:
            website_config = boto_website['WebsiteConfiguration']
            main_page_suffix_present = 'MainPageSuffix' in website_config
            not_found_page_present = 'NotFoundPage' in website_config
            if main_page_suffix_present or not_found_page_present:
              cloud_api_bucket.website = apitools_messages.Bucket.WebsiteValue()
              if main_page_suffix_present:
                cloud_api_bucket.website.mainPageSuffix = (
                    website_config['MainPageSuffix'])
              if not_found_page_present:
                cloud_api_bucket.website.notFoundPage = (
                    website_config['NotFoundPage'])
        except TRANSLATABLE_BOTO_EXCEPTIONS, e:
          self._TranslateExceptionAndRaise(e, bucket_name=bucket.name)
      if not fields or 'location' in fields:
        cloud_api_bucket.location = bucket_uri.get_location()
    if not fields or 'versioning' in fields:
      versioning = bucket_uri.get_versioning_config(headers=headers)
      if versioning:
        if (self.provider == 's3' and 'Versioning' in versioning and
            versioning['Versioning'] == 'Enabled'):
          cloud_api_bucket.versioning = (
              apitools_messages.Bucket.VersioningValue(enabled=True))
        elif self.provider == 'gs':
          cloud_api_bucket.versioning = (
              apitools_messages.Bucket.VersioningValue(enabled=True))

    # For S3 long bucket listing we do not support CORS, lifecycle, website, and
    # logging translation. The individual commands can be used to get
    # the XML equivalents for S3.
    return cloud_api_bucket

  def _BotoKeyToObject(self, key, fields=None):
    """Constructs an apitools Object from a boto key.

    Args:
      key: Boto key to construct Object from.
      fields: If present, construct the apitools Object with only this set of
              metadata fields.

    Returns:
      apitools Object corresponding to key.
    """
    custom_metadata = None
    if not fields or 'metadata' in fields:
      custom_metadata = self._TranslateBotoKeyCustomMetadata(key)
    cache_control = None
    if not fields or 'cacheControl' in fields:
      cache_control = getattr(key, 'cache_control', None)
    component_count = None
    if not fields or 'componentCount' in fields:
      component_count = getattr(key, 'component_count', None)
    content_disposition = None
    if not fields or 'contentDisposition' in fields:
      content_disposition = getattr(key, 'content_disposition', None)
    # Other fields like updated and ACL depend on the generation
    # of the object, so populate that regardless of whether it was requested.
    generation = self._TranslateBotoKeyGeneration(key)
    metageneration = None
    if not fields or 'metageneration' in fields:
      metageneration = self._TranslateBotoKeyMetageneration(key)
    updated = None
    # Translation code to avoid a dependency on dateutil.
    if not fields or 'updated' in fields:
      updated = self._TranslateBotoKeyTimestamp(key)
    etag = None
    if not fields or 'etag' in fields:
      etag = getattr(key, 'etag', None)
      if etag:
        etag = etag.strip('"\'')
    crc32c = None
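    # Cloud-computed hashes arrive as raw digest bytes; base64-encode them to
    # match the JSON API representation.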
    if not fields or 'crc32c' in fields:
      if hasattr(key, 'cloud_hashes') and 'crc32c' in key.cloud_hashes:
        crc32c = base64.encodestring(key.cloud_hashes['crc32c']).rstrip('\n')
    md5_hash = None
    if not fields or 'md5Hash' in fields:
      if hasattr(key, 'cloud_hashes') and 'md5' in key.cloud_hashes:
        md5_hash = base64.encodestring(key.cloud_hashes['md5']).rstrip('\n')
      elif self._GetMD5FromETag(getattr(key, 'etag', None)):
        md5_hash = Base64EncodeHash(self._GetMD5FromETag(key.etag))
      elif self.provider == 's3':
        # S3 etags are MD5s for non-multi-part objects, but multi-part objects
        # (which include all objects >= 5 GB) have a custom checksum
        # implementation that is not currently supported by gsutil.
        self.logger.warn(
            'Non-MD5 etag (%s) present for key %s, data integrity checks are '
            'not possible.', key.etag, key)

    # Serialize the boto key in the media link if it is requested.  This
    # way we can later access the key without adding an HTTP call.
    media_link = None
    if not fields or 'mediaLink' in fields:
      media_link = binascii.b2a_base64(
          pickle.dumps(key, pickle.HIGHEST_PROTOCOL))
    size = None
    if not fields or 'size' in fields:
      size = key.size or 0
    storage_class = None
    if not fields or 'storageClass' in fields:
      storage_class = getattr(key, 'storage_class', None)

    cloud_api_object = apitools_messages.Object(
        bucket=key.bucket.name,
        name=key.name,
        size=size,
        contentEncoding=key.content_encoding,
        contentLanguage=key.content_language,
        contentType=key.content_type,
        cacheControl=cache_control,
        contentDisposition=content_disposition,
        etag=etag,
        crc32c=crc32c,
        md5Hash=md5_hash,
        generation=generation,
        metageneration=metageneration,
        componentCount=component_count,
        updated=updated,
        metadata=custom_metadata,
        mediaLink=media_link,
        storageClass=storage_class)

    # Remaining functions amend cloud_api_object.
    self._TranslateDeleteMarker(key, cloud_api_object)
    if not fields or 'acl' in fields:
      generation_str = GenerationFromUrlAndString(
          StorageUrlFromString(self.provider), generation)
      self._TranslateBotoKeyAcl(key, cloud_api_object,
                                generation=generation_str)

    return cloud_api_object

  def _TranslateBotoKeyCustomMetadata(self, key):
    """Populates an apitools message from custom metadata in the boto key."""
    custom_metadata = None
    if getattr(key, 'metadata', None):
      custom_metadata = apitools_messages.Object.MetadataValue(
          additionalProperties=[])
      for k, v in key.metadata.iteritems():
        if k.lower() == 'content-language':
          # Work around content-language being inserted into custom metadata.
          continue
        custom_metadata.additionalProperties.append(
            apitools_messages.Object.MetadataValue.AdditionalProperty(
                key=k, value=v))
    return custom_metadata

  def _TranslateBotoKeyGeneration(self, key):
    """Returns the generation/version_id number from the boto key if present."""
    generation = None
    if self.provider == 'gs':
      if getattr(key, 'generation', None):
        generation = long(key.generation)
    elif self.provider == 's3':
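      # S3 version IDs are opaque strings rather than integers, so they are
      # encoded as a long to fit the integer generation field.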
      if getattr(key, 'version_id', None):
        generation = EncodeStringAsLong(key.version_id)
    return generation

  def _TranslateBotoKeyMetageneration(self, key):
    """Returns the metageneration number from the boto key if present."""
    metageneration = None
    if self.provider == 'gs':
      if getattr(key, 'metageneration', None):
        metageneration = long(key.metageneration)
    return metageneration

  def _TranslateBotoKeyTimestamp(self, key):
    """Parses the timestamp from the boto key into an datetime object.

    This avoids a dependency on dateutil.

    Args:
      key: Boto key to get timestamp from.

    Returns:
      datetime object if string is parsed successfully, None otherwise.
    """
    if key.last_modified:
      if '.' in key.last_modified:
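        # Pad fractional seconds (typically milliseconds, e.g. '.123Z') out to
        # the microsecond precision expected by the %f directive below.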
        key_us_timestamp = key.last_modified.rstrip('Z') + '000Z'
      else:
        key_us_timestamp = key.last_modified.rstrip('Z') + '.000000Z'
      fmt = '%Y-%m-%dT%H:%M:%S.%fZ'
      try:
        return datetime.datetime.strptime(key_us_timestamp, fmt)
      except ValueError:
        try:
          # Try alternate format
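          # e.g. 'Mon, 01 Jul 2013 18:07:10 GMT'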
          fmt = '%a, %d %b %Y %H:%M:%S %Z'
          return datetime.datetime.strptime(key.last_modified, fmt)
        except ValueError:
          # Could not parse the time; leave updated as None.
          return None

  def _TranslateDeleteMarker(self, key, cloud_api_object):
    """Marks deleted objects with a metadata value (for S3 compatibility)."""
    if isinstance(key, DeleteMarker):
      if not cloud_api_object.metadata:
        cloud_api_object.metadata = apitools_messages.Object.MetadataValue()
        cloud_api_object.metadata.additionalProperties = []
      cloud_api_object.metadata.additionalProperties.append(
          apitools_messages.Object.MetadataValue.AdditionalProperty(
              key=S3_DELETE_MARKER_GUID, value=True))

  def _TranslateBotoKeyAcl(self, key, cloud_api_object, generation=None):
    """Updates cloud_api_object with the ACL from the boto key."""
    storage_uri_for_key = self._StorageUriForObject(key.bucket.name, key.name,
                                                    generation=generation)
    headers = {}
    self._AddApiVersionToHeaders(headers)
    try:
      if self.provider == 'gs':
        # key.get_acl() does not support versioning, so we use storage_uri to
        # ensure we're getting the versioned ACL.
        key_acl = storage_uri_for_key.get_acl(headers=headers)
        for acl in AclTranslation.BotoObjectAclToMessage(key_acl):
          cloud_api_object.acl.append(acl)
      if self.provider == 's3':
        key_acl = key.get_xml_acl(headers=headers)
        # ACLs for s3 are different and we use special markers to represent
        # them in the gsutil Cloud API.
        AddS3MarkerAclToObjectMetadata(cloud_api_object, key_acl)
    except boto.exception.GSResponseError, e:
      if e.status == 403:
        # Consume access denied exceptions to mimic JSON behavior of simply
        # returning None if sufficient permission is not present.  The caller
        # needs to handle the case where the ACL is not populated.
        pass
      else:
        raise

  def _TranslateExceptionAndRaise(self, e, bucket_name=None, object_name=None,
                                  generation=None, not_found_exception=None):
    """Translates a Boto exception and raises the translated or original value.

    Args:
      e: Any Exception.
      bucket_name: Optional bucket name in request that caused the exception.
      object_name: Optional object name in request that caused the exception.
      generation: Optional generation in request that caused the exception.
      not_found_exception: Optional exception to raise in the not-found case.

    Raises:
      Translated CloudApi exception, or the original exception if it was not
      translatable.
    """
    translated_exception = self._TranslateBotoException(
        e, bucket_name=bucket_name, object_name=object_name,
        generation=generation, not_found_exception=not_found_exception)
    if translated_exception:
      raise translated_exception
    else:
      raise

  def _TranslateBotoException(self, e, bucket_name=None, object_name=None,
                              generation=None, not_found_exception=None):
    """Translates boto exceptions into their gsutil Cloud API equivalents.

    Args:
      e: Any exception in TRANSLATABLE_BOTO_EXCEPTIONS.
      bucket_name: Optional bucket name in request that caused the exception.
      object_name: Optional object name in request that caused the exception.
      generation: Optional generation in request that caused the exception.
      not_found_exception: Optional exception to raise in the not-found case.

    Returns:
      CloudStorageApiServiceException for translatable exceptions, None
      otherwise.
    """
    # Because we're using isinstance, check for subtypes first.
    if isinstance(e, boto.exception.StorageResponseError):
      if e.status == 400:
        return BadRequestException(e.code, status=e.status, body=e.body)
      elif e.status == 401 or e.status == 403:
        return AccessDeniedException(e.code, status=e.status, body=e.body)
      elif e.status == 404:
        if not_found_exception:
          # The exception is pre-constructed prior to translation; the HTTP
          # status code isn't available at that time.
          setattr(not_found_exception, 'status', e.status)
          return not_found_exception
        elif bucket_name:
          if object_name:
            return CreateObjectNotFoundException(e.status, self.provider,
                                                 bucket_name, object_name,
                                                 generation=generation)
          return CreateBucketNotFoundException(e.status, self.provider,
                                               bucket_name)
        return NotFoundException(e.message, status=e.status, body=e.body)

      elif e.status == 409 and e.code and 'BucketNotEmpty' in e.code:
        return NotEmptyException('BucketNotEmpty (%s)' % bucket_name,
                                 status=e.status, body=e.body)
      elif e.status == 410:
        # 410 errors should always cause us to start over - either the UploadID
        # has expired or there was a server-side problem that requires starting
        # the upload over from scratch.
        return ResumableUploadStartOverException(e.message)
      elif e.status == 412:
        return PreconditionException(e.code, status=e.status, body=e.body)
    if isinstance(e, boto.exception.StorageCreateError):
      return ServiceException('Bucket already exists.', status=e.status,
                              body=e.body)

    if isinstance(e, boto.exception.BotoServerError):
      return ServiceException(e.message, status=e.status, body=e.body)

    if isinstance(e, boto.exception.InvalidUriError):
      # The message may have been line-wrapped by textwrap, so use a regex
      # rather than a plain substring search to detect this case.
      if e.message and NON_EXISTENT_OBJECT_REGEX.match(e.message.encode(UTF8)):
        return NotFoundException(e.message, status=404)
      return InvalidUrlError(e.message)

    if isinstance(e, boto.exception.ResumableUploadException):
      if e.disposition == boto.exception.ResumableTransferDisposition.ABORT:
        return ResumableUploadAbortException(e.message)
      elif (e.disposition ==
            boto.exception.ResumableTransferDisposition.START_OVER):
        return ResumableUploadStartOverException(e.message)
      else:
        return ResumableUploadException(e.message)

    if isinstance(e, boto.exception.ResumableDownloadException):
      return ResumableDownloadException(e.message)

    return None

  # For function docstrings, see CloudApiDelegator class.
  def XmlPassThroughGetAcl(self, storage_url, def_obj_acl=False):
    """See CloudApiDelegator class for function doc strings."""
    try:
      uri = boto.storage_uri(
          storage_url.url_string, suppress_consec_slashes=False,
          bucket_storage_uri_class=self.bucket_storage_uri_class,
          debug=self.debug)
      if def_obj_acl:
        return uri.get_def_acl()
      else:
        return uri.get_acl()
    except TRANSLATABLE_BOTO_EXCEPTIONS, e:
      self._TranslateExceptionAndRaise(e)

  def XmlPassThroughSetAcl(self, acl_text, storage_url, canned=True,
                           def_obj_acl=False):
    """See CloudApiDelegator class for function doc strings."""
    try:
      uri = boto.storage_uri(
          storage_url.url_string, suppress_consec_slashes=False,
          bucket_storage_uri_class=self.bucket_storage_uri_class,
          debug=self.debug)
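      # Canned ACLs are predefined policy names (e.g. 'private',
      # 'public-read'); uri.canned_acls() enumerates the names the provider
      # accepts.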
      if canned:
        if def_obj_acl:
          canned_acls = uri.canned_acls()
          if acl_text not in canned_acls:
            raise CommandException('Invalid canned ACL "%s".' % acl_text)
          uri.set_def_acl(acl_text, uri.object_name)
        else:
          canned_acls = uri.canned_acls()
          if acl_text not in canned_acls:
            raise CommandException('Invalid canned ACL "%s".' % acl_text)
          uri.set_acl(acl_text, uri.object_name)
      else:
        if def_obj_acl:
          uri.set_def_xml_acl(acl_text, uri.object_name)
        else:
          uri.set_xml_acl(acl_text, uri.object_name)
    except TRANSLATABLE_BOTO_EXCEPTIONS, e:
      self._TranslateExceptionAndRaise(e)

  # pylint: disable=catching-non-exception
  def XmlPassThroughSetCors(self, cors_text, storage_url):
    """See CloudApiDelegator class for function doc strings."""
    # Parse XML document and convert into Cors object.
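    # For gs URLs the document is GCS CORS XML, e.g. (illustrative):
    #   <CorsConfig><Cors>
    #     <Origins><Origin>http://example.com</Origin></Origins>
    #     <Methods><Method>GET</Method></Methods>
    #   </Cors></CorsConfig>
    # while s3 URLs use the S3 CORSConfiguration schema.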
    if storage_url.scheme == 's3':
      cors_obj = S3Cors()
    else:
      cors_obj = Cors()
    h = handler.XmlHandler(cors_obj, None)
    try:
      xml.sax.parseString(cors_text, h)
    except SaxExceptions.SAXParseException, e:
      raise CommandException('Requested CORS is invalid: %s at line %s, '
                             'column %s' % (e.getMessage(), e.getLineNumber(),
                                            e.getColumnNumber()))

    try:
      uri = boto.storage_uri(
          storage_url.url_string, suppress_consec_slashes=False,
          bucket_storage_uri_class=self.bucket_storage_uri_class,
          debug=self.debug)
      uri.set_cors(cors_obj, False)
    except TRANSLATABLE_BOTO_EXCEPTIONS, e:
      self._TranslateExceptionAndRaise(e)

  def XmlPassThroughGetCors(self, storage_url):
    """See CloudApiDelegator class for function doc strings."""
    uri = boto.storage_uri(
        storage_url.url_string, suppress_consec_slashes=False,
        bucket_storage_uri_class=self.bucket_storage_uri_class,
        debug=self.debug)
    try:
      cors = uri.get_cors(False)
    except TRANSLATABLE_BOTO_EXCEPTIONS, e:
      self._TranslateExceptionAndRaise(e)

    parsed_xml = xml.dom.minidom.parseString(cors.to_xml().encode(UTF8))
    # Pretty-print the XML so it is easier for humans to edit.
    return parsed_xml.toprettyxml(indent='    ')

  def XmlPassThroughGetLifecycle(self, storage_url):
    """See CloudApiDelegator class for function doc strings."""
    try:
      uri = boto.storage_uri(
          storage_url.url_string, suppress_consec_slashes=False,
          bucket_storage_uri_class=self.bucket_storage_uri_class,
          debug=self.debug)
      lifecycle = uri.get_lifecycle_config(False)
    except TRANSLATABLE_BOTO_EXCEPTIONS, e:
      self._TranslateExceptionAndRaise(e)

    parsed_xml = xml.dom.minidom.parseString(lifecycle.to_xml().encode(UTF8))
    # Pretty-print the XML so it is easier for humans to edit.
    return parsed_xml.toprettyxml(indent='    ')

  def XmlPassThroughSetLifecycle(self, lifecycle_text, storage_url):
    """See CloudApiDelegator class for function doc strings."""
    # Parse XML document and convert into lifecycle object.
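    # For gs URLs the document is GCS lifecycle XML, e.g. (illustrative):
    #   <LifecycleConfiguration><Rule>
    #     <Action><Delete/></Action>
    #     <Condition><Age>30</Age></Condition>
    #   </Rule></LifecycleConfiguration>
    # while s3 URLs use the S3 lifecycle schema.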
    if storage_url.scheme == 's3':
      lifecycle_obj = S3Lifecycle()
    else:
      lifecycle_obj = LifecycleConfig()
    h = handler.XmlHandler(lifecycle_obj, None)
    try:
      xml.sax.parseString(lifecycle_text, h)
    except SaxExceptions.SAXParseException, e:
      raise CommandException(
          'Requested lifecycle config is invalid: %s at line %s, column %s' %
          (e.getMessage(), e.getLineNumber(), e.getColumnNumber()))

    try:
      uri = boto.storage_uri(
          storage_url.url_string, suppress_consec_slashes=False,
          bucket_storage_uri_class=self.bucket_storage_uri_class,
          debug=self.debug)
      uri.configure_lifecycle(lifecycle_obj, False)
    except TRANSLATABLE_BOTO_EXCEPTIONS, e:
      self._TranslateExceptionAndRaise(e)

  def XmlPassThroughGetLogging(self, storage_url):
    """See CloudApiDelegator class for function doc strings."""
    try:
      uri = boto.storage_uri(
          storage_url.url_string, suppress_consec_slashes=False,
          bucket_storage_uri_class=self.bucket_storage_uri_class,
          debug=self.debug)
      logging_config_xml = UnaryDictToXml(uri.get_logging_config())
    except TRANSLATABLE_BOTO_EXCEPTIONS, e:
      self._TranslateExceptionAndRaise(e)

    return XmlParseString(logging_config_xml).toprettyxml()

  def XmlPassThroughGetWebsite(self, storage_url):
    """See CloudApiDelegator class for function doc strings."""
    try:
      uri = boto.storage_uri(
          storage_url.url_string, suppress_consec_slashes=False,
          bucket_storage_uri_class=self.bucket_storage_uri_class,
          debug=self.debug)
      web_config_xml = UnaryDictToXml(uri.get_website_config())
    except TRANSLATABLE_BOTO_EXCEPTIONS, e:
      self._TranslateExceptionAndRaise(e)

    return XmlParseString(web_config_xml).toprettyxml()