# -*- coding: utf-8 -*-
# Copyright 2013 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""XML/boto gsutil Cloud API implementation for GCS and Amazon S3."""
from __future__ import absolute_import
import base64
import binascii
import datetime
import errno
import httplib
import json
import multiprocessing
import os
import pickle
import random
import re
import socket
import tempfile
import textwrap
import threading
import time
import xml
from xml.dom.minidom import parseString as XmlParseString
from xml.sax import _exceptions as SaxExceptions
import boto
from boto import handler
from boto.exception import ResumableDownloadException as BotoResumableDownloadException
from boto.exception import ResumableTransferDisposition
from boto.gs.cors import Cors
from boto.gs.lifecycle import LifecycleConfig
from boto.s3.cors import CORSConfiguration as S3Cors
from boto.s3.deletemarker import DeleteMarker
from boto.s3.lifecycle import Lifecycle as S3Lifecycle
from boto.s3.prefix import Prefix
from gslib.boto_resumable_upload import BotoResumableUpload
from gslib.cloud_api import AccessDeniedException
from gslib.cloud_api import ArgumentException
from gslib.cloud_api import BadRequestException
from gslib.cloud_api import CloudApi
from gslib.cloud_api import NotEmptyException
from gslib.cloud_api import NotFoundException
from gslib.cloud_api import PreconditionException
from gslib.cloud_api import ResumableDownloadException
from gslib.cloud_api import ResumableUploadAbortException
from gslib.cloud_api import ResumableUploadException
from gslib.cloud_api import ResumableUploadStartOverException
from gslib.cloud_api import ServiceException
from gslib.cloud_api_helper import ValidateDstObjectMetadata
# Imported for boto AuthHandler purposes.
import gslib.devshell_auth_plugin # pylint: disable=unused-import
from gslib.exception import CommandException
from gslib.exception import InvalidUrlError
from gslib.hashing_helper import Base64EncodeHash
from gslib.hashing_helper import Base64ToHexHash
from gslib.project_id import GOOG_PROJ_ID_HDR
from gslib.project_id import PopulateProjectId
from gslib.storage_url import StorageUrlFromString
from gslib.third_party.storage_apitools import storage_v1_messages as apitools_messages
from gslib.translation_helper import AclTranslation
from gslib.translation_helper import AddS3MarkerAclToObjectMetadata
from gslib.translation_helper import CorsTranslation
from gslib.translation_helper import CreateBucketNotFoundException
from gslib.translation_helper import CreateNotFoundExceptionForObjectWrite
from gslib.translation_helper import CreateObjectNotFoundException
from gslib.translation_helper import DEFAULT_CONTENT_TYPE
from gslib.translation_helper import EncodeStringAsLong
from gslib.translation_helper import GenerationFromUrlAndString
from gslib.translation_helper import HeadersFromObjectMetadata
from gslib.translation_helper import LifecycleTranslation
from gslib.translation_helper import REMOVE_CORS_CONFIG
from gslib.translation_helper import S3MarkerAclFromObjectMetadata
from gslib.util import ConfigureNoOpAuthIfNeeded
from gslib.util import DEFAULT_FILE_BUFFER_SIZE
from gslib.util import GetMaxRetryDelay
from gslib.util import GetNumRetries
from gslib.util import S3_DELETE_MARKER_GUID
from gslib.util import TWO_MIB
from gslib.util import UnaryDictToXml
from gslib.util import UTF8
from gslib.util import XML_PROGRESS_CALLBACKS
TRANSLATABLE_BOTO_EXCEPTIONS = (boto.exception.BotoServerError,
boto.exception.InvalidUriError,
boto.exception.ResumableDownloadException,
boto.exception.ResumableUploadException,
boto.exception.StorageCreateError,
boto.exception.StorageResponseError)
# pylint: disable=global-at-module-level
global boto_auth_initialized, boto_auth_initialized_lock
# If multiprocessing is available, these will be overridden to process-safe
# variables in InitializeMultiprocessingVariables.
boto_auth_initialized_lock = threading.Lock()
boto_auth_initialized = False
NON_EXISTENT_OBJECT_REGEX = re.compile(r'.*non-\s*existent\s*object',
flags=re.DOTALL)
# Determines whether an etag is a valid MD5.
MD5_REGEX = re.compile(r'^"*[a-fA-F0-9]{32}"*$')
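# Illustrative examples (not exhaustive): '"d41d8cd98f00b204e9800998ecf8427e"'
# matches (32 hex characters, optionally quoted), while S3 multipart etags
# such as '"d41d8cd98f00b204e9800998ecf8427e-2"' do not.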
def InitializeMultiprocessingVariables(): # pylint: disable=invalid-name
"""Perform necessary initialization for multiprocessing.
See gslib.command.InitializeMultiprocessingVariables for an explanation
of why this is necessary.
"""
# pylint: disable=global-variable-undefined
global boto_auth_initialized, boto_auth_initialized_lock
boto_auth_initialized_lock = gslib.util.CreateLock()
boto_auth_initialized = multiprocessing.Value('i', 0)
class DownloadProxyCallbackHandler(object):
"""Intermediary callback to keep track of the number of bytes downloaded."""
def __init__(self, start_byte, callback):
self._start_byte = start_byte
self._callback = callback
def call(self, bytes_downloaded, total_size):
"""Saves necessary data and then calls the given Cloud API callback.
Args:
bytes_downloaded: Number of bytes processed so far.
total_size: Total size of the ongoing operation.
"""
if self._callback:
self._callback(self._start_byte + bytes_downloaded, total_size)
class BotoTranslation(CloudApi):
"""Boto-based XML translation implementation of gsutil Cloud API.
This class takes gsutil Cloud API objects, translates them to XML service
calls, and translates the results back into gsutil Cloud API objects for
use by the caller.
"""
def __init__(self, bucket_storage_uri_class, logger, provider=None,
credentials=None, debug=0, trace_token=None):
"""Performs necessary setup for interacting with the cloud storage provider.
Args:
bucket_storage_uri_class: boto storage_uri class, used by APIs that
provide boto translation or mocking.
logger: logging.logger for outputting log messages.
provider: Provider prefix describing cloud storage provider to connect to.
'gs' and 's3' are supported. Function implementations ignore
the provider argument and use this one instead.
credentials: Unused.
debug: Debug level for the API implementation (0..3).
trace_token: Unused in this subclass.
"""
super(BotoTranslation, self).__init__(bucket_storage_uri_class, logger,
provider=provider, debug=debug)
_ = credentials
# pylint: disable=global-variable-undefined, global-variable-not-assigned
global boto_auth_initialized, boto_auth_initialized_lock
with boto_auth_initialized_lock:
ConfigureNoOpAuthIfNeeded()
if isinstance(boto_auth_initialized, bool):
boto_auth_initialized = True
else:
boto_auth_initialized.value = 1
self.api_version = boto.config.get_value(
'GSUtil', 'default_api_version', '1')
def GetBucket(self, bucket_name, provider=None, fields=None):
"""See CloudApi class for function doc strings."""
_ = provider
bucket_uri = self._StorageUriForBucket(bucket_name)
headers = {}
self._AddApiVersionToHeaders(headers)
try:
return self._BotoBucketToBucket(bucket_uri.get_bucket(validate=True,
headers=headers),
fields=fields)
except TRANSLATABLE_BOTO_EXCEPTIONS, e:
self._TranslateExceptionAndRaise(e, bucket_name=bucket_name)
def ListBuckets(self, project_id=None, provider=None, fields=None):
"""See CloudApi class for function doc strings."""
_ = provider
get_fields = self._ListToGetFields(list_fields=fields)
headers = {}
self._AddApiVersionToHeaders(headers)
if self.provider == 'gs':
headers[GOOG_PROJ_ID_HDR] = PopulateProjectId(project_id)
try:
provider_uri = boto.storage_uri(
'%s://' % self.provider,
suppress_consec_slashes=False,
bucket_storage_uri_class=self.bucket_storage_uri_class,
debug=self.debug)
buckets_iter = provider_uri.get_all_buckets(headers=headers)
for bucket in buckets_iter:
if self.provider == 's3' and bucket.name.lower() != bucket.name:
          # S3 listings can return buckets with upper-case names, but boto
          # can't successfully make requests against them, so skip them.
continue
yield self._BotoBucketToBucket(bucket, fields=get_fields)
except TRANSLATABLE_BOTO_EXCEPTIONS, e:
self._TranslateExceptionAndRaise(e)
def PatchBucket(self, bucket_name, metadata, canned_acl=None,
canned_def_acl=None, preconditions=None, provider=None,
fields=None):
"""See CloudApi class for function doc strings."""
_ = provider
bucket_uri = self._StorageUriForBucket(bucket_name)
headers = {}
self._AddApiVersionToHeaders(headers)
try:
self._AddPreconditionsToHeaders(preconditions, headers)
if metadata.acl:
boto_acl = AclTranslation.BotoAclFromMessage(metadata.acl)
bucket_uri.set_xml_acl(boto_acl.to_xml(), headers=headers)
if canned_acl:
canned_acls = bucket_uri.canned_acls()
if canned_acl not in canned_acls:
raise CommandException('Invalid canned ACL "%s".' % canned_acl)
bucket_uri.set_acl(canned_acl, bucket_uri.object_name)
if canned_def_acl:
canned_acls = bucket_uri.canned_acls()
if canned_def_acl not in canned_acls:
raise CommandException('Invalid canned ACL "%s".' % canned_def_acl)
bucket_uri.set_def_acl(canned_def_acl, bucket_uri.object_name)
if metadata.cors:
if metadata.cors == REMOVE_CORS_CONFIG:
metadata.cors = []
boto_cors = CorsTranslation.BotoCorsFromMessage(metadata.cors)
bucket_uri.set_cors(boto_cors, False)
if metadata.defaultObjectAcl:
boto_acl = AclTranslation.BotoAclFromMessage(
metadata.defaultObjectAcl)
bucket_uri.set_def_xml_acl(boto_acl.to_xml(), headers=headers)
if metadata.lifecycle:
boto_lifecycle = LifecycleTranslation.BotoLifecycleFromMessage(
metadata.lifecycle)
bucket_uri.configure_lifecycle(boto_lifecycle, False)
if metadata.logging:
if self.provider == 'gs':
headers[GOOG_PROJ_ID_HDR] = PopulateProjectId(None)
if metadata.logging.logBucket and metadata.logging.logObjectPrefix:
bucket_uri.enable_logging(metadata.logging.logBucket,
metadata.logging.logObjectPrefix,
False, headers)
else: # Logging field is present and empty. Disable logging.
bucket_uri.disable_logging(False, headers)
if metadata.versioning:
bucket_uri.configure_versioning(metadata.versioning.enabled,
headers=headers)
if metadata.website:
main_page_suffix = metadata.website.mainPageSuffix
error_page = metadata.website.notFoundPage
bucket_uri.set_website_config(main_page_suffix, error_page)
return self.GetBucket(bucket_name, fields=fields)
except TRANSLATABLE_BOTO_EXCEPTIONS, e:
self._TranslateExceptionAndRaise(e, bucket_name=bucket_name)
def CreateBucket(self, bucket_name, project_id=None, metadata=None,
provider=None, fields=None):
"""See CloudApi class for function doc strings."""
_ = provider
bucket_uri = self._StorageUriForBucket(bucket_name)
location = ''
if metadata and metadata.location:
location = metadata.location
# Pass storage_class param only if this is a GCS bucket. (In S3 the
# storage class is specified on the key object.)
headers = {}
if bucket_uri.scheme == 'gs':
self._AddApiVersionToHeaders(headers)
headers[GOOG_PROJ_ID_HDR] = PopulateProjectId(project_id)
storage_class = ''
if metadata and metadata.storageClass:
storage_class = metadata.storageClass
try:
bucket_uri.create_bucket(headers=headers, location=location,
storage_class=storage_class)
except TRANSLATABLE_BOTO_EXCEPTIONS, e:
self._TranslateExceptionAndRaise(e, bucket_name=bucket_name)
else:
try:
bucket_uri.create_bucket(headers=headers, location=location)
except TRANSLATABLE_BOTO_EXCEPTIONS, e:
self._TranslateExceptionAndRaise(e, bucket_name=bucket_name)
return self.GetBucket(bucket_name, fields=fields)
def DeleteBucket(self, bucket_name, preconditions=None, provider=None):
"""See CloudApi class for function doc strings."""
_ = provider, preconditions
bucket_uri = self._StorageUriForBucket(bucket_name)
headers = {}
self._AddApiVersionToHeaders(headers)
try:
bucket_uri.delete_bucket(headers=headers)
except TRANSLATABLE_BOTO_EXCEPTIONS, e:
translated_exception = self._TranslateBotoException(
e, bucket_name=bucket_name)
if (translated_exception and
'BucketNotEmpty' in translated_exception.reason):
try:
if bucket_uri.get_versioning_config():
if self.provider == 's3':
raise NotEmptyException(
'VersionedBucketNotEmpty (%s). Currently, gsutil does not '
'support listing or removing S3 DeleteMarkers, so you may '
'need to delete these using another tool to successfully '
'delete this bucket.' % bucket_name, status=e.status)
raise NotEmptyException(
'VersionedBucketNotEmpty (%s)' % bucket_name, status=e.status)
else:
raise NotEmptyException('BucketNotEmpty (%s)' % bucket_name,
status=e.status)
except TRANSLATABLE_BOTO_EXCEPTIONS, e2:
self._TranslateExceptionAndRaise(e2, bucket_name=bucket_name)
elif translated_exception and translated_exception.status == 404:
raise NotFoundException('Bucket %s does not exist.' % bucket_name)
else:
self._TranslateExceptionAndRaise(e, bucket_name=bucket_name)
def ListObjects(self, bucket_name, prefix=None, delimiter=None,
all_versions=None, provider=None, fields=None):
"""See CloudApi class for function doc strings."""
_ = provider
get_fields = self._ListToGetFields(list_fields=fields)
bucket_uri = self._StorageUriForBucket(bucket_name)
headers = {}
yield_prefixes = fields is None or 'prefixes' in fields
yield_objects = fields is None or any(
field.startswith('items/') for field in fields)
self._AddApiVersionToHeaders(headers)
try:
objects_iter = bucket_uri.list_bucket(prefix=prefix or '',
delimiter=delimiter or '',
all_versions=all_versions,
headers=headers)
except TRANSLATABLE_BOTO_EXCEPTIONS, e:
self._TranslateExceptionAndRaise(e, bucket_name=bucket_name)
try:
for key in objects_iter:
if yield_prefixes and isinstance(key, Prefix):
yield CloudApi.CsObjectOrPrefix(key.name,
CloudApi.CsObjectOrPrefixType.PREFIX)
elif yield_objects:
key_to_convert = key
# Listed keys are populated with these fields during bucket listing.
key_http_fields = set(['bucket', 'etag', 'name', 'updated',
'generation', 'metageneration', 'size'])
# When fields == None, the caller is requesting all possible fields.
# If the caller requested any fields that are not populated by bucket
# listing, we'll need to make a separate HTTP call for each object to
# get its metadata and populate the remaining fields with the result.
if not get_fields or (get_fields and not
get_fields.issubset(key_http_fields)):
generation = None
if getattr(key, 'generation', None):
generation = key.generation
if getattr(key, 'version_id', None):
generation = key.version_id
key_to_convert = self._GetBotoKey(bucket_name, key.name,
generation=generation)
return_object = self._BotoKeyToObject(key_to_convert,
fields=get_fields)
yield CloudApi.CsObjectOrPrefix(return_object,
CloudApi.CsObjectOrPrefixType.OBJECT)
except TRANSLATABLE_BOTO_EXCEPTIONS, e:
self._TranslateExceptionAndRaise(e, bucket_name=bucket_name)
def GetObjectMetadata(self, bucket_name, object_name, generation=None,
provider=None, fields=None):
"""See CloudApi class for function doc strings."""
_ = provider
try:
return self._BotoKeyToObject(self._GetBotoKey(bucket_name, object_name,
generation=generation),
fields=fields)
except TRANSLATABLE_BOTO_EXCEPTIONS, e:
self._TranslateExceptionAndRaise(e, bucket_name=bucket_name,
object_name=object_name,
generation=generation)
def _CurryDigester(self, digester_object):
"""Curries a digester object into a form consumable by boto.
    Key instantiates its own digesters by calling hash_algs[alg]() with no
    arguments. To hand our caught-up digesters to boto during a resumable
    download, we therefore need to supply the digester object itself rather
    than look it up by algorithm name; the lambda below makes that lookup
    implicit.
Args:
digester_object: Input object to be returned by the created function.
Returns:
A function which when called will return the input object.
"""
return lambda: digester_object
def GetObjectMedia(
self, bucket_name, object_name, download_stream, provider=None,
generation=None, object_size=None,
download_strategy=CloudApi.DownloadStrategy.ONE_SHOT,
start_byte=0, end_byte=None, progress_callback=None,
serialization_data=None, digesters=None):
"""See CloudApi class for function doc strings."""
    # If serialization_data is not provided, this implementation first fetches
    # the object metadata (via _GetBotoKey) before downloading.
headers = {}
self._AddApiVersionToHeaders(headers)
if 'accept-encoding' not in headers:
headers['accept-encoding'] = 'gzip'
if end_byte is not None:
headers['range'] = 'bytes=%s-%s' % (start_byte, end_byte)
elif start_byte > 0:
headers['range'] = 'bytes=%s-' % (start_byte)
elif start_byte < 0:
headers['range'] = 'bytes=%s' % (start_byte)
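    # Illustrative Range headers produced above: 'bytes=5-9' (explicit range),
    # 'bytes=5-' (from offset 5 to the end), and 'bytes=-5' (suffix range,
    # i.e. the last 5 bytes) when start_byte is negative.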
# Since in most cases we already made a call to get the object metadata,
# here we avoid an extra HTTP call by unpickling the key. This is coupled
# with the implementation in _BotoKeyToObject.
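    # serialization_data is expected to be a JSON document with at least a
    # 'url' field (a base64-encoded pickled boto key, produced as mediaLink
    # in _BotoKeyToObject) and a 'total_size' field.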
if serialization_data:
serialization_dict = json.loads(serialization_data)
key = pickle.loads(binascii.a2b_base64(serialization_dict['url']))
else:
key = self._GetBotoKey(bucket_name, object_name, generation=generation)
if digesters and self.provider == 'gs':
hash_algs = {}
for alg in digesters:
hash_algs[alg] = self._CurryDigester(digesters[alg])
else:
hash_algs = {}
total_size = object_size or 0
if serialization_data:
total_size = json.loads(serialization_data)['total_size']
if total_size:
num_progress_callbacks = max(int(total_size) / TWO_MIB,
XML_PROGRESS_CALLBACKS)
else:
num_progress_callbacks = XML_PROGRESS_CALLBACKS
try:
if download_strategy is CloudApi.DownloadStrategy.RESUMABLE:
self._PerformResumableDownload(
download_stream, start_byte, end_byte, key,
headers=headers, callback=progress_callback,
num_callbacks=num_progress_callbacks, hash_algs=hash_algs)
elif download_strategy is CloudApi.DownloadStrategy.ONE_SHOT:
self._PerformSimpleDownload(
download_stream, key, progress_callback=progress_callback,
num_progress_callbacks=num_progress_callbacks, headers=headers,
hash_algs=hash_algs)
else:
raise ArgumentException('Unsupported DownloadStrategy: %s' %
download_strategy)
except TRANSLATABLE_BOTO_EXCEPTIONS, e:
self._TranslateExceptionAndRaise(e, bucket_name=bucket_name,
object_name=object_name,
generation=generation)
if self.provider == 's3':
if digesters:
class HashToDigester(object):
"""Wrapper class to expose hash digests.
boto creates its own digesters in s3's get_file, returning on-the-fly
hashes only by way of key.local_hashes. To propagate the digest back
to the caller, this stub class implements the digest() function.
"""
def __init__(self, hash_val):
self.hash_val = hash_val
def digest(self): # pylint: disable=invalid-name
return self.hash_val
for alg_name in digesters:
if ((download_strategy == CloudApi.DownloadStrategy.RESUMABLE and
start_byte != 0) or
not ((getattr(key, 'local_hashes', None) and
alg_name in key.local_hashes))):
# For resumable downloads, boto does not provide a mechanism to
# catch up the hash in the case of a partially complete download.
# In this case or in the case where no digest was successfully
# calculated, set the digester to None, which indicates that we'll
# need to manually calculate the hash from the local file once it
# is complete.
digesters[alg_name] = None
else:
# Use the on-the-fly hash.
digesters[alg_name] = HashToDigester(key.local_hashes[alg_name])
def _PerformSimpleDownload(self, download_stream, key, progress_callback=None,
num_progress_callbacks=XML_PROGRESS_CALLBACKS,
headers=None, hash_algs=None):
if not headers:
headers = {}
self._AddApiVersionToHeaders(headers)
try:
key.get_contents_to_file(download_stream, cb=progress_callback,
num_cb=num_progress_callbacks, headers=headers,
hash_algs=hash_algs)
except TypeError: # s3 and mocks do not support hash_algs
key.get_contents_to_file(download_stream, cb=progress_callback,
num_cb=num_progress_callbacks, headers=headers)
def _PerformResumableDownload(self, fp, start_byte, end_byte, key,
headers=None, callback=None,
num_callbacks=XML_PROGRESS_CALLBACKS,
hash_algs=None):
"""Downloads bytes from key to fp, resuming as needed.
Args:
fp: File pointer into which data should be downloaded.
start_byte: Start byte of the download.
end_byte: End byte of the download.
      key: Key object from which data is to be downloaded.
      headers: Headers to send when retrieving the file.
callback: (optional) a callback function that will be called to report
progress on the download. The callback should accept two integer
parameters. The first integer represents the number of
bytes that have been successfully transmitted from the service. The
second represents the total number of bytes that need to be
transmitted.
num_callbacks: (optional) If a callback is specified with the callback
parameter, this determines the granularity of the callback
by defining the maximum number of times the callback will be
called during the file transfer.
hash_algs: Dict of hash algorithms to apply to downloaded bytes.
Raises:
ResumableDownloadException on error.
"""
if not headers:
headers = {}
self._AddApiVersionToHeaders(headers)
retryable_exceptions = (httplib.HTTPException, IOError, socket.error,
socket.gaierror)
debug = key.bucket.connection.debug
num_retries = GetNumRetries()
progress_less_iterations = 0
last_progress_byte = start_byte
while True: # Retry as long as we're making progress.
try:
cb_handler = DownloadProxyCallbackHandler(start_byte, callback)
headers = headers.copy()
headers['Range'] = 'bytes=%d-%d' % (start_byte, end_byte)
# Disable AWSAuthConnection-level retry behavior, since that would
# cause downloads to restart from scratch.
try:
key.get_file(fp, headers, cb_handler.call, num_callbacks,
override_num_retries=0, hash_algs=hash_algs)
except TypeError:
key.get_file(fp, headers, cb_handler.call, num_callbacks,
override_num_retries=0)
fp.flush()
# Download succeeded.
return
except retryable_exceptions, e:
if debug >= 1:
self.logger.info('Caught exception (%s)', repr(e))
if isinstance(e, IOError) and e.errno == errno.EPIPE:
# Broken pipe error causes httplib to immediately
# close the socket (http://bugs.python.org/issue5542),
# so we need to close and reopen the key before resuming
# the download.
if self.provider == 's3':
key.get_file(fp, headers, cb_handler.call, num_callbacks,
override_num_retries=0)
else: # self.provider == 'gs'
key.get_file(fp, headers, cb_handler.call, num_callbacks,
override_num_retries=0, hash_algs=hash_algs)
except BotoResumableDownloadException, e:
if (e.disposition ==
ResumableTransferDisposition.ABORT_CUR_PROCESS):
raise ResumableDownloadException(e.message)
else:
if debug >= 1:
self.logger.info('Caught ResumableDownloadException (%s) - will '
'retry', e.message)
      # At this point we had a retryable failure; see if we made progress.
start_byte = fp.tell()
if start_byte > last_progress_byte:
last_progress_byte = start_byte
progress_less_iterations = 0
else:
progress_less_iterations += 1
if progress_less_iterations > num_retries:
# Don't retry any longer in the current process.
raise ResumableDownloadException(
'Too many resumable download attempts failed without '
            'progress. You might try this download again later.')
# Close the key, in case a previous download died partway
# through and left data in the underlying key HTTP buffer.
# Do this within a try/except block in case the connection is
# closed (since key.close() attempts to do a final read, in which
# case this read attempt would get an IncompleteRead exception,
# which we can safely ignore).
try:
key.close()
except httplib.IncompleteRead:
pass
sleep_time_secs = min(random.random() * (2 ** progress_less_iterations),
GetMaxRetryDelay())
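      # Illustrative backoff values: after 3 progress-less iterations the
      # sleep is drawn uniformly from [0, 8) seconds, after 4 from [0, 16)
      # seconds, and so on, always capped at GetMaxRetryDelay().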
if debug >= 1:
self.logger.info(
'Got retryable failure (%d progress-less in a row).\nSleeping %d '
'seconds before re-trying', progress_less_iterations,
sleep_time_secs)
time.sleep(sleep_time_secs)
def PatchObjectMetadata(self, bucket_name, object_name, metadata,
canned_acl=None, generation=None, preconditions=None,
provider=None, fields=None):
"""See CloudApi class for function doc strings."""
_ = provider
object_uri = self._StorageUriForObject(bucket_name, object_name,
generation=generation)
headers = {}
self._AddApiVersionToHeaders(headers)
meta_headers = HeadersFromObjectMetadata(metadata, self.provider)
metadata_plus = {}
metadata_minus = set()
metadata_changed = False
for k, v in meta_headers.iteritems():
metadata_changed = True
if v is None:
metadata_minus.add(k)
else:
metadata_plus[k] = v
self._AddPreconditionsToHeaders(preconditions, headers)
if metadata_changed:
try:
object_uri.set_metadata(metadata_plus, metadata_minus, False,
headers=headers)
except TRANSLATABLE_BOTO_EXCEPTIONS, e:
self._TranslateExceptionAndRaise(e, bucket_name=bucket_name,
object_name=object_name,
generation=generation)
if metadata.acl:
boto_acl = AclTranslation.BotoAclFromMessage(metadata.acl)
try:
object_uri.set_xml_acl(boto_acl.to_xml(), key_name=object_name)
except TRANSLATABLE_BOTO_EXCEPTIONS, e:
self._TranslateExceptionAndRaise(e, bucket_name=bucket_name,
object_name=object_name,
generation=generation)
if canned_acl:
canned_acls = object_uri.canned_acls()
if canned_acl not in canned_acls:
raise CommandException('Invalid canned ACL "%s".' % canned_acl)
object_uri.set_acl(canned_acl, object_uri.object_name)
return self.GetObjectMetadata(bucket_name, object_name,
generation=generation, fields=fields)
def _PerformSimpleUpload(self, dst_uri, upload_stream, md5=None,
canned_acl=None, progress_callback=None,
headers=None):
dst_uri.set_contents_from_file(upload_stream, md5=md5, policy=canned_acl,
cb=progress_callback, headers=headers)
def _PerformStreamingUpload(self, dst_uri, upload_stream, canned_acl=None,
progress_callback=None, headers=None):
if dst_uri.get_provider().supports_chunked_transfer():
dst_uri.set_contents_from_stream(upload_stream, policy=canned_acl,
cb=progress_callback, headers=headers)
else:
# Provider doesn't support chunked transfer, so copy to a temporary
# file.
(temp_fh, temp_path) = tempfile.mkstemp()
try:
with open(temp_path, 'wb') as out_fp:
stream_bytes = upload_stream.read(DEFAULT_FILE_BUFFER_SIZE)
while stream_bytes:
out_fp.write(stream_bytes)
stream_bytes = upload_stream.read(DEFAULT_FILE_BUFFER_SIZE)
with open(temp_path, 'rb') as in_fp:
dst_uri.set_contents_from_file(in_fp, policy=canned_acl,
headers=headers)
finally:
os.close(temp_fh)
os.unlink(temp_path)
def _PerformResumableUpload(self, key, upload_stream, upload_size,
tracker_callback, canned_acl=None,
serialization_data=None, progress_callback=None,
headers=None):
resumable_upload = BotoResumableUpload(
tracker_callback, self.logger, resume_url=serialization_data)
resumable_upload.SendFile(key, upload_stream, upload_size,
canned_acl=canned_acl, cb=progress_callback,
headers=headers)
def _UploadSetup(self, object_metadata, preconditions=None):
"""Shared upload implementation.
Args:
object_metadata: Object metadata describing destination object.
preconditions: Optional gsutil Cloud API preconditions.
Returns:
      Headers dictionary and StorageUri for the upload (based on the inputs).
"""
ValidateDstObjectMetadata(object_metadata)
headers = HeadersFromObjectMetadata(object_metadata, self.provider)
self._AddApiVersionToHeaders(headers)
if object_metadata.crc32c:
if 'x-goog-hash' in headers:
headers['x-goog-hash'] += (
',crc32c=%s' % object_metadata.crc32c.rstrip('\n'))
else:
headers['x-goog-hash'] = (
'crc32c=%s' % object_metadata.crc32c.rstrip('\n'))
if object_metadata.md5Hash:
if 'x-goog-hash' in headers:
headers['x-goog-hash'] += (
',md5=%s' % object_metadata.md5Hash.rstrip('\n'))
else:
headers['x-goog-hash'] = (
'md5=%s' % object_metadata.md5Hash.rstrip('\n'))
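    # When both hashes are present the resulting header is of the form
    # 'x-goog-hash: crc32c=<base64>,md5=<base64>' (illustrative).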
if 'content-type' in headers and not headers['content-type']:
headers['content-type'] = 'application/octet-stream'
self._AddPreconditionsToHeaders(preconditions, headers)
dst_uri = self._StorageUriForObject(object_metadata.bucket,
object_metadata.name)
return headers, dst_uri
def _HandleSuccessfulUpload(self, dst_uri, object_metadata, fields=None):
"""Set ACLs on an uploaded object and return its metadata.
Args:
dst_uri: Generation-specific StorageUri describing the object.
object_metadata: Metadata for the object, including an ACL if applicable.
fields: If present, return only these Object metadata fields.
Returns:
gsutil Cloud API Object metadata.
Raises:
CommandException if the object was overwritten / deleted concurrently.
"""
try:
# The XML API does not support if-generation-match for GET requests.
# Therefore, if the object gets overwritten before the ACL and get_key
# operations, the best we can do is warn that it happened.
self._SetObjectAcl(object_metadata, dst_uri)
return self._BotoKeyToObject(dst_uri.get_key(), fields=fields)
except boto.exception.InvalidUriError as e:
if e.message and NON_EXISTENT_OBJECT_REGEX.match(e.message.encode(UTF8)):
raise CommandException('\n'.join(textwrap.wrap(
'Uploaded object (%s) was deleted or overwritten immediately '
'after it was uploaded. This can happen if you attempt to upload '
'to the same object multiple times concurrently.' % dst_uri.uri)))
else:
raise
def _SetObjectAcl(self, object_metadata, dst_uri):
"""Sets the ACL (if present in object_metadata) on an uploaded object."""
if object_metadata.acl:
boto_acl = AclTranslation.BotoAclFromMessage(object_metadata.acl)
dst_uri.set_xml_acl(boto_acl.to_xml())
elif self.provider == 's3':
s3_acl = S3MarkerAclFromObjectMetadata(object_metadata)
if s3_acl:
dst_uri.set_xml_acl(s3_acl)
def UploadObjectResumable(
self, upload_stream, object_metadata, canned_acl=None, preconditions=None,
provider=None, fields=None, size=None, serialization_data=None,
tracker_callback=None, progress_callback=None):
"""See CloudApi class for function doc strings."""
if self.provider == 's3':
# Resumable uploads are not supported for s3.
return self.UploadObject(
upload_stream, object_metadata, canned_acl=canned_acl,
preconditions=preconditions, fields=fields, size=size)
headers, dst_uri = self._UploadSetup(object_metadata,
preconditions=preconditions)
if not tracker_callback:
raise ArgumentException('No tracker callback function set for '
'resumable upload of %s' % dst_uri)
try:
self._PerformResumableUpload(dst_uri.new_key(headers=headers),
upload_stream, size, tracker_callback,
canned_acl=canned_acl,
serialization_data=serialization_data,
progress_callback=progress_callback,
headers=headers)
return self._HandleSuccessfulUpload(dst_uri, object_metadata,
fields=fields)
except TRANSLATABLE_BOTO_EXCEPTIONS, e:
not_found_exception = CreateNotFoundExceptionForObjectWrite(
self.provider, object_metadata.bucket)
self._TranslateExceptionAndRaise(e, bucket_name=object_metadata.bucket,
object_name=object_metadata.name,
not_found_exception=not_found_exception)
def UploadObjectStreaming(self, upload_stream, object_metadata,
canned_acl=None, progress_callback=None,
preconditions=None, provider=None, fields=None):
"""See CloudApi class for function doc strings."""
headers, dst_uri = self._UploadSetup(object_metadata,
preconditions=preconditions)
try:
self._PerformStreamingUpload(
dst_uri, upload_stream, canned_acl=canned_acl,
progress_callback=progress_callback, headers=headers)
return self._HandleSuccessfulUpload(dst_uri, object_metadata,
fields=fields)
except TRANSLATABLE_BOTO_EXCEPTIONS, e:
not_found_exception = CreateNotFoundExceptionForObjectWrite(
self.provider, object_metadata.bucket)
self._TranslateExceptionAndRaise(e, bucket_name=object_metadata.bucket,
object_name=object_metadata.name,
not_found_exception=not_found_exception)
def UploadObject(self, upload_stream, object_metadata, canned_acl=None,
preconditions=None, size=None, progress_callback=None,
provider=None, fields=None):
"""See CloudApi class for function doc strings."""
headers, dst_uri = self._UploadSetup(object_metadata,
preconditions=preconditions)
try:
md5 = None
if object_metadata.md5Hash:
md5 = []
# boto expects hex at index 0, base64 at index 1
md5.append(Base64ToHexHash(object_metadata.md5Hash))
md5.append(object_metadata.md5Hash.strip('\n"\''))
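        # Illustrative values for an empty object: md5[0] (hex) is
        # 'd41d8cd98f00b204e9800998ecf8427e' and md5[1] (base64) is
        # '1B2M2Y8AsgTpgAmY7PhCfg=='.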
self._PerformSimpleUpload(dst_uri, upload_stream, md5=md5,
canned_acl=canned_acl,
progress_callback=progress_callback,
headers=headers)
return self._HandleSuccessfulUpload(dst_uri, object_metadata,
fields=fields)
except TRANSLATABLE_BOTO_EXCEPTIONS, e:
not_found_exception = CreateNotFoundExceptionForObjectWrite(
self.provider, object_metadata.bucket)
self._TranslateExceptionAndRaise(e, bucket_name=object_metadata.bucket,
object_name=object_metadata.name,
not_found_exception=not_found_exception)
def DeleteObject(self, bucket_name, object_name, preconditions=None,
generation=None, provider=None):
"""See CloudApi class for function doc strings."""
_ = provider
headers = {}
self._AddApiVersionToHeaders(headers)
self._AddPreconditionsToHeaders(preconditions, headers)
uri = self._StorageUriForObject(bucket_name, object_name,
generation=generation)
try:
uri.delete_key(validate=False, headers=headers)
except TRANSLATABLE_BOTO_EXCEPTIONS, e:
self._TranslateExceptionAndRaise(e, bucket_name=bucket_name,
object_name=object_name,
generation=generation)
def CopyObject(self, src_obj_metadata, dst_obj_metadata, src_generation=None,
canned_acl=None, preconditions=None, progress_callback=None,
max_bytes_per_call=None, provider=None, fields=None):
"""See CloudApi class for function doc strings."""
_ = provider
if max_bytes_per_call is not None:
      raise NotImplementedError('XML API does not support max_bytes_per_call')
dst_uri = self._StorageUriForObject(dst_obj_metadata.bucket,
dst_obj_metadata.name)
# Usually it's okay to treat version_id and generation as
# the same, but in this case the underlying boto call determines the
# provider based on the presence of one or the other.
src_version_id = None
if self.provider == 's3':
src_version_id = src_generation
src_generation = None
headers = HeadersFromObjectMetadata(dst_obj_metadata, self.provider)
self._AddApiVersionToHeaders(headers)
self._AddPreconditionsToHeaders(preconditions, headers)
if canned_acl:
headers[dst_uri.get_provider().acl_header] = canned_acl
preserve_acl = True if dst_obj_metadata.acl else False
if self.provider == 's3':
s3_acl = S3MarkerAclFromObjectMetadata(dst_obj_metadata)
if s3_acl:
preserve_acl = True
try:
new_key = dst_uri.copy_key(
src_obj_metadata.bucket, src_obj_metadata.name,
preserve_acl=preserve_acl, headers=headers,
src_version_id=src_version_id, src_generation=src_generation)
return self._BotoKeyToObject(new_key, fields=fields)
except TRANSLATABLE_BOTO_EXCEPTIONS, e:
not_found_exception = CreateNotFoundExceptionForObjectWrite(
self.provider, dst_obj_metadata.bucket, src_provider=self.provider,
src_bucket_name=src_obj_metadata.bucket,
src_object_name=src_obj_metadata.name, src_generation=src_generation)
self._TranslateExceptionAndRaise(e, bucket_name=dst_obj_metadata.bucket,
object_name=dst_obj_metadata.name,
not_found_exception=not_found_exception)
def ComposeObject(self, src_objs_metadata, dst_obj_metadata,
preconditions=None, provider=None, fields=None):
"""See CloudApi class for function doc strings."""
_ = provider
ValidateDstObjectMetadata(dst_obj_metadata)
dst_obj_name = dst_obj_metadata.name
dst_obj_metadata.name = None
dst_bucket_name = dst_obj_metadata.bucket
dst_obj_metadata.bucket = None
headers = HeadersFromObjectMetadata(dst_obj_metadata, self.provider)
if not dst_obj_metadata.contentType:
dst_obj_metadata.contentType = DEFAULT_CONTENT_TYPE
headers['content-type'] = dst_obj_metadata.contentType
self._AddApiVersionToHeaders(headers)
self._AddPreconditionsToHeaders(preconditions, headers)
dst_uri = self._StorageUriForObject(dst_bucket_name, dst_obj_name)
src_components = []
for src_obj in src_objs_metadata:
src_uri = self._StorageUriForObject(dst_bucket_name, src_obj.name,
generation=src_obj.generation)
src_components.append(src_uri)
try:
dst_uri.compose(src_components, headers=headers)
return self.GetObjectMetadata(dst_bucket_name, dst_obj_name,
fields=fields)
except TRANSLATABLE_BOTO_EXCEPTIONS, e:
self._TranslateExceptionAndRaise(e, dst_obj_metadata.bucket,
dst_obj_metadata.name)
def _AddPreconditionsToHeaders(self, preconditions, headers):
"""Adds preconditions (if any) to headers."""
if preconditions and self.provider == 'gs':
if preconditions.gen_match is not None:
headers['x-goog-if-generation-match'] = str(preconditions.gen_match)
if preconditions.meta_gen_match is not None:
headers['x-goog-if-metageneration-match'] = str(
preconditions.meta_gen_match)
def _AddApiVersionToHeaders(self, headers):
if self.provider == 'gs':
headers['x-goog-api-version'] = self.api_version
def _GetMD5FromETag(self, src_etag):
"""Returns an MD5 from the etag iff the etag is a valid MD5 hash.
Args:
src_etag: Object etag for which to return the MD5.
Returns:
MD5 in hex string format, or None.
"""
if src_etag and MD5_REGEX.search(src_etag):
return src_etag.strip('"\'').lower()
def _StorageUriForBucket(self, bucket):
"""Returns a boto storage_uri for the given bucket name.
Args:
bucket: Bucket name (string).
Returns:
Boto storage_uri for the bucket.
"""
return boto.storage_uri(
'%s://%s' % (self.provider, bucket),
suppress_consec_slashes=False,
bucket_storage_uri_class=self.bucket_storage_uri_class,
debug=self.debug)
def _StorageUriForObject(self, bucket, object_name, generation=None):
"""Returns a boto storage_uri for the given object.
Args:
bucket: Bucket name (string).
object_name: Object name (string).
generation: Generation or version_id of object. If None, live version
of the object is used.
Returns:
Boto storage_uri for the object.
"""
uri_string = '%s://%s/%s' % (self.provider, bucket, object_name)
if generation:
uri_string += '#%s' % generation
return boto.storage_uri(
uri_string, suppress_consec_slashes=False,
bucket_storage_uri_class=self.bucket_storage_uri_class,
debug=self.debug)
def _GetBotoKey(self, bucket_name, object_name, generation=None):
"""Gets the boto key for an object.
Args:
bucket_name: Bucket containing the object.
object_name: Object name.
generation: Generation or version of the object to retrieve.
Returns:
Boto key for the object.
"""
object_uri = self._StorageUriForObject(bucket_name, object_name,
generation=generation)
try:
key = object_uri.get_key()
if not key:
raise CreateObjectNotFoundException('404', self.provider,
bucket_name, object_name,
generation=generation)
return key
except TRANSLATABLE_BOTO_EXCEPTIONS, e:
self._TranslateExceptionAndRaise(e, bucket_name=bucket_name,
object_name=object_name,
generation=generation)
def _ListToGetFields(self, list_fields=None):
"""Removes 'items/' from the input fields and converts it to a set.
This way field sets requested for ListBucket/ListObject can be used in
_BotoBucketToBucket and _BotoKeyToObject calls.
Args:
list_fields: Iterable fields usable in ListBucket/ListObject calls.
Returns:
Set of fields usable in GetBucket/GetObject or
_BotoBucketToBucket/_BotoKeyToObject calls.
"""
if list_fields:
get_fields = set()
for field in list_fields:
if field in ['kind', 'nextPageToken', 'prefixes']:
# These are not actually object / bucket metadata fields.
# They are fields specific to listing, so we don't consider them.
continue
get_fields.add(re.sub(r'items/', '', field))
return get_fields
# pylint: disable=too-many-statements
def _BotoBucketToBucket(self, bucket, fields=None):
"""Constructs an apitools Bucket from a boto bucket.
Args:
bucket: Boto bucket.
fields: If present, construct the apitools Bucket with only this set of
metadata fields.
Returns:
apitools Bucket.
"""
bucket_uri = self._StorageUriForBucket(bucket.name)
cloud_api_bucket = apitools_messages.Bucket(name=bucket.name,
id=bucket.name)
headers = {}
self._AddApiVersionToHeaders(headers)
if self.provider == 'gs':
if not fields or 'storageClass' in fields:
if hasattr(bucket, 'get_storage_class'):
cloud_api_bucket.storageClass = bucket.get_storage_class()
if not fields or 'acl' in fields:
for acl in AclTranslation.BotoBucketAclToMessage(
bucket.get_acl(headers=headers)):
try:
cloud_api_bucket.acl.append(acl)
except TRANSLATABLE_BOTO_EXCEPTIONS, e:
translated_exception = self._TranslateBotoException(
e, bucket_name=bucket.name)
if (translated_exception and
isinstance(translated_exception,
AccessDeniedException)):
# JSON API doesn't differentiate between a blank ACL list
# and an access denied, so this is intentionally left blank.
pass
else:
self._TranslateExceptionAndRaise(e, bucket_name=bucket.name)
if not fields or 'cors' in fields:
try:
boto_cors = bucket_uri.get_cors()
cloud_api_bucket.cors = CorsTranslation.BotoCorsToMessage(boto_cors)
except TRANSLATABLE_BOTO_EXCEPTIONS, e:
self._TranslateExceptionAndRaise(e, bucket_name=bucket.name)
if not fields or 'defaultObjectAcl' in fields:
for acl in AclTranslation.BotoObjectAclToMessage(
bucket.get_def_acl(headers=headers)):
try:
cloud_api_bucket.defaultObjectAcl.append(acl)
except TRANSLATABLE_BOTO_EXCEPTIONS, e:
translated_exception = self._TranslateBotoException(
e, bucket_name=bucket.name)
if (translated_exception and
isinstance(translated_exception,
AccessDeniedException)):
# JSON API doesn't differentiate between a blank ACL list
# and an access denied, so this is intentionally left blank.
pass
else:
self._TranslateExceptionAndRaise(e, bucket_name=bucket.name)
if not fields or 'lifecycle' in fields:
try:
boto_lifecycle = bucket_uri.get_lifecycle_config()
cloud_api_bucket.lifecycle = (
LifecycleTranslation.BotoLifecycleToMessage(boto_lifecycle))
except TRANSLATABLE_BOTO_EXCEPTIONS, e:
self._TranslateExceptionAndRaise(e, bucket_name=bucket.name)
if not fields or 'logging' in fields:
try:
boto_logging = bucket_uri.get_logging_config()
if boto_logging and 'Logging' in boto_logging:
logging_config = boto_logging['Logging']
log_object_prefix_present = 'LogObjectPrefix' in logging_config
log_bucket_present = 'LogBucket' in logging_config
if log_object_prefix_present or log_bucket_present:
cloud_api_bucket.logging = apitools_messages.Bucket.LoggingValue()
if log_object_prefix_present:
cloud_api_bucket.logging.logObjectPrefix = (
logging_config['LogObjectPrefix'])
if log_bucket_present:
cloud_api_bucket.logging.logBucket = logging_config['LogBucket']
except TRANSLATABLE_BOTO_EXCEPTIONS, e:
self._TranslateExceptionAndRaise(e, bucket_name=bucket.name)
if not fields or 'website' in fields:
try:
boto_website = bucket_uri.get_website_config()
if boto_website and 'WebsiteConfiguration' in boto_website:
website_config = boto_website['WebsiteConfiguration']
main_page_suffix_present = 'MainPageSuffix' in website_config
not_found_page_present = 'NotFoundPage' in website_config
if main_page_suffix_present or not_found_page_present:
cloud_api_bucket.website = apitools_messages.Bucket.WebsiteValue()
if main_page_suffix_present:
cloud_api_bucket.website.mainPageSuffix = (
website_config['MainPageSuffix'])
if not_found_page_present:
cloud_api_bucket.website.notFoundPage = (
website_config['NotFoundPage'])
except TRANSLATABLE_BOTO_EXCEPTIONS, e:
self._TranslateExceptionAndRaise(e, bucket_name=bucket.name)
if not fields or 'location' in fields:
cloud_api_bucket.location = bucket_uri.get_location()
if not fields or 'versioning' in fields:
versioning = bucket_uri.get_versioning_config(headers=headers)
if versioning:
if (self.provider == 's3' and 'Versioning' in versioning and
versioning['Versioning'] == 'Enabled'):
cloud_api_bucket.versioning = (
apitools_messages.Bucket.VersioningValue(enabled=True))
elif self.provider == 'gs':
cloud_api_bucket.versioning = (
apitools_messages.Bucket.VersioningValue(enabled=True))
# For S3 long bucket listing we do not support CORS, lifecycle, website, and
# logging translation. The individual commands can be used to get
# the XML equivalents for S3.
return cloud_api_bucket
def _BotoKeyToObject(self, key, fields=None):
"""Constructs an apitools Object from a boto key.
Args:
key: Boto key to construct Object from.
fields: If present, construct the apitools Object with only this set of
metadata fields.
Returns:
apitools Object corresponding to key.
"""
custom_metadata = None
if not fields or 'metadata' in fields:
custom_metadata = self._TranslateBotoKeyCustomMetadata(key)
cache_control = None
if not fields or 'cacheControl' in fields:
cache_control = getattr(key, 'cache_control', None)
component_count = None
if not fields or 'componentCount' in fields:
component_count = getattr(key, 'component_count', None)
content_disposition = None
if not fields or 'contentDisposition' in fields:
content_disposition = getattr(key, 'content_disposition', None)
# Other fields like updated and ACL depend on the generation
# of the object, so populate that regardless of whether it was requested.
generation = self._TranslateBotoKeyGeneration(key)
metageneration = None
if not fields or 'metageneration' in fields:
metageneration = self._TranslateBotoKeyMetageneration(key)
updated = None
# Translation code to avoid a dependency on dateutil.
if not fields or 'updated' in fields:
updated = self._TranslateBotoKeyTimestamp(key)
etag = None
if not fields or 'etag' in fields:
etag = getattr(key, 'etag', None)
if etag:
etag = etag.strip('"\'')
crc32c = None
if not fields or 'crc32c' in fields:
if hasattr(key, 'cloud_hashes') and 'crc32c' in key.cloud_hashes:
crc32c = base64.encodestring(key.cloud_hashes['crc32c']).rstrip('\n')
md5_hash = None
if not fields or 'md5Hash' in fields:
if hasattr(key, 'cloud_hashes') and 'md5' in key.cloud_hashes:
md5_hash = base64.encodestring(key.cloud_hashes['md5']).rstrip('\n')
elif self._GetMD5FromETag(getattr(key, 'etag', None)):
md5_hash = Base64EncodeHash(self._GetMD5FromETag(key.etag))
elif self.provider == 's3':
# S3 etags are MD5s for non-multi-part objects, but multi-part objects
# (which include all objects >= 5 GB) have a custom checksum
# implementation that is not currently supported by gsutil.
self.logger.warn(
'Non-MD5 etag (%s) present for key %s, data integrity checks are '
'not possible.', key.etag, key)
# Serialize the boto key in the media link if it is requested. This
# way we can later access the key without adding an HTTP call.
media_link = None
if not fields or 'mediaLink' in fields:
media_link = binascii.b2a_base64(
pickle.dumps(key, pickle.HIGHEST_PROTOCOL))
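      # This pickled key is what GetObjectMedia later loads from
      # serialization_data['url'], avoiding an extra metadata HTTP call.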
size = None
if not fields or 'size' in fields:
size = key.size or 0
storage_class = None
if not fields or 'storageClass' in fields:
storage_class = getattr(key, 'storage_class', None)
cloud_api_object = apitools_messages.Object(
bucket=key.bucket.name,
name=key.name,
size=size,
contentEncoding=key.content_encoding,
contentLanguage=key.content_language,
contentType=key.content_type,
cacheControl=cache_control,
contentDisposition=content_disposition,
etag=etag,
crc32c=crc32c,
md5Hash=md5_hash,
generation=generation,
metageneration=metageneration,
componentCount=component_count,
updated=updated,
metadata=custom_metadata,
mediaLink=media_link,
storageClass=storage_class)
# Remaining functions amend cloud_api_object.
self._TranslateDeleteMarker(key, cloud_api_object)
if not fields or 'acl' in fields:
generation_str = GenerationFromUrlAndString(
StorageUrlFromString(self.provider), generation)
self._TranslateBotoKeyAcl(key, cloud_api_object,
generation=generation_str)
return cloud_api_object
def _TranslateBotoKeyCustomMetadata(self, key):
"""Populates an apitools message from custom metadata in the boto key."""
custom_metadata = None
if getattr(key, 'metadata', None):
custom_metadata = apitools_messages.Object.MetadataValue(
additionalProperties=[])
for k, v in key.metadata.iteritems():
if k.lower() == 'content-language':
# Work around content-language being inserted into custom metadata.
continue
custom_metadata.additionalProperties.append(
apitools_messages.Object.MetadataValue.AdditionalProperty(
key=k, value=v))
return custom_metadata
def _TranslateBotoKeyGeneration(self, key):
"""Returns the generation/version_id number from the boto key if present."""
generation = None
if self.provider == 'gs':
if getattr(key, 'generation', None):
generation = long(key.generation)
elif self.provider == 's3':
if getattr(key, 'version_id', None):
generation = EncodeStringAsLong(key.version_id)
return generation
def _TranslateBotoKeyMetageneration(self, key):
"""Returns the metageneration number from the boto key if present."""
metageneration = None
if self.provider == 'gs':
if getattr(key, 'metageneration', None):
metageneration = long(key.metageneration)
return metageneration
def _TranslateBotoKeyTimestamp(self, key):
"""Parses the timestamp from the boto key into an datetime object.
This avoids a dependency on dateutil.
Args:
key: Boto key to get timestamp from.
Returns:
datetime object if string is parsed successfully, None otherwise.
"""
if key.last_modified:
if '.' in key.last_modified:
key_us_timestamp = key.last_modified.rstrip('Z') + '000Z'
else:
key_us_timestamp = key.last_modified.rstrip('Z') + '.000000Z'
fmt = '%Y-%m-%dT%H:%M:%S.%fZ'
try:
return datetime.datetime.strptime(key_us_timestamp, fmt)
except ValueError:
try:
# Try alternate format
fmt = '%a, %d %b %Y %H:%M:%S %Z'
return datetime.datetime.strptime(key.last_modified, fmt)
except ValueError:
# Could not parse the time; leave updated as None.
return None
def _TranslateDeleteMarker(self, key, cloud_api_object):
"""Marks deleted objects with a metadata value (for S3 compatibility)."""
if isinstance(key, DeleteMarker):
if not cloud_api_object.metadata:
cloud_api_object.metadata = apitools_messages.Object.MetadataValue()
cloud_api_object.metadata.additionalProperties = []
cloud_api_object.metadata.additionalProperties.append(
apitools_messages.Object.MetadataValue.AdditionalProperty(
key=S3_DELETE_MARKER_GUID, value=True))
def _TranslateBotoKeyAcl(self, key, cloud_api_object, generation=None):
"""Updates cloud_api_object with the ACL from the boto key."""
storage_uri_for_key = self._StorageUriForObject(key.bucket.name, key.name,
generation=generation)
headers = {}
self._AddApiVersionToHeaders(headers)
try:
if self.provider == 'gs':
key_acl = storage_uri_for_key.get_acl(headers=headers)
# key.get_acl() does not support versioning so we need to use
# storage_uri to ensure we're getting the versioned ACL.
for acl in AclTranslation.BotoObjectAclToMessage(key_acl):
cloud_api_object.acl.append(acl)
if self.provider == 's3':
key_acl = key.get_xml_acl(headers=headers)
# ACLs for s3 are different and we use special markers to represent
# them in the gsutil Cloud API.
AddS3MarkerAclToObjectMetadata(cloud_api_object, key_acl)
except boto.exception.GSResponseError, e:
if e.status == 403:
# Consume access denied exceptions to mimic JSON behavior of simply
# returning None if sufficient permission is not present. The caller
# needs to handle the case where the ACL is not populated.
pass
else:
raise
def _TranslateExceptionAndRaise(self, e, bucket_name=None, object_name=None,
generation=None, not_found_exception=None):
"""Translates a Boto exception and raises the translated or original value.
Args:
e: Any Exception.
bucket_name: Optional bucket name in request that caused the exception.
object_name: Optional object name in request that caused the exception.
generation: Optional generation in request that caused the exception.
not_found_exception: Optional exception to raise in the not-found case.
Raises:
Translated CloudApi exception, or the original exception if it was not
translatable.
"""
translated_exception = self._TranslateBotoException(
e, bucket_name=bucket_name, object_name=object_name,
generation=generation, not_found_exception=not_found_exception)
if translated_exception:
raise translated_exception
else:
raise
def _TranslateBotoException(self, e, bucket_name=None, object_name=None,
generation=None, not_found_exception=None):
"""Translates boto exceptions into their gsutil Cloud API equivalents.
Args:
e: Any exception in TRANSLATABLE_BOTO_EXCEPTIONS.
bucket_name: Optional bucket name in request that caused the exception.
object_name: Optional object name in request that caused the exception.
generation: Optional generation in request that caused the exception.
not_found_exception: Optional exception to raise in the not-found case.
Returns:
CloudStorageApiServiceException for translatable exceptions, None
otherwise.
Because we're using isinstance, check for subtypes first.
"""
if isinstance(e, boto.exception.StorageResponseError):
if e.status == 400:
return BadRequestException(e.code, status=e.status, body=e.body)
elif e.status == 401 or e.status == 403:
return AccessDeniedException(e.code, status=e.status, body=e.body)
elif e.status == 404:
if not_found_exception:
# The exception is pre-constructed prior to translation; the HTTP
# status code isn't available at that time.
setattr(not_found_exception, 'status', e.status)
return not_found_exception
elif bucket_name:
if object_name:
return CreateObjectNotFoundException(e.status, self.provider,
bucket_name, object_name,
generation=generation)
return CreateBucketNotFoundException(e.status, self.provider,
bucket_name)
return NotFoundException(e.message, status=e.status, body=e.body)
elif e.status == 409 and e.code and 'BucketNotEmpty' in e.code:
return NotEmptyException('BucketNotEmpty (%s)' % bucket_name,
status=e.status, body=e.body)
elif e.status == 410:
# 410 errors should always cause us to start over - either the UploadID
# has expired or there was a server-side problem that requires starting
# the upload over from scratch.
return ResumableUploadStartOverException(e.message)
elif e.status == 412:
return PreconditionException(e.code, status=e.status, body=e.body)
if isinstance(e, boto.exception.StorageCreateError):
return ServiceException('Bucket already exists.', status=e.status,
body=e.body)
if isinstance(e, boto.exception.BotoServerError):
return ServiceException(e.message, status=e.status, body=e.body)
if isinstance(e, boto.exception.InvalidUriError):
      # The message may have been line-wrapped by textwrap, so the regex used
      # here tolerates whitespace within the 'non-existent object' phrase.
if e.message and NON_EXISTENT_OBJECT_REGEX.match(e.message.encode(UTF8)):
return NotFoundException(e.message, status=404)
return InvalidUrlError(e.message)
if isinstance(e, boto.exception.ResumableUploadException):
if e.disposition == boto.exception.ResumableTransferDisposition.ABORT:
return ResumableUploadAbortException(e.message)
elif (e.disposition ==
boto.exception.ResumableTransferDisposition.START_OVER):
return ResumableUploadStartOverException(e.message)
else:
return ResumableUploadException(e.message)
if isinstance(e, boto.exception.ResumableDownloadException):
return ResumableDownloadException(e.message)
return None
# For function docstrings, see CloudApiDelegator class.
def XmlPassThroughGetAcl(self, storage_url, def_obj_acl=False):
"""See CloudApiDelegator class for function doc strings."""
try:
uri = boto.storage_uri(
storage_url.url_string, suppress_consec_slashes=False,
bucket_storage_uri_class=self.bucket_storage_uri_class,
debug=self.debug)
if def_obj_acl:
return uri.get_def_acl()
else:
return uri.get_acl()
except TRANSLATABLE_BOTO_EXCEPTIONS, e:
self._TranslateExceptionAndRaise(e)
def XmlPassThroughSetAcl(self, acl_text, storage_url, canned=True,
def_obj_acl=False):
"""See CloudApiDelegator class for function doc strings."""
try:
uri = boto.storage_uri(
storage_url.url_string, suppress_consec_slashes=False,
bucket_storage_uri_class=self.bucket_storage_uri_class,
debug=self.debug)
if canned:
if def_obj_acl:
canned_acls = uri.canned_acls()
if acl_text not in canned_acls:
raise CommandException('Invalid canned ACL "%s".' % acl_text)
uri.set_def_acl(acl_text, uri.object_name)
else:
canned_acls = uri.canned_acls()
if acl_text not in canned_acls:
raise CommandException('Invalid canned ACL "%s".' % acl_text)
uri.set_acl(acl_text, uri.object_name)
else:
if def_obj_acl:
uri.set_def_xml_acl(acl_text, uri.object_name)
else:
uri.set_xml_acl(acl_text, uri.object_name)
except TRANSLATABLE_BOTO_EXCEPTIONS, e:
self._TranslateExceptionAndRaise(e)
# pylint: disable=catching-non-exception
def XmlPassThroughSetCors(self, cors_text, storage_url):
"""See CloudApiDelegator class for function doc strings."""
# Parse XML document and convert into Cors object.
if storage_url.scheme == 's3':
cors_obj = S3Cors()
else:
cors_obj = Cors()
h = handler.XmlHandler(cors_obj, None)
try:
xml.sax.parseString(cors_text, h)
except SaxExceptions.SAXParseException, e:
raise CommandException('Requested CORS is invalid: %s at line %s, '
'column %s' % (e.getMessage(), e.getLineNumber(),
e.getColumnNumber()))
try:
uri = boto.storage_uri(
storage_url.url_string, suppress_consec_slashes=False,
bucket_storage_uri_class=self.bucket_storage_uri_class,
debug=self.debug)
uri.set_cors(cors_obj, False)
except TRANSLATABLE_BOTO_EXCEPTIONS, e:
self._TranslateExceptionAndRaise(e)
def XmlPassThroughGetCors(self, storage_url):
"""See CloudApiDelegator class for function doc strings."""
uri = boto.storage_uri(
storage_url.url_string, suppress_consec_slashes=False,
bucket_storage_uri_class=self.bucket_storage_uri_class,
debug=self.debug)
try:
cors = uri.get_cors(False)
except TRANSLATABLE_BOTO_EXCEPTIONS, e:
self._TranslateExceptionAndRaise(e)
parsed_xml = xml.dom.minidom.parseString(cors.to_xml().encode(UTF8))
# Pretty-print the XML to make it more easily human editable.
return parsed_xml.toprettyxml(indent=' ')
def XmlPassThroughGetLifecycle(self, storage_url):
"""See CloudApiDelegator class for function doc strings."""
try:
uri = boto.storage_uri(
storage_url.url_string, suppress_consec_slashes=False,
bucket_storage_uri_class=self.bucket_storage_uri_class,
debug=self.debug)
lifecycle = uri.get_lifecycle_config(False)
except TRANSLATABLE_BOTO_EXCEPTIONS, e:
self._TranslateExceptionAndRaise(e)
parsed_xml = xml.dom.minidom.parseString(lifecycle.to_xml().encode(UTF8))
# Pretty-print the XML to make it more easily human editable.
return parsed_xml.toprettyxml(indent=' ')
def XmlPassThroughSetLifecycle(self, lifecycle_text, storage_url):
"""See CloudApiDelegator class for function doc strings."""
# Parse XML document and convert into lifecycle object.
if storage_url.scheme == 's3':
lifecycle_obj = S3Lifecycle()
else:
lifecycle_obj = LifecycleConfig()
h = handler.XmlHandler(lifecycle_obj, None)
try:
xml.sax.parseString(lifecycle_text, h)
except SaxExceptions.SAXParseException, e:
raise CommandException(
'Requested lifecycle config is invalid: %s at line %s, column %s' %
(e.getMessage(), e.getLineNumber(), e.getColumnNumber()))
try:
uri = boto.storage_uri(
storage_url.url_string, suppress_consec_slashes=False,
bucket_storage_uri_class=self.bucket_storage_uri_class,
debug=self.debug)
uri.configure_lifecycle(lifecycle_obj, False)
except TRANSLATABLE_BOTO_EXCEPTIONS, e:
self._TranslateExceptionAndRaise(e)
def XmlPassThroughGetLogging(self, storage_url):
"""See CloudApiDelegator class for function doc strings."""
try:
uri = boto.storage_uri(
storage_url.url_string, suppress_consec_slashes=False,
bucket_storage_uri_class=self.bucket_storage_uri_class,
debug=self.debug)
logging_config_xml = UnaryDictToXml(uri.get_logging_config())
except TRANSLATABLE_BOTO_EXCEPTIONS, e:
self._TranslateExceptionAndRaise(e)
return XmlParseString(logging_config_xml).toprettyxml()
def XmlPassThroughGetWebsite(self, storage_url):
"""See CloudApiDelegator class for function doc strings."""
try:
uri = boto.storage_uri(
storage_url.url_string, suppress_consec_slashes=False,
bucket_storage_uri_class=self.bucket_storage_uri_class,
debug=self.debug)
web_config_xml = UnaryDictToXml(uri.get_website_config())
except TRANSLATABLE_BOTO_EXCEPTIONS, e:
self._TranslateExceptionAndRaise(e)
return XmlParseString(web_config_xml).toprettyxml()