# -*- coding: utf-8 -*-
# Copyright 2011 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Implementation of Unix-like rm command for cloud storage providers."""
from __future__ import absolute_import
from gslib.cloud_api import BucketNotFoundException
from gslib.cloud_api import NotEmptyException
from gslib.cloud_api import NotFoundException
from gslib.cloud_api import ServiceException
from gslib.command import Command
from gslib.command import GetFailureCount
from gslib.command import ResetFailureCount
from gslib.command_argument import CommandArgument
from gslib.cs_api_map import ApiSelector
from gslib.exception import CommandException
from gslib.name_expansion import NameExpansionIterator
from gslib.storage_url import StorageUrlFromString
from gslib.translation_helper import PreconditionsFromHeaders
from gslib.util import GetCloudApiInstance
from gslib.util import NO_MAX
from gslib.util import Retry
from gslib.util import StdinIterator
_SYNOPSIS = """
gsutil rm [-f] [-r] url...
gsutil rm [-f] [-r] -I
"""
_DETAILED_HELP_TEXT = ("""
<B>SYNOPSIS</B>
""" + _SYNOPSIS + """
<B>DESCRIPTION</B>
  The gsutil rm command removes objects.
  For example, the command:

    gsutil rm gs://bucket/subdir/*

  will remove all objects in gs://bucket/subdir, but not in any of its
  sub-directories. In contrast:

    gsutil rm gs://bucket/subdir/**

  will remove all objects under gs://bucket/subdir or any of its
  subdirectories.

  You can also use the -r option to specify recursive object deletion. Thus, for
  example, either of the following two commands will remove gs://bucket/subdir
  and all objects and subdirectories under it:

    gsutil rm gs://bucket/subdir/**
    gsutil rm -r gs://bucket/subdir

  The -r option will also delete all object versions in the subdirectory for
  versioning-enabled buckets, whereas the ** command will only delete the live
  version of each object in the subdirectory.

  Running gsutil rm -r on a bucket will delete all versions of all objects in
  the bucket, and then delete the bucket:

    gsutil rm -r gs://bucket

  If you want to delete all objects in the bucket, but not the bucket itself,
  this command will work:

    gsutil rm gs://bucket/**

  If you have a large number of objects to remove, you might want to use the
  gsutil -m option to perform parallel (multi-threaded/multi-processing)
  removes:

    gsutil -m rm -r gs://my_bucket/subdir

  You can pass a list of URLs (one per line) to remove on stdin instead of as
  command line arguments by using the -I option. This allows you to use gsutil
  in a pipeline to remove objects identified by a program, such as:

    some_program | gsutil -m rm -I

  The contents of stdin can name cloud URLs and wildcards of cloud URLs.

  Note that gsutil rm will refuse to remove files from the local
  file system. For example, this will fail:

    gsutil rm *.txt

  WARNING: Object removal cannot be undone. Google Cloud Storage is designed
  to give developers a high amount of flexibility and control over their data,
  and Google maintains strict controls over the processing and purging of
  deleted data. To protect yourself from mistakes, you can configure object
  versioning on your bucket(s). See 'gsutil help versions' for details.


<B>DATA RESTORATION FROM ACCIDENTAL DELETION OR OVERWRITES</B>
  Google Cloud Storage does not provide support for restoring data lost
  or overwritten due to customer errors. If you have concerns that your
  application software (or your users) may at some point erroneously delete or
  overwrite data, you can protect yourself from that risk by enabling Object
  Versioning (see "gsutil help versioning"). Doing so increases storage costs,
  which can be partially mitigated by configuring Lifecycle Management to delete
  older object versions (see "gsutil help lifecycle").


<B>OPTIONS</B>
  -f          Continues silently (without printing error messages) despite
              errors when removing multiple objects. If some of the objects
              could not be removed, gsutil's exit status will be non-zero even
              if this flag is set. This option is implicitly set when running
              "gsutil -m rm ...".

  -I          Causes gsutil to read the list of objects to remove from stdin.
              This allows you to run a program that generates the list of
              objects to remove.

  -R, -r      Causes bucket or bucket subdirectory contents (all objects and
              subdirectories that it contains) to be removed recursively. If
              used with a bucket-only URL (like gs://bucket), after deleting
              objects and subdirectories gsutil will delete the bucket. The -r
              flag implies the -a flag and will delete all object versions.

  -a          Delete all versions of an object.
""")
def _RemoveExceptionHandler(cls, e):
"""Simple exception handler to allow post-completion status."""
if not cls.continue_on_error:
cls.logger.error(str(e))
# TODO: Use shared state to track missing bucket names when we get a
# BucketNotFoundException. Then improve bucket removal logic and exception
# messages.
if isinstance(e, BucketNotFoundException):
cls.bucket_not_found_count += 1
cls.logger.error(str(e))
else:
cls.op_failure_count += 1
# pylint: disable=unused-argument
def _RemoveFoldersExceptionHandler(cls, e):
"""When removing folders, we don't mind if none exist."""
  if (isinstance(e, CommandException) and
      'No URLs matched' in e.message) or isinstance(e, NotFoundException):
pass
else:
raise e
def _RemoveFuncWrapper(cls, name_expansion_result, thread_state=None):
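  """Wrapper that allows Command.Apply() to call RemoveFunc on each result."""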
cls.RemoveFunc(name_expansion_result, thread_state=thread_state)
class RmCommand(Command):
"""Implementation of gsutil rm command."""
# Command specification. See base class for documentation.
command_spec = Command.CreateCommandSpec(
'rm',
command_name_aliases=['del', 'delete', 'remove'],
usage_synopsis=_SYNOPSIS,
min_args=0,
max_args=NO_MAX,
supported_sub_args='afIrR',
file_url_ok=False,
provider_url_ok=False,
urls_start_arg=0,
gs_api_support=[ApiSelector.XML, ApiSelector.JSON],
gs_default_api=ApiSelector.JSON,
argparse_arguments=[
CommandArgument.MakeZeroOrMoreCloudURLsArgument()
]
)
# Help specification. See help_provider.py for documentation.
help_spec = Command.HelpSpec(
help_name='rm',
help_name_aliases=['del', 'delete', 'remove'],
help_type='command_help',
help_one_line_summary='Remove objects',
help_text=_DETAILED_HELP_TEXT,
subcommand_help_text={},
)
def RunCommand(self):
"""Command entry point for the rm command."""
# self.recursion_requested is initialized in command.py (so it can be
# checked in parent class for all commands).
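    # Running with "gsutil -m" implicitly enables -f behavior: keep going when
    # individual removals fail.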
self.continue_on_error = self.parallel_operations
self.read_args_from_stdin = False
self.all_versions = False
if self.sub_opts:
for o, unused_a in self.sub_opts:
if o == '-a':
self.all_versions = True
elif o == '-f':
self.continue_on_error = True
elif o == '-I':
self.read_args_from_stdin = True
elif o == '-r' or o == '-R':
self.recursion_requested = True
self.all_versions = True
if self.read_args_from_stdin:
if self.args:
raise CommandException('No arguments allowed with the -I flag.')
url_strs = StdinIterator()
else:
if not self.args:
raise CommandException('The rm command (without -I) expects at '
'least one URL.')
url_strs = self.args
# Tracks if any deletes failed.
self.op_failure_count = 0
# Tracks if any buckets were missing.
self.bucket_not_found_count = 0
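    # Tracks buckets slated for deletion once their contents have been removed
    # (e.g. "gsutil rm -r gs://bucket").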
bucket_urls_to_delete = []
bucket_strings_to_delete = []
if self.recursion_requested:
bucket_fields = ['id']
for url_str in url_strs:
url = StorageUrlFromString(url_str)
if url.IsBucket() or url.IsProvider():
for blr in self.WildcardIterator(url_str).IterBuckets(
bucket_fields=bucket_fields):
bucket_urls_to_delete.append(blr.storage_url)
bucket_strings_to_delete.append(url_str)
self.preconditions = PreconditionsFromHeaders(self.headers or {})
try:
# Expand wildcards, dirs, buckets, and bucket subdirs in URLs.
name_expansion_iterator = NameExpansionIterator(
self.command_name, self.debug, self.logger, self.gsutil_api,
url_strs, self.recursion_requested, project_id=self.project_id,
all_versions=self.all_versions,
continue_on_error=self.continue_on_error or self.parallel_operations)
# Perform remove requests in parallel (-m) mode, if requested, using
# configured number of parallel processes and threads. Otherwise,
# perform requests with sequential function calls in current process.
self.Apply(_RemoveFuncWrapper, name_expansion_iterator,
_RemoveExceptionHandler,
fail_on_error=(not self.continue_on_error),
shared_attrs=['op_failure_count', 'bucket_not_found_count'])
    # Assuming the bucket has versioning enabled, URLs that don't map to
# objects should throw an error even with all_versions, since the prior
# round of deletes only sends objects to a history table.
# This assumption that rm -a is only called for versioned buckets should be
# corrected, but the fix is non-trivial.
except CommandException as e:
# Don't raise if there are buckets to delete -- it's valid to say:
# gsutil rm -r gs://some_bucket
# if the bucket is empty.
if not bucket_urls_to_delete and not self.continue_on_error:
raise
# Reset the failure count if we failed due to an empty bucket that we're
# going to delete.
msg = 'No URLs matched: '
if msg in str(e):
parts = str(e).split(msg)
if len(parts) == 2 and parts[1] in bucket_strings_to_delete:
ResetFailureCount()
else:
raise
    except ServiceException as e:
if not self.continue_on_error:
raise
if self.bucket_not_found_count:
raise CommandException('Encountered non-existent bucket during listing')
if self.op_failure_count and not self.continue_on_error:
raise CommandException('Some files could not be removed.')
# If this was a gsutil rm -r command covering any bucket subdirs,
# remove any dir_$folder$ objects (which are created by various web UI
# tools to simulate folders).
if self.recursion_requested:
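      # Note the failure count from the object removals above so that failures
      # while cleaning up placeholder objects below do not, by themselves,
      # produce a non-zero exit status.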
had_previous_failures = GetFailureCount() > 0
folder_object_wildcards = []
for url_str in url_strs:
url = StorageUrlFromString(url_str)
if url.IsObject():
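          # Build a wildcard that matches _$folder$ placeholder objects for
          # this URL and anything under it.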
folder_object_wildcards.append('%s**_$folder$' % url_str)
if folder_object_wildcards:
self.continue_on_error = True
try:
name_expansion_iterator = NameExpansionIterator(
self.command_name, self.debug,
self.logger, self.gsutil_api,
folder_object_wildcards, self.recursion_requested,
project_id=self.project_id,
all_versions=self.all_versions)
# When we're removing folder objects, always continue on error
self.Apply(_RemoveFuncWrapper, name_expansion_iterator,
_RemoveFoldersExceptionHandler,
fail_on_error=False)
except CommandException as e:
# Ignore exception from name expansion due to an absent folder file.
if not e.reason.startswith('No URLs matched:'):
raise
if not had_previous_failures:
ResetFailureCount()
# Now that all data has been deleted, delete any bucket URLs.
for url in bucket_urls_to_delete:
self.logger.info('Removing %s...', url)
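      # Bucket deletion may transiently fail with NotEmptyException while the
      # object deletions above are still being processed, so retry a few times.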
@Retry(NotEmptyException, tries=3, timeout_secs=1)
def BucketDeleteWithRetry():
self.gsutil_api.DeleteBucket(url.bucket_name, provider=url.scheme)
BucketDeleteWithRetry()
if self.op_failure_count:
      plural_str = 's' if self.op_failure_count > 1 else ''
raise CommandException('%d file%s/object%s could not be removed.' % (
self.op_failure_count, plural_str, plural_str))
return 0
def RemoveFunc(self, name_expansion_result, thread_state=None):
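    """Removes the object named by a single NameExpansionResult."""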
gsutil_api = GetCloudApiInstance(self, thread_state=thread_state)
exp_src_url = name_expansion_result.expanded_storage_url
self.logger.info('Removing %s...', exp_src_url)
gsutil_api.DeleteObject(
exp_src_url.bucket_name, exp_src_url.object_name,
preconditions=self.preconditions, generation=exp_src_url.generation,
provider=exp_src_url.scheme)