#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright 2013 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Main module for Google Cloud Storage command line tool."""
from __future__ import absolute_import
import ConfigParser
import datetime
import errno
import getopt
import logging
import os
import re
import signal
import socket
import sys
import textwrap
import traceback
# Load the gsutil version number and append it to boto.UserAgent so the value is
# set before anything instantiates boto. This has to run after THIRD_PARTY_DIR
# is modified (done in gsutil.py) but before any calls are made that would cause
# boto.s3.Connection to be loaded - otherwise the Connection class would end up
# with a static reference to the pre-modified version of the UserAgent field,
# so boto requests would not include gsutil/version# in the UserAgent string.
import boto
import gslib
# TODO: gsutil-beta: Cloud SDK scans for this string and performs
# substitution; ensure this works with both apitools and boto.
boto.UserAgent += ' gsutil/%s (%s)' % (gslib.VERSION, sys.platform)
if os.environ.get('CLOUDSDK_WRAPPER') == '1':
boto.UserAgent += ' Cloud SDK Command Line Tool'
if os.environ.get('CLOUDSDK_VERSION'):
boto.UserAgent += ' %s' % os.environ.get('CLOUDSDK_VERSION')
# pylint: disable=g-bad-import-order
# pylint: disable=g-import-not-at-top
import httplib2
import oauth2client
from gslib import wildcard_iterator
from gslib.cloud_api import AccessDeniedException
from gslib.cloud_api import ArgumentException
from gslib.cloud_api import BadRequestException
from gslib.cloud_api import ProjectIdException
from gslib.cloud_api import ServiceException
from gslib.command_runner import CommandRunner
import gslib.exception
from gslib.exception import CommandException
import apitools.base.py.exceptions as apitools_exceptions
from gslib.util import CreateLock
from gslib.util import GetBotoConfigFileList
from gslib.util import GetCertsFile
from gslib.util import GetCleanupFiles
from gslib.util import GsutilStreamHandler
from gslib.util import ProxyInfoFromEnvironmentVar
from gslib.sig_handling import GetCaughtSignals
from gslib.sig_handling import InitializeSignalHandling
from gslib.sig_handling import RegisterSignalHandler
GSUTIL_CLIENT_ID = '909320924072.apps.googleusercontent.com'
# Google OAuth2 clients always have a secret, even if the client is an installed
# application/utility such as gsutil. Of course, in such cases the "secret" is
# actually publicly known; security depends entirely on the secrecy of refresh
# tokens, which effectively become bearer tokens.
GSUTIL_CLIENT_NOTSOSECRET = 'p3RlpR10xMFh9ZXBS/ZNLYUu'
if os.environ.get('CLOUDSDK_WRAPPER') == '1':
# Cloud SDK installs have a separate client ID / secret.
GSUTIL_CLIENT_ID = '32555940559.apps.googleusercontent.com'
GSUTIL_CLIENT_NOTSOSECRET = 'ZmssLNjJy2998hD4CTg2ejr2'
CONFIG_KEYS_TO_REDACT = ['proxy', 'proxy_port', 'proxy_user', 'proxy_pass']
# We don't use the oauth2 authentication plugin directly; importing it here
# ensures that it's loaded and available by default when an operation requiring
# authentication is performed.
try:
# pylint: disable=unused-import,g-import-not-at-top
import gcs_oauth2_boto_plugin
except ImportError:
pass
DEBUG_WARNING = """
***************************** WARNING *****************************
*** You are running gsutil with debug output enabled.
*** Be aware that debug output includes authentication credentials.
*** Make sure to remove the value of the Authorization header for
*** each HTTP request printed to the console prior to posting to
*** a public medium such as a forum post or Stack Overflow.
***************************** WARNING *****************************
""".lstrip()
TRACE_WARNING = """
***************************** WARNING *****************************
*** You are running gsutil with trace output enabled.
*** Be aware that trace output includes authentication credentials
*** and may include the contents of any files accessed during the trace.
***************************** WARNING *****************************
""".lstrip()
HTTP_WARNING = """
***************************** WARNING *****************************
*** You are running gsutil with the "https_validate_certificates" config
*** variable set to False. This option should always be set to True in
*** production environments to protect against man-in-the-middle attacks,
*** and leaking of user data.
***************************** WARNING *****************************
""".lstrip()
debug = 0
test_exception_traces = False
# pylint: disable=unused-argument
def _CleanupSignalHandler(signal_num, cur_stack_frame):
"""Cleans up if process is killed with SIGINT, SIGQUIT or SIGTERM."""
_Cleanup()
def _Cleanup():
for fname in GetCleanupFiles():
try:
os.unlink(fname)
except: # pylint: disable=bare-except
pass
def _OutputAndExit(message):
"""Outputs message and exists with code 1."""
from gslib.util import UTF8 # pylint: disable=g-import-not-at-top
if debug >= 2 or test_exception_traces:
stack_trace = traceback.format_exc()
err = ('DEBUG: Exception stack trace:\n %s\n' %
re.sub('\\n', '\n ', stack_trace))
else:
err = '%s\n' % message
try:
sys.stderr.write(err.encode(UTF8))
except UnicodeDecodeError:
# Can happen when outputting invalid Unicode filenames.
sys.stderr.write(err)
sys.exit(1)
def _OutputUsageAndExit(command_runner):
command_runner.RunNamedCommand('help')
sys.exit(1)
class GsutilFormatter(logging.Formatter):
"""A logging.Formatter that supports logging microseconds (%f)."""
def formatTime(self, record, datefmt=None):
if datefmt:
return datetime.datetime.fromtimestamp(record.created).strftime(datefmt)
# Use default implementation if datefmt is not specified.
return super(GsutilFormatter, self).formatTime(record, datefmt=datefmt)
def _ConfigureLogging(level=logging.INFO):
"""Similar to logging.basicConfig() except it always adds a handler."""
log_format = '%(levelname)s %(asctime)s %(filename)s] %(message)s'
date_format = '%m%d %H:%M:%S.%f'
formatter = GsutilFormatter(fmt=log_format, datefmt=date_format)
handler = GsutilStreamHandler()
handler.setFormatter(formatter)
root_logger = logging.getLogger()
root_logger.addHandler(handler)
root_logger.setLevel(level)
def main():
InitializeSignalHandling()
# Any modules used in initializing multiprocessing variables must be
# imported after importing gslib.__main__.
# pylint: disable=redefined-outer-name,g-import-not-at-top
import gslib.boto_translation
import gslib.command
import gslib.util
from gslib.util import BOTO_IS_SECURE
from gslib.util import CERTIFICATE_VALIDATION_ENABLED
# pylint: disable=unused-variable
from gcs_oauth2_boto_plugin import oauth2_client
from apitools.base.py import credentials_lib
# pylint: enable=unused-variable
from gslib.util import CheckMultiprocessingAvailableAndInit
if CheckMultiprocessingAvailableAndInit().is_available:
# These setup methods must be called, and, on Windows, they can only be
# called from within an "if __name__ == '__main__':" block.
gslib.command.InitializeMultiprocessingVariables()
gslib.boto_translation.InitializeMultiprocessingVariables()
else:
gslib.command.InitializeThreadingVariables()
# This needs to be done after gslib.util.InitializeMultiprocessingVariables(),
# since otherwise we can't call gslib.util.CreateLock.
try:
# pylint: disable=unused-import,g-import-not-at-top
import gcs_oauth2_boto_plugin
gcs_oauth2_boto_plugin.oauth2_helper.SetFallbackClientIdAndSecret(
GSUTIL_CLIENT_ID, GSUTIL_CLIENT_NOTSOSECRET)
gcs_oauth2_boto_plugin.oauth2_helper.SetLock(CreateLock())
credentials_lib.SetCredentialsCacheFileLock(CreateLock())
except ImportError:
pass
global debug
global test_exception_traces
if not (2, 6) <= sys.version_info[:3] < (3,):
raise gslib.exception.CommandException(
'gsutil requires python 2.6 or 2.7.')
# In gsutil 4.0 and beyond, we don't use the boto library for the JSON
# API. However, we still store gsutil configuration data in the .boto
# config file for compatibility with previous versions and user convenience.
# Many users have a .boto configuration file from previous versions, and it
# is useful to have all of the configuration for gsutil stored in one place.
command_runner = CommandRunner()
if not BOTO_IS_SECURE:
raise CommandException('\n'.join(textwrap.wrap(
'Your boto configuration has is_secure = False. Gsutil cannot be '
'run this way, for security reasons.')))
headers = {}
parallel_operations = False
quiet = False
version = False
debug = 0
trace_token = None
test_exception_traces = False
# If user enters no commands just print the usage info.
if len(sys.argv) == 1:
sys.argv.append('help')
# Change the default of the 'https_validate_certificates' boto option to
# True (it is currently False in boto).
if not boto.config.has_option('Boto', 'https_validate_certificates'):
if not boto.config.has_section('Boto'):
boto.config.add_section('Boto')
boto.config.setbool('Boto', 'https_validate_certificates', True)
gslib.util.certs_file_lock = CreateLock()
for signal_num in GetCaughtSignals():
RegisterSignalHandler(signal_num, _CleanupSignalHandler)
GetCertsFile()
try:
try:
opts, args = getopt.getopt(sys.argv[1:], 'dDvo:h:mq',
['debug', 'detailedDebug', 'version', 'option',
'help', 'header', 'multithreaded', 'quiet',
'testexceptiontraces', 'trace-token='])
except getopt.GetoptError as e:
_HandleCommandException(gslib.exception.CommandException(e.msg))
for o, a in opts:
if o in ('-d', '--debug'):
# Passing debug=2 causes boto to include httplib header output.
debug = 3
elif o in ('-D', '--detailedDebug'):
# We use debug level 3 to ask gsutil code to output more detailed
# debug output. This is a bit of a hack since it overloads the same
# flag that was originally implemented for boto use. And we use -DD
# to ask for really detailed debugging (i.e., including HTTP payload).
if debug == 3:
debug = 4
else:
debug = 3
elif o in ('-?', '--help'):
_OutputUsageAndExit(command_runner)
elif o in ('-h', '--header'):
(hdr_name, _, hdr_val) = a.partition(':')
if not hdr_name:
_OutputUsageAndExit(command_runner)
headers[hdr_name.lower()] = hdr_val
elif o in ('-m', '--multithreaded'):
parallel_operations = True
elif o in ('-q', '--quiet'):
quiet = True
elif o in ('-v', '--version'):
version = True
elif o == '--trace-token':
trace_token = a
elif o == '--testexceptiontraces': # Hidden flag for integration tests.
test_exception_traces = True
elif o in ('-o', '--option'):
(opt_section_name, _, opt_value) = a.partition('=')
if not opt_section_name:
_OutputUsageAndExit(command_runner)
(opt_section, _, opt_name) = opt_section_name.partition(':')
if not opt_section or not opt_name:
_OutputUsageAndExit(command_runner)
if not boto.config.has_section(opt_section):
boto.config.add_section(opt_section)
boto.config.set(opt_section, opt_name, opt_value)
httplib2.debuglevel = debug
if trace_token:
sys.stderr.write(TRACE_WARNING)
if debug > 1:
sys.stderr.write(DEBUG_WARNING)
if debug >= 2:
_ConfigureLogging(level=logging.DEBUG)
command_runner.RunNamedCommand('ver', ['-l'])
config_items = []
try:
config_items.extend(boto.config.items('Boto'))
config_items.extend(boto.config.items('GSUtil'))
except ConfigParser.NoSectionError:
pass
for i in xrange(len(config_items)):
config_item_key = config_items[i][0]
if config_item_key in CONFIG_KEYS_TO_REDACT:
config_items[i] = (config_item_key, 'REDACTED')
sys.stderr.write('Command being run: %s\n' % ' '.join(sys.argv))
sys.stderr.write('config_file_list: %s\n' % GetBotoConfigFileList())
sys.stderr.write('config: %s\n' % str(config_items))
elif quiet:
_ConfigureLogging(level=logging.WARNING)
else:
_ConfigureLogging(level=logging.INFO)
# oauth2client uses info logging in places that would better
# correspond to gsutil's debug logging (e.g., when refreshing
# access tokens).
oauth2client.client.logger.setLevel(logging.WARNING)
if not CERTIFICATE_VALIDATION_ENABLED:
sys.stderr.write(HTTP_WARNING)
if version:
command_name = 'version'
elif not args:
command_name = 'help'
else:
command_name = args[0]
_CheckAndWarnForProxyDifferences()
if os.environ.get('_ARGCOMPLETE', '0') == '1':
return _PerformTabCompletion(command_runner)
return _RunNamedCommandAndHandleExceptions(
command_runner, command_name, args=args[1:], headers=headers,
debug_level=debug, trace_token=trace_token,
parallel_operations=parallel_operations)
finally:
_Cleanup()
def _CheckAndWarnForProxyDifferences():
# If there are both boto config and environment variable config present for
# proxies, unset the environment variable and warn if it differs.
boto_port = boto.config.getint('Boto', 'proxy_port', 0)
if boto.config.get('Boto', 'proxy', None) or boto_port:
for proxy_env_var in ['http_proxy', 'https_proxy', 'HTTPS_PROXY']:
if proxy_env_var in os.environ and os.environ[proxy_env_var]:
differing_values = []
proxy_info = ProxyInfoFromEnvironmentVar(proxy_env_var)
if proxy_info.proxy_host != boto.config.get('Boto', 'proxy', None):
differing_values.append(
'Boto proxy host: "%s" differs from %s proxy host: "%s"' %
(boto.config.get('Boto', 'proxy', None), proxy_env_var,
proxy_info.proxy_host))
if (proxy_info.proxy_user !=
boto.config.get('Boto', 'proxy_user', None)):
differing_values.append(
'Boto proxy user: "%s" differs from %s proxy user: "%s"' %
(boto.config.get('Boto', 'proxy_user', None), proxy_env_var,
proxy_info.proxy_user))
if (proxy_info.proxy_pass !=
boto.config.get('Boto', 'proxy_pass', None)):
differing_values.append(
'Boto proxy password differs from %s proxy password' %
proxy_env_var)
# Only compare ports if at least one is present, since the
# boto logic for selecting default ports has not yet executed.
if ((proxy_info.proxy_port or boto_port) and
proxy_info.proxy_port != boto_port):
differing_values.append(
'Boto proxy port: "%s" differs from %s proxy port: "%s"' %
(boto_port, proxy_env_var, proxy_info.proxy_port))
if differing_values:
sys.stderr.write('\n'.join(textwrap.wrap(
'WARNING: Proxy configuration is present in both the %s '
'environment variable and boto configuration, but '
'configuration differs. boto configuration proxy values will '
'be used. Differences detected:' % proxy_env_var)))
sys.stderr.write('\n%s\n' % '\n'.join(differing_values))
# Regardless of whether the proxy configuration values matched,
# delete the environment variable so as not to confuse boto.
del os.environ[proxy_env_var]
def _HandleUnknownFailure(e):
# Called if we fall through all known/handled exceptions.
_OutputAndExit('Failure: %s.' % e)
def _HandleCommandException(e):
if e.informational:
_OutputAndExit(e.reason)
else:
_OutputAndExit('CommandException: %s' % e.reason)
# pylint: disable=unused-argument
def _HandleControlC(signal_num, cur_stack_frame):
"""Called when user hits ^C.
This function prints a brief message instead of the normal Python stack trace
(unless -D option is used).
Args:
signal_num: Signal that was caught.
cur_stack_frame: Unused.
"""
if debug >= 2:
stack_trace = ''.join(traceback.format_list(traceback.extract_stack()))
_OutputAndExit(
'DEBUG: Caught signal %d - Exception stack trace:\n'
' %s' % (signal_num, re.sub('\\n', '\n ', stack_trace)))
else:
_OutputAndExit('Caught signal %d - exiting' % signal_num)
def _HandleSigQuit(signal_num, cur_stack_frame):
"""Called when user hits ^\\, so we can force breakpoint a running gsutil."""
import pdb # pylint: disable=g-import-not-at-top
pdb.set_trace()
def _ConstructAccountProblemHelp(reason):
"""Constructs a help string for an access control error.
Args:
reason: e.reason string from caught exception.
Returns:
Contructed help text.
"""
default_project_id = boto.config.get_value('GSUtil', 'default_project_id')
# pylint: disable=line-too-long, g-inconsistent-quotes
acct_help = (
"Your request resulted in an AccountProblem (403) error. Usually this "
"happens if you attempt to create a bucket without first having "
"enabled billing for the project you are using. Please ensure billing is "
"enabled for your project by following the instructions at "
"`Google Developers Console<https://developers.google.com/console/help/billing>`. ")
if default_project_id:
acct_help += (
"In the project overview, ensure that the Project Number listed for "
"your project matches the project ID (%s) from your boto config file. "
% default_project_id)
acct_help += (
"If the above doesn't resolve your AccountProblem, please send mail to "
"gs-team@google.com requesting assistance, noting the exact command you "
"ran, the fact that you received a 403 AccountProblem error, and your "
"project ID. Please do not post your project ID on StackOverflow. "
"Note: It's possible to use Google Cloud Storage without enabling "
"billing if you're only listing or reading objects for which you're "
"authorized, or if you're uploading objects to a bucket billed to a "
"project that has billing enabled. But if you're attempting to create "
"buckets or upload objects to a bucket owned by your own project, you "
"must first enable billing for that project.")
return acct_help
def _CheckAndHandleCredentialException(e, args):
# Provide detail to users who have no boto config file (who might previously
# have been using gsutil only for accessing publicly readable buckets and
# objects).
# pylint: disable=g-import-not-at-top
from gslib.util import HasConfiguredCredentials
if (not HasConfiguredCredentials() and
not boto.config.get_value('Tests', 'bypass_anonymous_access_warning',
False)):
# The check above allows tests to assert that we get a particular,
# expected failure, rather than always encountering this error message
# when there are no configured credentials. This allows tests to
# simulate a second user without permissions, without actually requiring
# two separate configured users.
if os.environ.get('CLOUDSDK_WRAPPER') == '1':
_OutputAndExit('\n'.join(textwrap.wrap(
'You are attempting to access protected data with no configured '
'credentials. Please visit '
'https://cloud.google.com/console#/project and sign up for an '
'account, and then run the "gcloud auth login" command to '
'configure gsutil to use these credentials.')))
else:
_OutputAndExit('\n'.join(textwrap.wrap(
'You are attempting to access protected data with no configured '
'credentials. Please visit '
'https://cloud.google.com/console#/project and sign up for an '
'account, and then run the "gsutil config" command to configure '
'gsutil to use these credentials.')))
elif (e.reason and
(e.reason == 'AccountProblem' or e.reason == 'Account disabled.' or
'account for the specified project has been disabled' in e.reason)
and ','.join(args).find('gs://') != -1):
_OutputAndExit('\n'.join(textwrap.wrap(
_ConstructAccountProblemHelp(e.reason))))
def _RunNamedCommandAndHandleExceptions(command_runner, command_name, args=None,
headers=None, debug_level=0,
trace_token=None,
parallel_operations=False):
"""Runs the command with the given command runner and arguments."""
# pylint: disable=g-import-not-at-top
from gslib.util import GetConfigFilePath
from gslib.util import IS_WINDOWS
from gslib.util import IsRunningInteractively
try:
# Catch ^C so we can print a brief message instead of the normal Python
# stack trace. Register as a final signal handler because this handler kills
# the main gsutil process (so it must run last).
RegisterSignalHandler(signal.SIGINT, _HandleControlC, is_final_handler=True)
# Catch ^\ so we can force a breakpoint in a running gsutil.
if not IS_WINDOWS:
RegisterSignalHandler(signal.SIGQUIT, _HandleSigQuit)
return command_runner.RunNamedCommand(command_name, args, headers,
debug_level, trace_token,
parallel_operations)
except AttributeError as e:
if str(e).find('secret_access_key') != -1:
_OutputAndExit('Missing credentials for the given URI(s). Does your '
'boto config file contain all needed credentials?')
else:
_OutputAndExit(str(e))
except gslib.exception.CommandException as e:
_HandleCommandException(e)
except getopt.GetoptError as e:
_HandleCommandException(gslib.exception.CommandException(e.msg))
except boto.exception.InvalidUriError as e:
_OutputAndExit('InvalidUriError: %s.' % e.message)
except gslib.exception.InvalidUrlError as e:
_OutputAndExit('InvalidUrlError: %s.' % e.message)
except boto.auth_handler.NotReadyToAuthenticate:
_OutputAndExit('NotReadyToAuthenticate')
except OSError as e:
_OutputAndExit('OSError: %s.' % e.strerror)
except IOError as e:
if (e.errno == errno.EPIPE or (IS_WINDOWS and e.errno == errno.EINVAL)
and not IsRunningInteractively()):
# If we get a pipe error, this just means that the pipe to stdout or
# stderr is broken. This can happen if the user pipes gsutil to a command
# that doesn't use the entire output stream. Instead of raising an error,
# just swallow it up and exit cleanly.
sys.exit(0)
else:
raise
except wildcard_iterator.WildcardException as e:
_OutputAndExit(e.reason)
except ProjectIdException as e:
_OutputAndExit(
'You are attempting to perform an operation that requires a '
'project id, with none configured. Please re-run '
'gsutil config and make sure to follow the instructions for '
'finding and entering your default project id.')
except BadRequestException as e:
if e.reason == 'MissingSecurityHeader':
_CheckAndHandleCredentialException(e, args)
_OutputAndExit(e)
except AccessDeniedException as e:
_CheckAndHandleCredentialException(e, args)
_OutputAndExit(e)
except ArgumentException as e:
_OutputAndExit(e)
except ServiceException as e:
_OutputAndExit(e)
except apitools_exceptions.HttpError as e:
# These should usually be retried by the underlying implementation or
# wrapped by CloudApi ServiceExceptions, but if we do get them,
# print something useful.
_OutputAndExit('HttpError: %s, %s' % (getattr(e.response, 'status', ''),
e.content or ''))
except socket.error as e:
if e.args[0] == errno.EPIPE:
# Retrying with a smaller file (per suggestion below) works because
# the library code send loop (in boto/s3/key.py) can get through the
# entire file and then request the HTTP response before the socket
# gets closed and the response lost.
_OutputAndExit(
'Got a "Broken pipe" error. This can happen to clients using Python '
'2.x, when the server sends an error response and then closes the '
'socket (see http://bugs.python.org/issue5542). If you are trying to '
'upload a large object you might retry with a small (say 200k) '
'object, and see if you get a more specific error code.'
)
else:
_HandleUnknownFailure(e)
except Exception as e:
# Check for two types of errors related to service accounts. These errors
# appear to be the same except for their messages, but they are caused by
# different problems and both have unhelpful error messages. Moreover,
# the error type belongs to PyOpenSSL, which is not necessarily installed.
if 'mac verify failure' in str(e):
_OutputAndExit(
'Encountered an error while refreshing access token. '
'If you are using a service account,\nplease verify that the '
'gs_service_key_file_password field in your config file,'
'\n%s, is correct.' % GetConfigFilePath())
elif 'asn1 encoding routines' in str(e):
_OutputAndExit(
'Encountered an error while refreshing access token. '
'If you are using a service account,\nplease verify that the '
'gs_service_key_file field in your config file,\n%s, is correct.'
% GetConfigFilePath())
_HandleUnknownFailure(e)
def _PerformTabCompletion(command_runner):
"""Performs gsutil-specific tab completion for the shell."""
# argparse and argcomplete are bundled with the Google Cloud SDK.
# When gsutil is invoked from the Google Cloud SDK, both should be available.
try:
import argcomplete
import argparse
except ImportError as e:
_OutputAndExit('A library required for performing tab completion was'
' not found.\nCause: %s' % e)
parser = argparse.ArgumentParser(add_help=False)
subparsers = parser.add_subparsers()
command_runner.ConfigureCommandArgumentParsers(subparsers)
argcomplete.autocomplete(parser, exit_method=sys.exit)
return 0
if __name__ == '__main__':
sys.exit(main())