#!/usr/bin/python
"""
Copyright 2014 Google Inc.
Use of this source code is governed by a BSD-style license that can be
found in the LICENSE file.
Utilities for accessing Google Cloud Storage.
TODO(epoger): move this into tools/utils for broader use?
"""
# System-level imports
import os
import posixpath
import sys
try:
from apiclient.discovery import build as build_service
except ImportError:
print ('Missing google-api-python-client. Please install it; directions '
'can be found at https://developers.google.com/api-client-library/'
'python/start/installation')
raise
# Local imports
import url_utils
def download_file(source_bucket, source_path, dest_path,
                  create_subdirs_if_needed=False):
    """ Downloads a single file from Google Cloud Storage to local disk.

    Args:
      source_bucket: GCS bucket to download the file from
      source_path: full path (Posix-style) within that bucket; leading
          slashes are tolerated and ignored
      dest_path: full path (local-OS-style) on local disk to copy the file to
      create_subdirs_if_needed: boolean; whether to create subdirectories as
          needed to create dest_path
    """
    # posixpath.join() throws away every earlier component as soon as a later
    # one is absolute, so a source_path like '/dir/file.png' would otherwise
    # yield the URL '/dir/file.png' with no host or bucket. Strip leading
    # slashes so the host and bucket always survive the join.
    source_http_url = posixpath.join(
        'http://storage.googleapis.com',
        source_bucket.lstrip('/'),
        source_path.lstrip('/'))
    url_utils.copy_contents(source_url=source_http_url, dest_path=dest_path,
                            create_subdirs_if_needed=create_subdirs_if_needed)
def list_bucket_contents(bucket, subdir=None):
    """ Returns files in the Google Cloud Storage bucket as a (dirs, files) tuple.

    Uses the API documented at
    https://developers.google.com/storage/docs/json_api/v1/objects/list

    All result pages are fetched, so listings larger than a single API page
    are returned in full.

    Args:
      bucket: name of the Google Storage bucket
      subdir: directory within the bucket to list, or None for root directory

    Returns:
      (dirs, files) tuple of lists; every entry is a name relative to subdir,
      and directory names have no trailing slash.
    """
    # The GCS command relies on the subdir name (if any) ending with a slash.
    if subdir and not subdir.endswith('/'):
        subdir += '/'
    subdir_length = len(subdir) if subdir else 0

    storage = build_service('storage', 'v1')
    # nextPageToken must be part of the fields mask; without it the client
    # cannot tell that more pages exist and the listing is silently truncated.
    command = storage.objects().list(
        bucket=bucket, delimiter='/',
        fields='items(name),prefixes,nextPageToken',
        prefix=subdir)

    dirs = []
    files = []
    while command is not None:
        results = command.execute()

        # Each page of the GCS response holds two subdicts:
        #  prefixes: the full path of every directory within subdir, with
        #            trailing '/'
        #  items: property dict for each file object within subdir
        #         (including 'name', which is full path of the object)
        for dir_fullpath in results.get('prefixes', []):
            dir_basename = dir_fullpath[subdir_length:]
            dirs.append(dir_basename[:-1])  # strip trailing slash
        for file_properties in results.get('items', []):
            file_fullpath = file_properties['name']
            files.append(file_fullpath[subdir_length:])

        # list_next() returns None once the last page has been consumed.
        command = storage.objects().list_next(
            previous_request=command, previous_response=results)
    return (dirs, files)