普通文本  |  262行  |  9.15 KB

# -*- coding: utf-8 -*-
# Copyright 2014 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Utility functions and class for listing commands such as ls and du."""

from __future__ import absolute_import

import fnmatch

from gslib.exception import CommandException
from gslib.plurality_checkable_iterator import PluralityCheckableIterator
from gslib.util import UTF8
from gslib.wildcard_iterator import StorageUrlFromString


def PrintNewLine():
  """Default function for printing new lines between directories."""
  print


def PrintDirHeader(bucket_listing_ref):
  """Default function for printing headers for prefixes.

  Header is printed prior to listing the contents of the prefix.

  Args:
    bucket_listing_ref: BucketListingRef of type PREFIX.
  """
  print '%s:' % bucket_listing_ref.url_string.encode(UTF8)


def PrintBucketHeader(bucket_listing_ref):  # pylint: disable=unused-argument
  """Default function for printing headers for buckets.

  Header is printed prior to listing the contents of the bucket.

  Args:
    bucket_listing_ref: BucketListingRef of type BUCKET.
  """
  pass


def PrintDir(bucket_listing_ref):
  """Default function for printing buckets or prefixes.

  Args:
    bucket_listing_ref: BucketListingRef of type BUCKET or PREFIX.
  """
  print bucket_listing_ref.url_string.encode(UTF8)


# pylint: disable=unused-argument
def PrintDirSummary(num_bytes, bucket_listing_ref):
  """Off-by-default function for printing buckets or prefix size summaries.

  Args:
    num_bytes: Number of bytes contained in the directory.
    bucket_listing_ref: BucketListingRef of type BUCKET or PREFIX.
  """
  pass


def PrintObject(bucket_listing_ref):
  """Default printing function for objects.

  Args:
    bucket_listing_ref: BucketListingRef of type OBJECT.

  Returns:
    (num_objects, num_bytes).
  """
  print bucket_listing_ref.url_string.encode(UTF8)
  return (1, 0)


class LsHelper(object):
  """Helper class for ls and du."""

  def __init__(self, iterator_func, logger,
               print_object_func=PrintObject,
               print_dir_func=PrintDir,
               print_dir_header_func=PrintDirHeader,
               print_bucket_header_func=PrintBucketHeader,
               print_dir_summary_func=PrintDirSummary,
               print_newline_func=PrintNewLine,
               all_versions=False, should_recurse=False,
               exclude_patterns=None, fields=('name',)):
    """Initializes the helper class to prepare for listing.

    Args:
      iterator_func: Function for instantiating iterator.
                     Inputs-
                       url_string- Url string to iterate on. May include
                                   wildcards.
                       all_versions=False- If true, iterate over all object
                                           versions.
      logger: Logger for outputting warnings / errors.
      print_object_func: Function for printing objects.
      print_dir_func:    Function for printing buckets/prefixes.
      print_dir_header_func: Function for printing header line for buckets
                             or prefixes.
      print_bucket_header_func: Function for printing header line for buckets
                                or prefixes.
      print_dir_summary_func: Function for printing size summaries about
                              buckets/prefixes.
      print_newline_func: Function for printing new lines between dirs.
      all_versions:      If true, list all object versions.
      should_recurse:    If true, recursively listing buckets/prefixes.
      exclude_patterns:  Patterns to exclude when listing.
      fields:            Fields to request from bucket listings; this should
                         include all fields that need to be populated in
                         objects so they can be listed. Can be set to None
                         to retrieve all object fields. Defaults to short
                         listing fields.
    """
    self._iterator_func = iterator_func
    self.logger = logger
    self._print_object_func = print_object_func
    self._print_dir_func = print_dir_func
    self._print_dir_header_func = print_dir_header_func
    self._print_bucket_header_func = print_bucket_header_func
    self._print_dir_summary_func = print_dir_summary_func
    self._print_newline_func = print_newline_func
    self.all_versions = all_versions
    self.should_recurse = should_recurse
    self.exclude_patterns = exclude_patterns
    self.bucket_listing_fields = fields

  def ExpandUrlAndPrint(self, url):
    """Iterates over the given URL and calls print functions.

    Args:
      url: StorageUrl to iterate over.

    Returns:
      (num_objects, num_bytes) total number of objects and bytes iterated.
    """
    num_objects = 0
    num_dirs = 0
    num_bytes = 0
    print_newline = False

    if url.IsBucket() or self.should_recurse:
      # IsBucket() implies a top-level listing.
      if url.IsBucket():
        self._print_bucket_header_func(url)
      return self._RecurseExpandUrlAndPrint(url.url_string,
                                            print_initial_newline=False)
    else:
      # User provided a prefix or object URL, but it's impossible to tell
      # which until we do a listing and see what matches.
      top_level_iterator = PluralityCheckableIterator(self._iterator_func(
          url.CreatePrefixUrl(wildcard_suffix=None),
          all_versions=self.all_versions).IterAll(
              expand_top_level_buckets=True,
              bucket_listing_fields=self.bucket_listing_fields))
      plurality = top_level_iterator.HasPlurality()

      for blr in top_level_iterator:
        if self._MatchesExcludedPattern(blr):
          continue
        if blr.IsObject():
          nd = 0
          no, nb = self._print_object_func(blr)
          print_newline = True
        elif blr.IsPrefix():
          if print_newline:
            self._print_newline_func()
          else:
            print_newline = True
          if plurality:
            self._print_dir_header_func(blr)
          expansion_url_str = StorageUrlFromString(
              blr.url_string).CreatePrefixUrl(wildcard_suffix='*')
          nd, no, nb = self._RecurseExpandUrlAndPrint(expansion_url_str)
          self._print_dir_summary_func(nb, blr)
        else:
          # We handle all buckets at the top level, so this should never happen.
          raise CommandException(
              'Sub-level iterator returned a CsBucketListingRef of type Bucket')
        num_objects += no
        num_dirs += nd
        num_bytes += nb
      return num_dirs, num_objects, num_bytes

  def _RecurseExpandUrlAndPrint(self, url_str, print_initial_newline=True):
    """Iterates over the given URL string and calls print functions.

    Args:
      url_str: String describing StorageUrl to iterate over.
               Must be of depth one or higher.
      print_initial_newline: If true, print a newline before recursively
                             expanded prefixes.

    Returns:
      (num_objects, num_bytes) total number of objects and bytes iterated.
    """
    num_objects = 0
    num_dirs = 0
    num_bytes = 0
    for blr in self._iterator_func(
        '%s' % url_str, all_versions=self.all_versions).IterAll(
            expand_top_level_buckets=True,
            bucket_listing_fields=self.bucket_listing_fields):
      if self._MatchesExcludedPattern(blr):
        continue

      if blr.IsObject():
        nd = 0
        no, nb = self._print_object_func(blr)
      elif blr.IsPrefix():
        if self.should_recurse:
          if print_initial_newline:
            self._print_newline_func()
          else:
            print_initial_newline = True
          self._print_dir_header_func(blr)
          expansion_url_str = StorageUrlFromString(
              blr.url_string).CreatePrefixUrl(wildcard_suffix='*')

          nd, no, nb = self._RecurseExpandUrlAndPrint(expansion_url_str)
          self._print_dir_summary_func(nb, blr)
        else:
          nd, no, nb = 1, 0, 0
          self._print_dir_func(blr)
      else:
        # We handle all buckets at the top level, so this should never happen.
        raise CommandException(
            'Sub-level iterator returned a bucketListingRef of type Bucket')
      num_dirs += nd
      num_objects += no
      num_bytes += nb

    return num_dirs, num_objects, num_bytes

  def _MatchesExcludedPattern(self, blr):
    """Checks bucket listing reference against patterns to exclude.

    Args:
      blr: BucketListingRef to check.

    Returns:
      True if reference matches a pattern and should be excluded.
    """
    if self.exclude_patterns:
      tomatch = blr.url_string
      for pattern in self.exclude_patterns:
        if fnmatch.fnmatch(tomatch, pattern):
          return True
    return False