#!/usr/bin/python
# Copyright (c) 2010 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Class for parsing metadata about extension samples."""

import locale
import os
import os.path
import re
import hashlib
import zipfile
import simplejson as json

# Make sure we get consistent string sorting behavior by explicitly using the
# default C locale.  This runs as soon as the module is imported and switches
# every locale category (LC_ALL) for the whole process, not only for the code
# in this file.
locale.setlocale(locale.LC_ALL, 'C')

def sorted_walk(path):
  """ Walks a directory tree like os.walk, but in a reproducible name order.

  os.walk reports entries in whatever order the filesystem hands them back,
  which would otherwise show up as spurious changes in the generated docs.

  Args:
    path: The directory at which to start walking.

  Yields:
    The same (base, dirs, files) tuples as os.walk, with both lists sorted.
  """
  for walked in os.walk(path):
    subdirs, filenames = walked[1], walked[2]
    # Sort in place: os.walk reads this very list to decide which directories
    # to descend into next, and in which order.
    subdirs.sort()
    filenames.sort()
    yield walked

def parse_json_file(path, encoding="utf-8"):
  """ Load the specified file and parse it as JSON.

  Args:
    path: Path to a file containing JSON-encoded data.
    encoding: Encoding used in the file.  Defaults to utf-8.

  Returns:
    A Python object representing the data encoded in the file.

  Raises:
    Exception: If the file could not be read or its contents could not be
        parsed as JSON data.
  """
  try:
    json_file = open(path, 'r')
  except IOError, msg:
    raise Exception("Failed to read the file at %s: %s" % (path, msg))

  try:
    json_obj = json.load(json_file, encoding)
  except ValueError, msg:
    raise Exception("Failed to parse JSON out of file %s: %s" % (path, msg))
  finally:
    json_file.close()

  return json_obj

class ApiManifest(object):
  """ Represents the list of API methods contained in extension_api.json """

  # Keys which may be passed to the _parseModuleDocLinksByKey method.
  _MODULE_DOC_KEYS = ['functions', 'events']

  def __init__(self, manifest_path):
    """ Read the supplied manifest file and parse its contents.

    Args:
      manifest_path: Path to extension_api.json
    """
    self._manifest = parse_json_file(manifest_path)

  def _getDocLink(self, method, hashprefix):
    """
    Given an API method, return a partial URL corresponding to the doc
    file for that method.

    Args:
      method: A string like 'chrome.foo.bar' or 'chrome.experimental.foo.onBar'
      hashprefix: The prefix to put in front of hash links - 'method' for
          methods and 'event' for events.

    Returns:
      A string like 'foo.html#method-bar' or 'experimental.foo.html#event-onBar'
    """
    # Only the leading 'chrome.' is dropped.  Removing every occurrence would
    # mangle a namespace whose own name happens to end in 'chrome'.
    prefix = 'chrome.'
    if method.startswith(prefix):
      method = method[len(prefix):]

    # The doubled percent signs survive the first substitution, leaving a
    # pattern such as '%s.html#method-%s' for the (namespace, name) pair.
    urlpattern = '%%s.html#%s-%%s' % hashprefix
    urlparts = tuple(method.rsplit('.', 1))
    return urlpattern % urlparts

  def _parseModuleDocLinksByKey(self, module, key):
    """
    Given a specific API module, returns a dict of methods or events mapped to
    documentation URLs.

    Args:
      module: The data in extension_api.json corresponding to a single module.
      key: A key belonging to _MODULE_DOC_KEYS to determine which set of
          methods to parse, and what kind of documentation URL to generate.

    Returns:
      A dict of extension methods mapped to file and hash URL parts for the
      corresponding documentation links, like:
        {
          "chrome.tabs.remove": "tabs.html#method-remove",
          "chrome.tabs.onDetached" : "tabs.html#event-onDetached"
        }

      If the API namespace is defined "nodoc" then an empty dict is returned.

    Raises:
      Exception: If the key supplied is not a member of _MODULE_DOC_KEYS.
    """
    # Validate first so that a bad key is reported for every module, including
    # the "nodoc" ones which return early below.
    if key not in self._MODULE_DOC_KEYS:
      raise Exception("key %s must be one of %s" % (key, self._MODULE_DOC_KEYS))

    api_dict = {}
    namespace = module['namespace']
    if 'nodoc' in module:
      return api_dict

    hashprefix = 'method'
    if key == 'events':
      hashprefix = 'event'

    # A module is not required to define both functions and events.
    for method in module.get(key, []):
      method_name = 'chrome.%s.%s' % (namespace, method['name'])
      api_dict[method_name] = self._getDocLink(method_name, hashprefix)
    return api_dict

  def getModuleNames(self):
    """ Returns the names of individual modules in the API.

    Returns:
      A set containing the encoded namespace of every module in the manifest
      which does not have a "nodoc" property.
    """
    # Exclude modules with a "nodoc" property.
    return set(module['namespace'].encode() for module in self._manifest
               if "nodoc" not in module)

  def getDocumentationLinks(self):
    """ Parses the extension_api.json manifest and returns a dict of all
    events and methods for every module, mapped to relative documentation links.

    Returns:
      A dict of methods/events => partial doc links for every module.
    """
    api_dict = {}
    for module in self._manifest:
      api_dict.update(self._parseModuleDocLinksByKey(module, 'functions'))
      api_dict.update(self._parseModuleDocLinksByKey(module, 'events'))
    return api_dict

class SamplesManifest(object):
  """ Represents a manifest file containing information about the sample
  extensions available in the codebase. """

  def __init__(self, base_sample_path, base_dir, api_manifest):
    """ Reads through the filesystem and obtains information about any Chrome
    extensions which exist underneath the specified folder.

    Args:
      base_sample_path: The directory under which to search for samples.
      base_dir: The base directory samples will be referenced from.
      api_manifest: An instance of the ApiManifest class, which will indicate
          which API methods are available.
    """
    self._base_dir = base_dir
    manifest_paths = self._locateManifestsFromPath(base_sample_path)
    self._manifest_data = self._parseManifestData(manifest_paths, api_manifest)

  def _locateManifestsFromPath(self, path):
    """
    Returns a list of paths to sample extension manifest.json files.

    Args:
      base_path: Base path in which to start the search.
    Returns:
      A list of paths below base_path pointing at manifest.json files.
    """
    manifest_paths = []
    for root, directories, files in sorted_walk(path):
      if 'manifest.json' in files:
        directories = []             # Don't go any further down this tree
        manifest_paths.append(os.path.join(root, 'manifest.json'))
      if '.svn' in directories:
        directories.remove('.svn')   # Don't go into SVN metadata directories
    return manifest_paths

  def _parseManifestData(self, manifest_paths, api_manifest):
    """ Returns metadata about the sample extensions given their manifest
    paths.

    Args:
      manifest_paths: A list of paths to extension manifests
      api_manifest: An instance of the ApiManifest class, which will indicate
          which API methods are available.

    Returns:
      Manifest data containing a list of samples and available API methods.
    """
    api_method_dict = api_manifest.getDocumentationLinks()
    api_methods = api_method_dict.keys()

    samples = []
    for path in manifest_paths:
      sample = Sample(path, api_methods, self._base_dir)
      # Don't render apps
      if sample.is_app() == False:
        samples.append(sample)

    def compareSamples(sample1, sample2):
      """ Compares two samples as a sort comparator, by name then path. """
      value = cmp(sample1['name'].upper(), sample2['name'].upper())
      if value == 0:
        value = cmp(sample1['path'], sample2['path'])
      return value

    samples.sort(compareSamples)

    manifest_data = {'samples': samples, 'api': api_method_dict}
    return manifest_data

  def writeToFile(self, path):
    """ Writes the contents of this manifest file as a JSON-encoded text file.

    Args:
      path: The path to write the samples manifest file to.
    """
    manifest_text = json.dumps(self._manifest_data, indent=2,
                               sort_keys=True, separators=(',', ': '))
    output_path = os.path.realpath(path)
    try:
      output_file = open(output_path, 'w')
    except IOError, msg:
      raise Exception("Failed to write the samples manifest file."
                      "The specific error was: %s." % msg)
    output_file.write(manifest_text)
    output_file.close()

  def writeZippedSamples(self):
    """ For each sample in the current manifest, create a zip file with the
    sample contents in the sample's parent directory if not zip exists, or
    update the zip file if the sample has been updated.

    Returns:
      A set of paths representing zip files which have been modified.
    """
    modified_paths = []
    for sample in self._manifest_data['samples']:
      path = sample.write_zip()
      if path:
        modified_paths.append(path)
    return modified_paths

class Sample(dict):
  """ Represents metadata about a Chrome extension sample.

  Extends dict so that it can be easily JSON serialized.
  """

  def __init__(self, manifest_path, api_methods, base_dir):
    """ Initializes a Sample instance given a path to a manifest.

    Args:
      manifest_path: A filesystem path to a manifest file.
      api_methods: A list of strings containing all possible Chrome extension
          API calls.
      base_dir: The base directory where this sample will be referenced from -
          paths will be made relative to this directory.
    """
    self._base_dir = base_dir
    self._manifest_path = manifest_path
    self._manifest = parse_json_file(self._manifest_path)
    self._locale_data = self._parse_locale_data()

    # The following calls set data which will be serialized when converting
    # this object to JSON.
    source_data = self._parse_source_data(api_methods)
    self['api_calls'] = source_data['api_calls']
    self['source_files'] = source_data['source_files']
    self['source_hash'] = source_data['source_hash']

    self['name'] = self._parse_name()
    self['description'] = self._parse_description()
    self['icon'] = self._parse_icon()
    self['features'] = self._parse_features()
    self['protocols'] = self._parse_protocols()
    self['path'] = self._get_relative_path()
    self['search_string'] = self._get_search_string()
    self['id'] = hashlib.sha1(self['path']).hexdigest()
    self['zip_path'] = self._get_relative_zip_path()

  _FEATURE_ATTRIBUTES = (
    'browser_action',
    'page_action',
    'background_page',
    'options_page',
    'plugins',
    'theme',
    'chrome_url_overrides'
  )
  """ Attributes that will map to "features" if their corresponding key is
  present in the extension manifest. """

  _SOURCE_FILE_EXTENSIONS = ('.html', '.json', '.js', '.css', '.htm')
  """ File extensions to files which may contain source code."""

  _ENGLISH_LOCALES = ['en_US', 'en', 'en_GB']
  """ Locales from which translations may be used in the sample gallery. """

  def _get_localized_manifest_value(self, key):
    """ Returns a localized version of the requested manifest value.

    Args:
      key: The manifest key whose value the caller wants translated.

    Returns:
      If the supplied value exists and contains a ___MSG_token___ value, this
      method will resolve the appropriate translation and return the result.
      If no token exists, the manifest value will be returned.  If the key does
      not exist, an empty string will be returned.

    Raises:
      Exception: If the localized value for the given token could not be found.
    """
    if self._manifest.has_key(key):
      if self._manifest[key][:6] == '__MSG_':
        try:
          return self._get_localized_value(self._manifest[key])
        except Exception, msg:
          raise Exception("Could not translate manifest value for key %s: %s" %
                          (key, msg))
      else:
        return self._manifest[key]
    else:
      return ''

  def _get_localized_value(self, message_token):
    """ Returns the localized version of the requested MSG bundle token.

    Args:
      message_token: A message bundle token like __MSG_extensionName__.

    Returns:
      The translated text corresponding to the token, with any placeholders
      automatically resolved and substituted in.

    Raises:
      Exception: If a message bundle token is not found in the translations.
    """
    placeholder_pattern = re.compile('\$(\w*)\$')
    token = message_token[6:-2]
    if self._locale_data.has_key(token):
      message = self._locale_data[token]['message']

      placeholder_match = placeholder_pattern.search(message)
      if placeholder_match:
        # There are placeholders in the translation - substitute them.
        placeholder_name = placeholder_match.group(1)
        placeholders = self._locale_data[token]['placeholders']
        if placeholders.has_key(placeholder_name.lower()):
          placeholder_value = placeholders[placeholder_name.lower()]['content']
          placeholder_token = '$%s$' % placeholder_name
          message = message.replace(placeholder_token, placeholder_value)
      return message
    else:
      raise Exception('Could not find localized string: %s' % message_token)

  def _get_relative_path(self):
    """ Returns a relative path from the supplied base dir to the manifest dir.

    This method is used because we may not be able to rely on os.path.relpath
    which was introduced in Python 2.6 and only works on Windows and Unix.

    Since the example extensions should always be subdirectories of the
    base sample manifest path, we can get a relative path through a simple
    string substitution.

    Returns:
      A relative directory path from the sample manifest's directory to the
      directory containing this sample's manifest.json.
    """
    real_manifest_path = os.path.realpath(self._manifest_path)
    real_base_path = os.path.realpath(self._base_dir)
    return real_manifest_path.replace(real_base_path, '')\
                             .replace('manifest.json', '')[1:]

  def _get_relative_zip_path(self):
    """ Returns a relative path from the base dir to the sample's zip file.

    Intended for locating the zip file for the sample in the samples manifest.

    Returns:
      A relative directory path form the sample manifest's directory to this
      sample's zip file.
    """
    zip_filename = self._get_zip_filename()
    zip_relpath = os.path.dirname(os.path.dirname(self._get_relative_path()))
    return os.path.join(zip_relpath, zip_filename)

  def _get_search_string(self):
    """ Constructs a string to be used when searching the samples list.

    To make the implementation of the JavaScript-based search very direct, a
    string is constructed containing the title, description, API calls, and
    features that this sample uses, and is converted to uppercase.  This makes
    JavaScript sample searching very fast and easy to implement.

    Returns:
      An uppercase string containing information to match on for searching
      samples on the client.
    """
    search_terms = [
      self['name'],
      self['description'],
    ]
    search_terms.extend(self['features'])
    search_terms.extend(self['api_calls'])
    search_string = ' '.join(search_terms).replace('"', '')\
                                          .replace('\'', '')\
                                          .upper()
    return search_string

  def _get_zip_filename(self):
    """ Returns the filename to be used for a generated zip of the sample.

    Returns:
      A string in the form of "<dirname>.zip" where <dirname> is the name
      of the directory containing this sample's manifest.json.
    """
    sample_path = os.path.realpath(os.path.dirname(self._manifest_path))
    sample_dirname = os.path.basename(sample_path)
    return "%s.zip" % sample_dirname

  def _parse_description(self):
    """ Returns a localized description of the extension.

    Returns:
      A localized version of the sample's description.
    """
    return self._get_localized_manifest_value('description')

  def _parse_features(self):
    """ Returns a list of features the sample uses.

    Returns:
      A list of features the extension uses, as determined by
      self._FEATURE_ATTRIBUTES.
    """
    features = set()
    for feature_attr in self._FEATURE_ATTRIBUTES:
      if self._manifest.has_key(feature_attr):
        features.add(feature_attr)

    if self._uses_popup():
      features.add('popup')

    if self._manifest.has_key('permissions'):
      for permission in self._manifest['permissions']:
        split = permission.split('://')
        if (len(split) == 1):
          features.add(split[0])
    return sorted(features)

  def _parse_icon(self):
    """ Returns the path to the 128px icon for this sample.

    Returns:
      The path to the 128px icon if defined in the manifest, None otherwise.
    """
    if (self._manifest.has_key('icons') and
        self._manifest['icons'].has_key('128')):
      return self._manifest['icons']['128']
    else:
      return None

  def _parse_locale_data(self):
    """ Parses this sample's locale data into a dict.

    Because the sample gallery is in English, this method only looks for
    translations as defined by self._ENGLISH_LOCALES.

    Returns:
      A dict containing the translation keys and corresponding English text
      for this extension.

    Raises:
      Exception: If the messages file cannot be read, or if it is improperly
          formatted JSON.
    """
    en_messages = {}
    extension_dir_path = os.path.dirname(self._manifest_path)
    for locale in self._ENGLISH_LOCALES:
      en_messages_path = os.path.join(extension_dir_path, '_locales', locale,
                                      'messages.json')
      if (os.path.isfile(en_messages_path)):
        break

    if (os.path.isfile(en_messages_path)):
      try:
        en_messages_file = open(en_messages_path, 'r')
      except IOError, msg:
        raise Exception("Failed to read %s: %s" % (en_messages_path, msg))
      en_messages_contents = en_messages_file.read()
      en_messages_file.close()
      try:
        en_messages = json.loads(en_messages_contents)
      except ValueError, msg:
        raise Exception("File %s has a syntax error: %s" %
                        (en_messages_path, msg))
    return en_messages

  def _parse_name(self):
    """ Returns a localized name for the extension.

    Returns:
      A localized version of the sample's name.
    """
    return self._get_localized_manifest_value('name')

  def _parse_protocols(self):
    """ Returns a list of protocols this extension requests permission for.

    Returns:
      A list of every unique protocol listed in the manifest's permssions.
    """
    protocols = []
    if self._manifest.has_key('permissions'):
      for permission in self._manifest['permissions']:
        split = permission.split('://')
        if (len(split) == 2) and (split[0] not in protocols):
          protocols.append(split[0] + "://")
    return protocols

  def _parse_source_data(self, api_methods):
    """ Iterates over the sample's source files and parses data from them.

    Parses any files in the sample directory with known source extensions
    (as defined in self._SOURCE_FILE_EXTENSIONS).  For each file, this method:

       1. Stores a relative path from the manifest.json directory to the file.
       2. Searches through the contents of the file for chrome.* API calls.
       3. Calculates a SHA1 digest for the contents of the file.

    Args:
      api_methods: A list of strings containing the potential
          API calls the and the extension sample could be making.

    Raises:
      Exception: If any of the source files cannot be read.

    Returns:
      A dictionary containing the keys/values:
        'api_calls'     A sorted list of API calls the sample makes.
        'source_files'  A sorted list of paths to files the extension uses.
        'source_hash'   A hash of the individual file hashes.
    """
    data = {}
    source_paths = []
    source_hashes = []
    api_calls = set()
    base_path = os.path.realpath(os.path.dirname(self._manifest_path))
    for root, directories, files in sorted_walk(base_path):
      if '.svn' in directories:
        directories.remove('.svn')   # Don't go into SVN metadata directories

      for file_name in files:
        ext = os.path.splitext(file_name)[1]
        if ext in self._SOURCE_FILE_EXTENSIONS:
          # Add the file path to the list of source paths.
          fullpath = os.path.realpath(os.path.join(root, file_name))
          path = fullpath.replace(base_path, '')[1:]
          source_paths.append(path)

          # Read the contents and parse out API calls.
          try:
            code_file = open(fullpath, "r")
          except IOError, msg:
            raise Exception("Failed to read %s: %s" % (fullpath, msg))
          code_contents = unicode(code_file.read(), errors="replace")
          code_file.close()
          for method in api_methods:
            if (code_contents.find(method) > -1):
              api_calls.add(method)

          # Get a hash of the file contents for zip file generation.
          hash = hashlib.sha1(code_contents.encode("ascii", "replace"))
          source_hashes.append(hash.hexdigest())

    data['api_calls'] = sorted(api_calls)
    data['source_files'] = sorted(source_paths)
    data['source_hash'] = hashlib.sha1(''.join(source_hashes)).hexdigest()
    return data

  def _uses_background(self):
    """ Returns true if the extension defines a background page. """
    return self._manifest.has_key('background_page')

  def _uses_browser_action(self):
    """ Returns true if the extension defines a browser action. """
    return self._manifest.has_key('browser_action')

  def _uses_content_scripts(self):
    """ Returns true if the extension uses content scripts. """
    return self._manifest.has_key('content_scripts')

  def _uses_options(self):
    """ Returns true if the extension defines an options page. """
    return self._manifest.has_key('options_page')

  def _uses_page_action(self):
    """ Returns true if the extension uses a page action. """
    return self._manifest.has_key('page_action')

  def _uses_popup(self):
    """ Returns true if the extension defines a popup on a page or browser
    action. """
    has_b_popup = (self._uses_browser_action() and
                   self._manifest['browser_action'].has_key('popup'))
    has_p_popup = (self._uses_page_action() and
                   self._manifest['page_action'].has_key('popup'))
    return has_b_popup or has_p_popup

  def is_app(self):
    """ Returns true if the extension has an 'app' section in its manifest."""
    return self._manifest.has_key('app')

  def write_zip(self):
    """ Writes a zip file containing all of the files in this Sample's dir."""
    sample_path = os.path.realpath(os.path.dirname(self._manifest_path))
    sample_dirname = os.path.basename(sample_path)
    sample_parentpath = os.path.dirname(sample_path)

    zip_filename = self._get_zip_filename()
    zip_path = os.path.join(sample_parentpath, zip_filename)
    # we pass zip_manifest_path to zipfile.getinfo(), which chokes on
    # backslashes, so don't rely on os.path.join, use forward slash on
    # all platforms.
    zip_manifest_path = sample_dirname + '/manifest.json'

    zipfile.ZipFile.debug = 3

    if os.path.isfile(zip_path):
      try:
        old_zip_file = zipfile.ZipFile(zip_path, 'r')
      except IOError, msg:
        raise Exception("Could not read zip at %s: %s" % (zip_path, msg))
      except zipfile.BadZipfile, msg:
        raise Exception("File at %s is not a zip file: %s" % (zip_path, msg))

      try:
        info = old_zip_file.getinfo(zip_manifest_path)
        hash = info.comment
        if hash == self['source_hash']:
          return None    # Hashes match - no need to generate file
      except KeyError, msg:
        pass             # The old zip file doesn't contain a hash - overwrite
      finally:
        old_zip_file.close()

    zip_file = zipfile.ZipFile(zip_path, 'w')

    try:
      for root, dirs, files in sorted_walk(sample_path):
        if '.svn' in dirs:
          dirs.remove('.svn')
        for file in files:
          # Absolute path to the file to be added.
          abspath = os.path.realpath(os.path.join(root, file))
          # Relative path to store the file in under the zip.
          relpath = sample_dirname + abspath.replace(sample_path, "")

          zip_file.write(abspath, relpath)
          if file == 'manifest.json':
            info = zip_file.getinfo(zip_manifest_path)
            info.comment = self['source_hash']
    except RuntimeError, msg:
      raise Exception("Could not write zip at %s: %s" % (zip_path, msg))
    finally:
      zip_file.close()

    return self._get_relative_zip_path()