# Copyright (c) 2012 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Layout tests module that is necessary for the layout analyzer.

Layout tests are stored in an SVN repository and LayoutTestCaseManager collects
these layout test cases (including description).
"""

import copy
import csv
import locale
import re
import sys
import urllib2

import pysvn

# LayoutTests SVN root location.
DEFAULT_LAYOUTTEST_LOCATION = (
    'http://src.chromium.org/blink/trunk/LayoutTests/')
# LayoutTests SVN view link.
DEFAULT_LAYOUTTEST_SVN_VIEW_LOCATION = (
    'http://src.chromium.org/viewvc/blink/trunk/LayoutTests/')

# When parsing the test HTML file and finding the test description,
# this script tries to find the test description using sentences
# starting with these keywords. This is ad hoc but it is the only way
# since there is no standard for writing test descriptions.
KEYWORDS_FOR_TEST_DESCRIPTION = ['This test', 'Tests that', 'Test ']

# If the keywords cannot be found, this script tries to find the test case
# description by the following tags.
TAGS_FOR_TEST_DESCRIPTION = ['title', 'p', 'div']

# If the tags cannot be found, this script tries to find the test case
# description in the sentence containing the following words.
KEYWORD_FOR_TEST_DESCRIPTION_FAIL_SAFE = ['PASSED ', 'PASS:']


class LayoutTests(object):
  """A class to store test names in layout tests.

  The test names (including regular expression patterns) are read from a CSV
  file and used for getting layout test names from repository.

  Attributes:
    name_map: a dict keyed by layout test name (relative path ending in
        '.html'); every value is the placeholder description string
        'No description available'.
  """

  def __init__(self, layouttest_root_path=DEFAULT_LAYOUTTEST_LOCATION,
               parent_location_list=None, filter_names=None, recursion=False):
    """Initialize LayoutTests using root and CSV file.

    Args:
      layouttest_root_path: A location string where layout tests are stored.
          Only locations starting with 'http://' are looked up (via SVN);
          any other location currently yields an empty |name_map|.
      parent_location_list: A list of parent directories that are needed for
          getting layout tests. None is treated as an empty list.
      filter_names: A list of test name patterns that are used for filtering
          test names (e.g., media/*.html). Each pattern is applied with
          re.search(); a test is kept if any pattern matches. A false value
          (None or an empty list) disables filtering.
      recursion: a boolean indicating whether the test names are sought
          recursively.
    """
    if layouttest_root_path.startswith('http://'):
      name_map = self.GetLayoutTestNamesFromSVN(parent_location_list,
                                                layouttest_root_path,
                                                recursion)
    else:
      # TODO(imasaki): support other forms such as CSV for reading test names.
      # Until then fall back to "no tests" so that |name_map| is always bound;
      # previously this path raised UnboundLocalError below.
      name_map = {}
    # Keep only the names accepted by the filters. We get description only
    # for the filtered names.
    self.name_map = {}
    for lt_name in name_map:
      if filter_names and not any(
          re.search(filter_name, lt_name) for filter_name in filter_names):
        continue
      self.name_map[lt_name] = 'No description available'

  @staticmethod
  def _FindLineWithKeyword(txt, keywords):
    """Return the first newline-delimited line of |txt| containing a keyword.

    Keywords are tried in list order. The matched line is stripped and any
    literal '<p>' / '</p>' markers are removed from it.

    Args:
      txt: the text to search.
      keywords: a list of strings that are inserted verbatim into the regular
          expression, so they must not contain unintended regex syntax.

    Returns:
      The cleaned-up line, or None when no keyword matches.
    """
    for keyword in keywords:
      # The line must have a newline on both sides, so the very first and
      # very last lines of |txt| are only matched if |txt| is padded with
      # newlines.
      matches = re.search(r'\n(.*' + keyword + '.*)\n', txt)
      if matches is not None:
        text = matches.group(1).strip()
        return text.replace('<p>', '').replace('</p>', '')
    return None

  @staticmethod
  def ExtractTestDescription(txt):
    """Extract the test description from test code in HTML.

    Currently, we have 4 rules described in the code below.
    (This example falls into rule 1):
      <p>
      This tests the intrinsic size of a video element is the default
      300,150 before metadata is loaded, and 0,0 after
      metadata is loaded for an audio-only file.
      </p>
    The strategy is very ad hoc since the original test case files
    (in HTML format) do not have a standard way to store test descriptions.

    Args:
      txt: A HTML text which may or may not contain test description.

    Returns:
      A string that contains test description. Returns 'UNKNOWN' if the
          test description is not found.
    """
    # (1) Try to find test description that contains keywords such as
    #     'test that' and surrounded by p tag.
    #     This is the most common case.
    for keyword in KEYWORDS_FOR_TEST_DESCRIPTION:
      # Try to find <p> and </p>.
      pattern = r'<p>(.*' + keyword + '.*)</p>'
      matches = re.search(pattern, txt)
      if matches is not None:
        return matches.group(1).strip()

    # (2) Try to find it by using more generic keywords such as 'PASS' etc.
    description = LayoutTests._FindLineWithKeyword(
        txt, KEYWORD_FOR_TEST_DESCRIPTION_FAIL_SAFE)
    if description is not None:
      return description

    # (3) Try to find it by using HTML tag such as title.
    for tag in TAGS_FOR_TEST_DESCRIPTION:
      pattern = r'<' + tag + '>(.*)</' + tag + '>'
      matches = re.search(pattern, txt)
      if matches is not None:
        return matches.group(1).strip()

    # (4) Try to find a whole line containing one of the test description
    #     keywords and remove the 'p' tag from it.
    description = LayoutTests._FindLineWithKeyword(
        txt, KEYWORDS_FOR_TEST_DESCRIPTION)
    if description is not None:
      return description

    # (5) cannot find test description using existing rules.
    return 'UNKNOWN'

  @staticmethod
  def GetLayoutTestNamesFromSVN(parent_location_list,
                                layouttest_root_path, recursion):
    """Get LayoutTest names from SVN.

    Args:
      parent_location_list: a list of locations of parent directories. This is
          used when getting layout tests using PySVN.list(). Entries that do
          not end with '/' are skipped. None is treated as an empty list.
      layouttest_root_path: the root path of layout tests directory.
      recursion: a boolean indicating whether the test names are sought
          recursively.

    Returns:
      a map containing test names as keys for de-dupe. Only names ending with
          '.html' are included; every value is True.
    """
    client = pysvn.Client()
    # Get directory structure in the repository SVN.
    name_map = {}
    for parent_location in parent_location_list or []:
      if not parent_location.endswith('/'):
        continue
      full_path = layouttest_root_path + parent_location
      try:
        file_list = client.list(full_path, recurse=recursion)
        # The encoding does not depend on the individual file, so determine
        # it once per listing rather than once per file.
        if sys.stdout.isatty():
          default_encoding = sys.stdout.encoding
        else:
          default_encoding = locale.getpreferredencoding()
        for file_name in file_list:
          file_name = file_name[0].repos_path.encode(default_encoding)
          # Remove '/trunk/LayoutTests/' from the repository path so that the
          # name is relative to the layout test root.
          file_name = file_name.replace('/trunk/LayoutTests/', '')
          if file_name.endswith('.html'):
            name_map[file_name] = True
      except Exception:
        # Listing is best effort: report the failing location and carry on
        # with the next one. Catching Exception (rather than everything)
        # lets KeyboardInterrupt and SystemExit propagate.
        # The single-argument parenthesized form prints the same text as the
        # print statement.
        print('Unable to list tests in %s.' % full_path)
    return name_map

  @staticmethod
  def GetLayoutTestNamesFromCSV(csv_file_path):
    """Get layout test names from CSV file.

    Args:
      csv_file_path: the path for the CSV file containing test names (including
          regular expression patterns). The CSV file content has one column and
          each row contains a test name. Blank rows are ignored.

    Returns:
      a list of test names in string.
    """
    # The with-statement closes the file even if parsing raises.
    with open(csv_file_path, 'r') as file_object:
      # csv.reader yields an empty list for a blank line; skip those instead
      # of failing on row[0].
      return [row[0] for row in csv.reader(file_object) if row]

  @staticmethod
  def GetParentDirectoryList(names):
    """Get parent directory list from test names.

    Args:
      names: a list of test names. The test names also have path information as
          well (e.g., media/video-zoom.html).

    Returns:
      a list of parent directories for the given test names, without
          duplicates. Each directory keeps its trailing '/'; a name that
          contains no '/' contributes the empty string.
    """
    pd_map = {}
    for name in names:
      # rfind() returns -1 when there is no '/', which makes the slice empty.
      pd_map[name[:name.rfind('/') + 1]] = True
    return list(pd_map)

  def JoinWithTestExpectation(self, test_expectations):
    """Join layout tests with the test expectation file using test name as key.

    Args:
      test_expectations: a test expectations object. Its
          |all_test_expectation_info| attribute is read as a dict keyed by
          test name or by directory name ending with '/'.

    Returns:
      test_info_map contains test name as key and another map as value. The
          other map contains test description and the test expectation
          information which contains keyword (e.g., 'GPU') as key (we do not
          care about values). The map data structure is used since we have to
          look up these keywords several times.
    """
    test_info_map = {}
    for (lt_name, desc) in self.name_map.items():
      test_info = {'desc': desc}
      for (te_name, te_info) in (
          test_expectations.all_test_expectation_info.items()):
        # An expectation applies when it names the test exactly, or when it
        # is a directory entry (trailing '/') occurring in the test name.
        if te_name == lt_name or (
            te_name in lt_name and te_name.endswith('/')):
          # Only keep the first match when found ("first" is in the iteration
          # order of all_test_expectation_info).
          test_info['te_info'] = te_info
          break
      test_info_map[lt_name] = test_info
    return test_info_map