src/cobalt/media/tools/layout_tests/layouttests.py - cobalt - Git at Google

 # Copyright (c) 2012 The Chromium Authors. All rights reserved.
 # Use of this source code is governed by a BSD-style license that can be
 # found in the LICENSE file.

 """Layout tests module that is necessary for the layout analyzer.

 Layout tests are stored in an SVN repository and LayoutTestCaseManager collects
 these layout test cases (including description).
 """

 import copy
 import csv
 import locale
 import re
 import sys
 import urllib2

 import pysvn

 # Root URL of the LayoutTests directory in the SVN repository.
 DEFAULT_LAYOUTTEST_LOCATION = (
     'http://src.chromium.org/blink/trunk/LayoutTests/')
 # Root URL of the LayoutTests directory in the SVN view pages.
 DEFAULT_LAYOUTTEST_SVN_VIEW_LOCATION = (
     'http://src.chromium.org/viewvc/blink/trunk/LayoutTests/')


 # There is no standard for writing a test description inside a test HTML
 # file, so the description is located heuristically. Text containing one
 # of these keywords is treated as the description (rules 1 and 4 of
 # LayoutTests.ExtractTestDescription). The keywords are tried in order.
 KEYWORDS_FOR_TEST_DESCRIPTION = [
     'This test',
     'Tests that',
     'Test ',
 ]

 # Tags whose content is taken as the description when neither a keyword
 # paragraph nor a fail-safe line was found (rule 3).
 TAGS_FOR_TEST_DESCRIPTION = [
     'title',
     'p',
     'div',
 ]

 # More generic words; a line containing one of them is used as the
 # description when no keyword paragraph was found (rule 2).
 KEYWORD_FOR_TEST_DESCRIPTION_FAIL_SAFE = [
     'PASSED ',
     'PASS:',
 ]


 class LayoutTests(object):
   """A class to store test names in layout tests.

   The test names (including regular expression patterns) are read from a CSV
   file and used for getting layout test names from repository.

   Attributes:
     name_map: a dict keyed by the selected layout test names. Every value is
         the test description, currently always 'No description available'.
   """

   def __init__(self, layouttest_root_path=DEFAULT_LAYOUTTEST_LOCATION,
                parent_location_list=None, filter_names=None,
                recursion=False):
     """Initialize LayoutTests using the root location and name filters.

     Args:
       layouttest_root_path: A location string where layout tests are stored.
           Only locations starting with 'http://' are listed (through SVN);
           any other location yields an empty name map for now.
       parent_location_list: A list of parent directories that are needed for
           getting layout tests. None means no directories.
       filter_names: A list of test name patterns that are used for filtering
           test names. Each pattern is applied with re.search() and a name is
           kept when at least one pattern matches. None or an empty list
           keeps every name.
       recursion: a boolean indicating whether the test names are sought
           recursively.
     """
     if layouttest_root_path.startswith('http://'):
       listed_names = self.GetLayoutTestNamesFromSVN(
           parent_location_list or [], layouttest_root_path, recursion)
     else:
       # TODO(imasaki): support other forms such as CSV for reading test names.
       # Start from an empty map so the code below does not touch an unbound
       # local when the location is not an SVN URL.
       listed_names = {}

     if filter_names:
       selected_names = [
           lt_name for lt_name in listed_names
           if any(re.search(filter_name, lt_name)
                  for filter_name in filter_names)]
     else:
       selected_names = list(listed_names)
     # Descriptions are not fetched here; every selected name gets the same
     # placeholder.
     self.name_map = dict.fromkeys(selected_names, 'No description available')

   @staticmethod
   def _FindLineWithKeyword(txt, keywords):
     """Find the first line of txt that contains one of the keywords.

     Args:
       txt: the HTML text to search.
       keywords: keywords tried in order; the first keyword found wins.

     Returns:
       The matching line, stripped and with '<p>' and '</p>' removed, or None
           when no keyword occurs on a line that has a newline on both sides.
     """
     for keyword in keywords:
       matches = re.search(r'\n(.*' + keyword + '.*)\n', txt)
       if matches is not None:
         # Remove 'p' tag.
         text = matches.group(1).strip()
         return text.replace('<p>', '').replace('</p>', '')
     return None

   @staticmethod
   def ExtractTestDescription(txt):
     """Extract the test description from test code in HTML.

     The strategy is very adhoc since the original test case files
     (in HTML format) do not have standard way to store test description.
     The rules below are tried in order and the first hit is returned:
       (1) a '<p>...</p>' element on one line that contains one of
           KEYWORDS_FOR_TEST_DESCRIPTION.
       (2) a line that contains one of KEYWORD_FOR_TEST_DESCRIPTION_FAIL_SAFE.
       (3) the content of one of TAGS_FOR_TEST_DESCRIPTION on one line.
       (4) a line that contains one of KEYWORDS_FOR_TEST_DESCRIPTION.

     '.' does not match a newline in these patterns, so every rule only sees
     text within a single line. For example, for
       <p>
       This tests the intrinsic size of a video element is the default
       300,150 before metadata is loaded, and 0,0 after
       metadata is loaded for an audio-only file.
       </p>
     rule 1 does not apply; when rules 2 and 3 find nothing, rule 4 returns
     only the line starting with 'This tests'.
     NOTE(review): returning the whole paragraph may have been the intent;
     confirm before changing the patterns.

     Args:
       txt: A HTML text which may or may not contain test description.

     Returns:
       A string that contains test description. Returns 'UNKNOWN' if the
           test description is not found.
     """
     # (1) Try to find test description that contains keywords such as
     #     'test that' and surrounded by p tag.
     #     This is the most common case.
     for keyword in KEYWORDS_FOR_TEST_DESCRIPTION:
       matches = re.search(r'<p>(.*' + keyword + '.*)</p>', txt)
       if matches is not None:
         return matches.group(1).strip()

     # (2) Try to find it by using more generic keywords such as 'PASS' etc.
     description = LayoutTests._FindLineWithKeyword(
         txt, KEYWORD_FOR_TEST_DESCRIPTION_FAIL_SAFE)
     if description is not None:
       return description

     # (3) Try to find it by using HTML tag such as title.
     for tag in TAGS_FOR_TEST_DESCRIPTION:
       matches = re.search(r'<' + tag + '>(.*)</' + tag + '>', txt)
       if matches is not None:
         return matches.group(1).strip()

     # (4) Try to find it by using test description and remove 'p' tag.
     description = LayoutTests._FindLineWithKeyword(
         txt, KEYWORDS_FOR_TEST_DESCRIPTION)
     if description is not None:
       return description

     # (5) cannot find test description using existing rules.
     return 'UNKNOWN'

   @staticmethod
   def GetLayoutTestNamesFromSVN(parent_location_list,
                                 layouttest_root_path, recursion):
     """Get LayoutTest names from SVN.

     Listing is best effort: when a directory cannot be listed or processed,
     a message is printed and the remaining directories are still tried.

     Args:
       parent_location_list: a list of locations of parent directories. This is
           used when getting layout tests using PySVN.list(). Locations that
           do not end with '/' are skipped. None means no directories.
       layouttest_root_path: the root path of layout tests directory.
       recursion: a boolean indicating whether the test names are sought
           recursively.

     Returns:
       a map containing test names as keys for de-dupe. Only names ending
           with '.html' are included; every value is True.
     """
     client = pysvn.Client()
     # Get directory structure in the repository SVN.
     name_map = {}
     for parent_location in parent_location_list or []:
       if not parent_location.endswith('/'):
         continue
       full_path = layouttest_root_path + parent_location
       try:
         file_list = client.list(full_path, recurse=recursion)
         # The encoding is the same for every entry, so resolve it once per
         # listing instead of once per file.
         if sys.stdout.isatty():
           default_encoding = sys.stdout.encoding
         else:
           default_encoding = locale.getpreferredencoding()
         for entry in file_list:
           file_name = entry[0].repos_path.encode(default_encoding)
           # Strip the '/trunk/LayoutTests/' part of the repository path.
           file_name = file_name.replace('/trunk/LayoutTests/', '')
           if file_name.endswith('.html'):
             name_map[file_name] = True
       except Exception:
         # Exception (not a bare except) so that KeyboardInterrupt and
         # SystemExit still stop the program.
         print('Unable to list tests in %s.' % full_path)
     return name_map

   @staticmethod
   def GetLayoutTestNamesFromCSV(csv_file_path):
     """Get layout test names from CSV file.

     Args:
       csv_file_path: the path for the CSV file containing test names (including
           regular expression patterns). The CSV file content has one column and
           each row contains a test name. Blank rows are ignored.

     Returns:
        a list of test names in string.
     """
     # 'with' closes the file even if parsing raises.
     with open(csv_file_path, 'r') as csv_file:
       # csv.reader yields an empty list for a blank line; skip those rows
       # instead of failing on row[0].
       return [row[0] for row in csv.reader(csv_file) if row]

   @staticmethod
   def GetParentDirectoryList(names):
     """Get parent directory list from test names.

     Args:
       names: a list of test names. The test names also have path information as
           well (e.g., media/video-zoom.html).

     Returns:
       a list of parent directories for the given test names, without
           duplicates and in order of first appearance. Each directory keeps
           its trailing '/'; a name without '/' contributes ''.
     """
     parent_dirs = []
     seen = set()
     for name in names:
       # rfind() returns -1 when there is no '/', which makes the slice empty.
       p_dir = name[:name.rfind('/') + 1]
       if p_dir not in seen:
         seen.add(p_dir)
         parent_dirs.append(p_dir)
     return parent_dirs

   def JoinWithTestExpectation(self, test_expectations):
     """Join layout tests with the test expectation file using test name as key.

     Args:
       test_expectations: a test expectations object. Its
           all_test_expectation_info attribute is read as a map from
           expectation name to expectation information.

     Returns:
       test_info_map contains test name as key and another map as value. The
           other map contains test description ('desc') and, only when an
           expectation matches, the test expectation information ('te_info')
           which contains keyword (e.g., 'GPU') as key (we do not care about
           values). The map data structure is used since we have to look up
           these keywords several times.
     """
     test_info_map = {}
     for (lt_name, desc) in self.name_map.items():
       test_info = {'desc': desc}
       test_info_map[lt_name] = test_info
       for (te_name, te_info) in (
           test_expectations.all_test_expectation_info.items()):
         # An expectation applies to the exact test, or to a directory
         # (name ending with '/') that occurs in the test name.
         # NOTE(review): 'in' is a substring test, so 'media/' also matches
         # 'http/tests/media/a.html'; confirm whether a prefix match was
         # intended.
         is_directory_match = te_name.endswith('/') and te_name in lt_name
         if te_name == lt_name or is_directory_match:
           # Only keep the first match when found.
           test_info['te_info'] = te_info
           break
     return test_info_map
	# Copyright (c) 2012 The Chromium Authors. All rights reserved.
	# Use of this source code is governed by a BSD-style license that can be
	# found in the LICENSE file.

	"""Layout tests module that is necessary for the layout analyzer.

	Layout tests are stored in an SVN repository and LayoutTestCaseManager collects
	these layout test cases (including description).
	"""

	import copy
	import csv
	import locale
	import re
	import sys
	import urllib2

	import pysvn

	# Root URL of the LayoutTests directory in the SVN repository.
	DEFAULT_LAYOUTTEST_LOCATION = (
	    'http://src.chromium.org/blink/trunk/LayoutTests/')
	# Root URL of the LayoutTests directory in the SVN view pages.
	DEFAULT_LAYOUTTEST_SVN_VIEW_LOCATION = (
	    'http://src.chromium.org/viewvc/blink/trunk/LayoutTests/')


	# There is no standard for writing a test description inside a test HTML
	# file, so the description is located heuristically. Text containing one
	# of these keywords is treated as the description (rules 1 and 4 of
	# LayoutTests.ExtractTestDescription). The keywords are tried in order.
	KEYWORDS_FOR_TEST_DESCRIPTION = [
	    'This test',
	    'Tests that',
	    'Test ',
	]

	# Tags whose content is taken as the description when neither a keyword
	# paragraph nor a fail-safe line was found (rule 3).
	TAGS_FOR_TEST_DESCRIPTION = [
	    'title',
	    'p',
	    'div',
	]

	# More generic words; a line containing one of them is used as the
	# description when no keyword paragraph was found (rule 2).
	KEYWORD_FOR_TEST_DESCRIPTION_FAIL_SAFE = [
	    'PASSED ',
	    'PASS:',
	]


	class LayoutTests(object):
	  """A class to store test names in layout tests.

	  The test names (including regular expression patterns) are read from a CSV
	  file and used for getting layout test names from repository.

	  Attributes:
	    name_map: a dict keyed by the selected layout test names. Every value is
	        the test description, currently always 'No description available'.
	  """

	  def __init__(self, layouttest_root_path=DEFAULT_LAYOUTTEST_LOCATION,
	               parent_location_list=None, filter_names=None,
	               recursion=False):
	    """Initialize LayoutTests using the root location and name filters.

	    Args:
	      layouttest_root_path: A location string where layout tests are stored.
	          Only locations starting with 'http://' are listed (through SVN);
	          any other location yields an empty name map for now.
	      parent_location_list: A list of parent directories that are needed for
	          getting layout tests. None means no directories.
	      filter_names: A list of test name patterns that are used for filtering
	          test names. Each pattern is applied with re.search() and a name is
	          kept when at least one pattern matches. None or an empty list
	          keeps every name.
	      recursion: a boolean indicating whether the test names are sought
	          recursively.
	    """
	    if layouttest_root_path.startswith('http://'):
	      listed_names = self.GetLayoutTestNamesFromSVN(
	          parent_location_list or [], layouttest_root_path, recursion)
	    else:
	      # TODO(imasaki): support other forms such as CSV for reading test names.
	      # Start from an empty map so the code below does not touch an unbound
	      # local when the location is not an SVN URL.
	      listed_names = {}

	    if filter_names:
	      selected_names = [
	          lt_name for lt_name in listed_names
	          if any(re.search(filter_name, lt_name)
	                 for filter_name in filter_names)]
	    else:
	      selected_names = list(listed_names)
	    # Descriptions are not fetched here; every selected name gets the same
	    # placeholder.
	    self.name_map = dict.fromkeys(selected_names, 'No description available')

	  @staticmethod
	  def _FindLineWithKeyword(txt, keywords):
	    """Find the first line of txt that contains one of the keywords.

	    Args:
	      txt: the HTML text to search.
	      keywords: keywords tried in order; the first keyword found wins.

	    Returns:
	      The matching line, stripped and with '<p>' and '</p>' removed, or None
	          when no keyword occurs on a line that has a newline on both sides.
	    """
	    for keyword in keywords:
	      # '.*' on both sides captures the whole line around the keyword.
	      matches = re.search(r'\n(.*' + keyword + '.*)\n', txt)
	      if matches is not None:
	        # Remove 'p' tag.
	        text = matches.group(1).strip()
	        return text.replace('<p>', '').replace('</p>', '')
	    return None

	  @staticmethod
	  def ExtractTestDescription(txt):
	    """Extract the test description from test code in HTML.

	    The strategy is very adhoc since the original test case files
	    (in HTML format) do not have standard way to store test description.
	    The rules below are tried in order and the first hit is returned:
	      (1) a '<p>...</p>' element on one line that contains one of
	          KEYWORDS_FOR_TEST_DESCRIPTION.
	      (2) a line that contains one of KEYWORD_FOR_TEST_DESCRIPTION_FAIL_SAFE.
	      (3) the content of one of TAGS_FOR_TEST_DESCRIPTION on one line.
	      (4) a line that contains one of KEYWORDS_FOR_TEST_DESCRIPTION.

	    '.' does not match a newline in these patterns, so every rule only sees
	    text within a single line. For example, for
	      <p>
	      This tests the intrinsic size of a video element is the default
	      300,150 before metadata is loaded, and 0,0 after
	      metadata is loaded for an audio-only file.
	      </p>
	    rule 1 does not apply; when rules 2 and 3 find nothing, rule 4 returns
	    only the line starting with 'This tests'.
	    NOTE(review): returning the whole paragraph may have been the intent;
	    confirm before changing the patterns.

	    Args:
	      txt: A HTML text which may or may not contain test description.

	    Returns:
	      A string that contains test description. Returns 'UNKNOWN' if the
	          test description is not found.
	    """
	    # (1) Try to find test description that contains keywords such as
	    #     'test that' and surrounded by p tag.
	    #     This is the most common case.
	    for keyword in KEYWORDS_FOR_TEST_DESCRIPTION:
	      # '.*' (not a single '.') so any amount of text may surround the
	      # keyword inside the paragraph.
	      matches = re.search(r'<p>(.*' + keyword + '.*)</p>', txt)
	      if matches is not None:
	        return matches.group(1).strip()

	    # (2) Try to find it by using more generic keywords such as 'PASS' etc.
	    description = LayoutTests._FindLineWithKeyword(
	        txt, KEYWORD_FOR_TEST_DESCRIPTION_FAIL_SAFE)
	    if description is not None:
	      return description

	    # (3) Try to find it by using HTML tag such as title.
	    for tag in TAGS_FOR_TEST_DESCRIPTION:
	      matches = re.search(r'<' + tag + '>(.*)</' + tag + '>', txt)
	      if matches is not None:
	        return matches.group(1).strip()

	    # (4) Try to find it by using test description and remove 'p' tag.
	    description = LayoutTests._FindLineWithKeyword(
	        txt, KEYWORDS_FOR_TEST_DESCRIPTION)
	    if description is not None:
	      return description

	    # (5) cannot find test description using existing rules.
	    return 'UNKNOWN'

	  @staticmethod
	  def GetLayoutTestNamesFromSVN(parent_location_list,
	                                layouttest_root_path, recursion):
	    """Get LayoutTest names from SVN.

	    Listing is best effort: when a directory cannot be listed or processed,
	    a message is printed and the remaining directories are still tried.

	    Args:
	      parent_location_list: a list of locations of parent directories. This is
	          used when getting layout tests using PySVN.list(). Locations that
	          do not end with '/' are skipped. None means no directories.
	      layouttest_root_path: the root path of layout tests directory.
	      recursion: a boolean indicating whether the test names are sought
	          recursively.

	    Returns:
	      a map containing test names as keys for de-dupe. Only names ending
	          with '.html' are included; every value is True.
	    """
	    client = pysvn.Client()
	    # Get directory structure in the repository SVN.
	    name_map = {}
	    for parent_location in parent_location_list or []:
	      if not parent_location.endswith('/'):
	        continue
	      full_path = layouttest_root_path + parent_location
	      try:
	        file_list = client.list(full_path, recurse=recursion)
	        # The encoding is the same for every entry, so resolve it once per
	        # listing instead of once per file.
	        if sys.stdout.isatty():
	          default_encoding = sys.stdout.encoding
	        else:
	          default_encoding = locale.getpreferredencoding()
	        for entry in file_list:
	          file_name = entry[0].repos_path.encode(default_encoding)
	          # Strip the '/trunk/LayoutTests/' part of the repository path.
	          file_name = file_name.replace('/trunk/LayoutTests/', '')
	          if file_name.endswith('.html'):
	            name_map[file_name] = True
	      except Exception:
	        # Exception (not a bare except) so that KeyboardInterrupt and
	        # SystemExit still stop the program.
	        print('Unable to list tests in %s.' % full_path)
	    return name_map

	  @staticmethod
	  def GetLayoutTestNamesFromCSV(csv_file_path):
	    """Get layout test names from CSV file.

	    Args:
	      csv_file_path: the path for the CSV file containing test names (including
	          regular expression patterns). The CSV file content has one column and
	          each row contains a test name. Blank rows are ignored.

	    Returns:
	      a list of test names in string.
	    """
	    # 'with' closes the file even if parsing raises.
	    with open(csv_file_path, 'r') as csv_file:
	      # csv.reader yields an empty list for a blank line; skip those rows
	      # instead of failing on row[0].
	      return [row[0] for row in csv.reader(csv_file) if row]

	  @staticmethod
	  def GetParentDirectoryList(names):
	    """Get parent directory list from test names.

	    Args:
	      names: a list of test names. The test names also have path information as
	          well (e.g., media/video-zoom.html).

	    Returns:
	      a list of parent directories for the given test names, without
	          duplicates and in order of first appearance. Each directory keeps
	          its trailing '/'; a name without '/' contributes ''.
	    """
	    parent_dirs = []
	    seen = set()
	    for name in names:
	      # rfind() returns -1 when there is no '/', which makes the slice empty.
	      p_dir = name[:name.rfind('/') + 1]
	      if p_dir not in seen:
	        seen.add(p_dir)
	        parent_dirs.append(p_dir)
	    return parent_dirs

	  def JoinWithTestExpectation(self, test_expectations):
	    """Join layout tests with the test expectation file using test name as key.

	    Args:
	      test_expectations: a test expectations object. Its
	          all_test_expectation_info attribute is read as a map from
	          expectation name to expectation information.

	    Returns:
	      test_info_map contains test name as key and another map as value. The
	          other map contains test description ('desc') and, only when an
	          expectation matches, the test expectation information ('te_info')
	          which contains keyword (e.g., 'GPU') as key (we do not care about
	          values). The map data structure is used since we have to look up
	          these keywords several times.
	    """
	    test_info_map = {}
	    for (lt_name, desc) in self.name_map.items():
	      test_info = {'desc': desc}
	      test_info_map[lt_name] = test_info
	      for (te_name, te_info) in (
	          test_expectations.all_test_expectation_info.items()):
	        # An expectation applies to the exact test, or to a directory
	        # (name ending with '/') that occurs in the test name.
	        # NOTE(review): 'in' is a substring test, so 'media/' also matches
	        # 'http/tests/media/a.html'; confirm whether a prefix match was
	        # intended.
	        is_directory_match = te_name.endswith('/') and te_name in lt_name
	        if te_name == lt_name or is_directory_match:
	          # Only keep the first match when found.
	          test_info['te_info'] = te_info
	          break
	    return test_info_map