src/third_party/blink/Tools/Scripts/webkitpy/w3c/test_parser.py - cobalt - Git at Google

 # Copyright (C) 2013 Adobe Systems Incorporated. All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions
 # are met:
 #
 # 1. Redistributions of source code must retain the above
 #    copyright notice, this list of conditions and the following
 #    disclaimer.
 # 2. Redistributions in binary form must reproduce the above
 #    copyright notice, this list of conditions and the following
 #    disclaimer in the documentation and/or other materials
 #    provided with the distribution.
 #
 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER "AS IS" AND ANY
 # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE
 # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
 # OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
 # TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
 # THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 # SUCH DAMAGE.

 import HTMLParser
 import logging
 import re

 from webkitpy.thirdparty.BeautifulSoup import BeautifulSoup


 # Module-level logger, named after this module via __name__.
 _log = logging.getLogger(__name__)


 class TestParser(object):
     """Classifies a test file and finds the reference and support files it uses.

     The file named at construction time is parsed with BeautifulSoup. The
     parsed test document is kept in ``test_doc`` and the parsed reference
     document, once loaded, in ``ref_doc``; either is None when the file is
     missing or could not be read or parsed.
     """

     def __init__(self, filename, host):
         """Stores the path and host, then parses the test file.

         Args:
             filename: Path of the test file to analyze.
             host: Object whose ``filesystem`` attribute performs all path
                 and file operations used by this class.
         """
         self.filename = filename
         self.host = host
         self.filesystem = self.host.filesystem

         self.test_doc = None
         self.ref_doc = None
         self.load_file(filename)

     def load_file(self, filename, is_ref=False):
         """Parses |filename| and stores the result as the test or reference document.

         Read and parse failures are logged rather than raised; in that case,
         or when |filename| is not a regular file, the stored document is None.

         Args:
             filename: Path of the file to parse.
             is_ref: When true the result is stored in ``ref_doc``, otherwise
                 in ``test_doc``.
         """
         doc = None
         if self.filesystem.isfile(filename):
             try:
                 doc = BeautifulSoup(self.filesystem.read_binary_file(filename))
             except IOError:
                 _log.error('IOError: Failed to read %s', filename)
             except HTMLParser.HTMLParseError:
                 # FIXME: Figure out what to do if we can't parse the file.
                 _log.error('HTMLParseError: Failed to parse %s', filename)
             except UnicodeEncodeError:
                 _log.error('UnicodeEncodeError while reading %s', filename)
         elif self.filesystem.isdir(filename):
             # FIXME: Figure out what is triggering this and what to do about it.
             _log.error('Trying to load %s, which is a directory', filename)

         if is_ref:
             self.ref_doc = doc
         else:
             self.test_doc = doc

     def analyze_test(self, test_contents=None, ref_contents=None):
         """Analyzes a file to determine if it's a test, what type of test, and what reference or support files it requires.

         Args:
             test_contents: Optional markup that replaces the parsed test document.
             ref_contents: Optional markup that replaces the parsed reference document.

         Returns: A dict which can have the properties:
             "test": test file name.
             "reference": related reference test file name if this is a reference test.
             "reference_support_info": extra information about the related reference test and any support files.
             "jstest": A boolean, whether this is a JS test.
             If the path doesn't look like a test, the given contents are empty,
             or a reference link has no "href", then None is returned.
         """
         if test_contents is None and self.test_doc is None:
             return None

         if test_contents is not None:
             self.test_doc = BeautifulSoup(test_contents)

         if ref_contents is not None:
             self.ref_doc = BeautifulSoup(ref_contents)

         # First check if it's a reftest.
         matches = self.reference_links_of_type('match') + self.reference_links_of_type('mismatch')
         if matches:
             return self._analyze_reftest(matches)

         if self.is_jstest():
             return {'test': self.filename, 'jstest': True}

         if 'csswg-test' in self.filename:
             # In csswg-test, all other files should be manual tests.
             # This function isn't called for non-test files in support/.
             return {'test': self.filename}

         if '-manual.' in self.filesystem.basename(self.filename):
             # WPT has a naming convention for manual tests.
             return {'test': self.filename}

         return None

     def _analyze_reftest(self, matches):
         """Builds the test-info dict for a reftest from its reference link elements.

         Only the first element of |matches| is used. Returns None when that
         element has no "href" attribute.
         """
         if len(matches) > 1:
             # FIXME: Is this actually true? We should fix this.
             _log.warning('Multiple references are not supported. Importing the first ref defined in %s',
                          self.filesystem.basename(self.filename))

         test_dir = self.filesystem.dirname(self.filename)
         try:
             ref_file = self.filesystem.join(test_dir, matches[0]['href'])
         except KeyError:
             # FIXME: Figure out what to do w/ invalid test files.
             # Report the offending test file (not the filesystem object).
             _log.error('%s has a reference link but is missing the "href"', self.filename)
             return None

         if self.ref_doc is None:
             self.load_file(ref_file, True)

         test_info = {'test': self.filename, 'reference': ref_file, 'reference_support_info': {}}

         # If the ref file does not live in the same directory as the test file, check it for support files.
         ref_dir = self.filesystem.dirname(ref_file)
         if ref_dir != test_dir:
             reference_support_files = self.support_files(self.ref_doc)
             if reference_support_files:
                 reference_relpath = self.filesystem.relpath(test_dir, ref_dir) + self.filesystem.sep
                 test_info['reference_support_info'] = {'reference_relpath': reference_relpath, 'files': reference_support_files}

         return test_info

     def reference_links_of_type(self, reftest_type):
         """Returns the elements of the test document whose "rel" attribute is |reftest_type|."""
         return self.test_doc.findAll(rel=reftest_type)

     def is_jstest(self):
         """Returns whether the file appears to be a jstest, by searching for usage of W3C-style testharness paths."""
         return bool(self.test_doc.find(src=re.compile('[\'\"/]?/resources/testharness')))

     def support_files(self, doc):
         """Searches the file for all paths specified in url()s, src or href attributes.

         Args:
             doc: Parsed document to search, or None.

         Returns:
             A list of the paths found (src values, then href values, then
             url() values) that do not start with a URI scheme such as
             "http:" or "mailto:". Empty when |doc| is None.
         """
         if doc is None:
             return []

         src_paths = [src_tag['src'] for src_tag in doc.findAll(src=re.compile('.*'))]
         href_paths = [href_tag['href'] for href_tag in doc.findAll(href=re.compile('.*'))]

         # Match each url(...) separately so that several on one line, or several
         # in one text node, each yield their own path. Quoted forms are tried
         # first so a ")" inside the quotes does not end the match early.
         url_pattern = re.compile(r'url\(\s*(?:"([^"]*)"|\'([^\']*)\'|([^)]*?))\s*\)')
         urls = []
         for text in doc.findAll(text=url_pattern):
             for found in url_pattern.finditer(text):
                 urls.append(found.group(1) or found.group(2) or found.group(3) or '')

         # Anything starting with a URI scheme ("http:", "mailto:", "data:", ...)
         # is not a local support file.
         uri_scheme_pattern = re.compile(r'[A-Za-z][A-Za-z+.-]*:')
         return [path for path in src_paths + href_paths + urls
                 if not uri_scheme_pattern.match(path)]
	# Copyright (C) 2013 Adobe Systems Incorporated. All rights reserved.
	#
	# Redistribution and use in source and binary forms, with or without
	# modification, are permitted provided that the following conditions
	# are met:
	#
	# 1. Redistributions of source code must retain the above
	# copyright notice, this list of conditions and the following
	# disclaimer.
	# 2. Redistributions in binary form must reproduce the above
	# copyright notice, this list of conditions and the following
	# disclaimer in the documentation and/or other materials
	# provided with the distribution.
	#
	# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER "AS IS" AND ANY
	# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
	# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
	# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE
	# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
	# OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
	# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
	# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
	# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
	# TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
	# THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
	# SUCH DAMAGE.

	import HTMLParser
	import logging
	import re

	from webkitpy.thirdparty.BeautifulSoup import BeautifulSoup


	# Module-level logger, named after this module via __name__.
	_log = logging.getLogger(__name__)


	class TestParser(object):
	    """Classifies a test file and finds the reference and support files it uses.

	    The file named at construction time is parsed with BeautifulSoup. The
	    parsed test document is kept in ``test_doc`` and the parsed reference
	    document, once loaded, in ``ref_doc``; either is None when the file is
	    missing or could not be read or parsed.
	    """

	    def __init__(self, filename, host):
	        """Stores the path and host, then parses the test file.

	        Args:
	            filename: Path of the test file to analyze.
	            host: Object whose ``filesystem`` attribute performs all path
	                and file operations used by this class.
	        """
	        self.filename = filename
	        self.host = host
	        self.filesystem = self.host.filesystem

	        self.test_doc = None
	        self.ref_doc = None
	        self.load_file(filename)

	    def load_file(self, filename, is_ref=False):
	        """Parses |filename| and stores the result as the test or reference document.

	        Read and parse failures are logged rather than raised; in that case,
	        or when |filename| is not a regular file, the stored document is None.

	        Args:
	            filename: Path of the file to parse.
	            is_ref: When true the result is stored in ``ref_doc``, otherwise
	                in ``test_doc``.
	        """
	        doc = None
	        if self.filesystem.isfile(filename):
	            try:
	                doc = BeautifulSoup(self.filesystem.read_binary_file(filename))
	            except IOError:
	                _log.error('IOError: Failed to read %s', filename)
	            except HTMLParser.HTMLParseError:
	                # FIXME: Figure out what to do if we can't parse the file.
	                _log.error('HTMLParseError: Failed to parse %s', filename)
	            except UnicodeEncodeError:
	                _log.error('UnicodeEncodeError while reading %s', filename)
	        elif self.filesystem.isdir(filename):
	            # FIXME: Figure out what is triggering this and what to do about it.
	            _log.error('Trying to load %s, which is a directory', filename)

	        if is_ref:
	            self.ref_doc = doc
	        else:
	            self.test_doc = doc

	    def analyze_test(self, test_contents=None, ref_contents=None):
	        """Analyzes a file to determine if it's a test, what type of test, and what reference or support files it requires.

	        Args:
	            test_contents: Optional markup that replaces the parsed test document.
	            ref_contents: Optional markup that replaces the parsed reference document.

	        Returns: A dict which can have the properties:
	            "test": test file name.
	            "reference": related reference test file name if this is a reference test.
	            "reference_support_info": extra information about the related reference test and any support files.
	            "jstest": A boolean, whether this is a JS test.
	            If the path doesn't look like a test, the given contents are empty,
	            or a reference link has no "href", then None is returned.
	        """
	        if test_contents is None and self.test_doc is None:
	            return None

	        if test_contents is not None:
	            self.test_doc = BeautifulSoup(test_contents)

	        if ref_contents is not None:
	            self.ref_doc = BeautifulSoup(ref_contents)

	        # First check if it's a reftest.
	        matches = self.reference_links_of_type('match') + self.reference_links_of_type('mismatch')
	        if matches:
	            return self._analyze_reftest(matches)

	        if self.is_jstest():
	            return {'test': self.filename, 'jstest': True}

	        if 'csswg-test' in self.filename:
	            # In csswg-test, all other files should be manual tests.
	            # This function isn't called for non-test files in support/.
	            return {'test': self.filename}

	        if '-manual.' in self.filesystem.basename(self.filename):
	            # WPT has a naming convention for manual tests.
	            return {'test': self.filename}

	        return None

	    def _analyze_reftest(self, matches):
	        """Builds the test-info dict for a reftest from its reference link elements.

	        Only the first element of |matches| is used. Returns None when that
	        element has no "href" attribute.
	        """
	        if len(matches) > 1:
	            # FIXME: Is this actually true? We should fix this.
	            _log.warning('Multiple references are not supported. Importing the first ref defined in %s',
	                         self.filesystem.basename(self.filename))

	        test_dir = self.filesystem.dirname(self.filename)
	        try:
	            ref_file = self.filesystem.join(test_dir, matches[0]['href'])
	        except KeyError:
	            # FIXME: Figure out what to do w/ invalid test files.
	            # Report the offending test file (not the filesystem object).
	            _log.error('%s has a reference link but is missing the "href"', self.filename)
	            return None

	        if self.ref_doc is None:
	            self.load_file(ref_file, True)

	        test_info = {'test': self.filename, 'reference': ref_file, 'reference_support_info': {}}

	        # If the ref file does not live in the same directory as the test file, check it for support files.
	        ref_dir = self.filesystem.dirname(ref_file)
	        if ref_dir != test_dir:
	            reference_support_files = self.support_files(self.ref_doc)
	            if reference_support_files:
	                reference_relpath = self.filesystem.relpath(test_dir, ref_dir) + self.filesystem.sep
	                test_info['reference_support_info'] = {'reference_relpath': reference_relpath, 'files': reference_support_files}

	        return test_info

	    def reference_links_of_type(self, reftest_type):
	        """Returns the elements of the test document whose "rel" attribute is |reftest_type|."""
	        return self.test_doc.findAll(rel=reftest_type)

	    def is_jstest(self):
	        """Returns whether the file appears to be a jstest, by searching for usage of W3C-style testharness paths."""
	        return bool(self.test_doc.find(src=re.compile('[\'\"/]?/resources/testharness')))

	    def support_files(self, doc):
	        """Searches the file for all paths specified in url()s, src or href attributes.

	        Args:
	            doc: Parsed document to search, or None.

	        Returns:
	            A list of the paths found (src values, then href values, then
	            url() values) that do not start with a URI scheme such as
	            "http:" or "mailto:". Empty when |doc| is None.
	        """
	        if doc is None:
	            return []

	        src_paths = [src_tag['src'] for src_tag in doc.findAll(src=re.compile('.*'))]
	        href_paths = [href_tag['href'] for href_tag in doc.findAll(href=re.compile('.*'))]

	        # Match each url(...) separately so that several on one line, or several
	        # in one text node, each yield their own path. Quoted forms are tried
	        # first so a ")" inside the quotes does not end the match early.
	        url_pattern = re.compile(r'url\(\s*(?:"([^"]*)"|\'([^\']*)\'|([^)]*?))\s*\)')
	        urls = []
	        for text in doc.findAll(text=url_pattern):
	            for found in url_pattern.finditer(text):
	                urls.append(found.group(1) or found.group(2) or found.group(3) or '')

	        # Anything starting with a URI scheme ("http:", "mailto:", "data:", ...)
	        # is not a local support file.
	        uri_scheme_pattern = re.compile(r'[A-Za-z][A-Za-z+.-]*:')
	        return [path for path in src_paths + href_paths + urls
	                if not uri_scheme_pattern.match(path)]