blob: ae08e82d3e41df34d8c67fc0970d60cdebaee1fc [file] [log] [blame]
import sys
import os
import hashlib
import urllib
import itertools
import re
import json
import glob
import shutil
import genshi
from genshi.template import MarkupTemplate
from html5lib.tests import support
except ImportError:
print """This script requires the Genshi templating library and html5lib source
It is recommended that these are installed in a virtualenv:
virtualenv venv
source venv/bin/activate
pip install genshi
cd venv
git clone html5lib
cd html5lib
git submodule init
git submodule update
pip install -e ./
Then run this script again, with the virtual environment still active.
When you are done, type "deactivate" to deactivate the virtual environment.
TESTS_PATH = "html/syntax/parsing/"
def get_paths():
script_path = os.path.split(os.path.abspath(__file__))[0]
repo_base = get_repo_base(script_path)
tests_path = os.path.join(repo_base, TESTS_PATH)
return script_path, tests_path
def get_repo_base(path):
while path:
if os.path.exists(os.path.join(path, ".git")):
return path
path = os.path.split(path)[0]
def get_expected(data):
data = "#document\n" + data
return data
def get_hash(data, container=None):
if container == None:
container = ""
return hashlib.sha1("#container%s#data%s"%(container.encode("utf8"),
def make_tests(script_dir, out_dir, input_file_name, test_data):
tests = []
innerHTML_tests = []
ids_seen = {}
print input_file_name
for test in test_data:
if "script-off" in test:
is_innerHTML = "document-fragment" in test
data = test["data"]
container = test["document-fragment"] if is_innerHTML else None
assert test["document"], test
expected = get_expected(test["document"])
test_list = innerHTML_tests if is_innerHTML else tests
test_id = get_hash(data, container)
if test_id in ids_seen:
print "WARNING: id %s seen multiple times in file %s this time for test (%s, %s) before for test %s, skipping"%(test_id, input_file_name, container, data, ids_seen[test_id])
ids_seen[test_id] = (container, data)
path_normal = None
if tests:
path_normal = write_test_file(script_dir, out_dir,
tests, "html5lib_%s"%input_file_name,
path_innerHTML = None
if innerHTML_tests:
path_innerHTML = write_test_file(script_dir, out_dir,
innerHTML_tests, "html5lib_innerHTML_%s"%input_file_name,
return path_normal, path_innerHTML
def write_test_file(script_dir, out_dir, tests, file_name, template_file_name):
file_name = os.path.join(out_dir, file_name + ".html")
short_name = os.path.split(file_name)[1]
with open(os.path.join(script_dir, template_file_name)) as f:
template = MarkupTemplate(f)
stream = template.generate(file_name=short_name, tests=tests)
with open(file_name, "w") as f:
f.write(stream.render('html', doctype='html5',
return file_name
def escape_js_string(in_data):
return in_data.encode("utf8").encode("string-escape")
def serialize_filenames(test_filenames):
return "[" + ",\n".join("\"%s\""%item for item in test_filenames) + "]"
def main():
script_dir, out_dir = get_paths()
test_files = []
inner_html_files = []
if len(sys.argv) > 2:
test_iterator = itertools.izip(
sorted(os.path.abspath(item) for item in
glob.glob(os.path.join(sys.argv[2], "*.dat"))))
test_iterator = itertools.chain(
os.path.join("tree-construction", "scripted")))))
for (scripted, test_file) in test_iterator:
input_file_name = os.path.splitext(os.path.split(test_file)[1])[0]
if scripted:
input_file_name = "scripted_" + input_file_name
test_data = support.TestData(test_file)
test_filename, inner_html_file_name = make_tests(script_dir, out_dir,
input_file_name, test_data)
if test_filename is not None:
if inner_html_file_name is not None:
if __name__ == "__main__":