W3C Test Import script reformats test HTML
authorbjonesbe@adobe.com <bjonesbe@adobe.com@268f45cc-cd09-0410-ab3c-d52691b4dbfc>
Thu, 19 Sep 2013 00:55:50 +0000 (00:55 +0000)
committerbjonesbe@adobe.com <bjonesbe@adobe.com@268f45cc-cd09-0410-ab3c-d52691b4dbfc>
Thu, 19 Sep 2013 00:55:50 +0000 (00:55 +0000)
https://bugs.webkit.org/show_bug.cgi?id=119159

Reviewed by Dirk Pranke.

Completely rewrite the test conversion process to minimize
reformatting when adding prefixes, etc. This isn't 100% perfect, there
are still places where it will end up changing the formatting, but it
is much better than before. Most notably, the public interface to the
test converter has changed: now one calls a method instead of creating
an instance of the test converter class. This is because the test
converter class now has state, so one really needs a new instance for
each test.

Note that this also lays some simple groundwork for being able to use
a MockHost in the tests.

* Scripts/webkitpy/w3c/test_converter.py:
(convert_for_webkit):
(_W3CTestConverter):
(_W3CTestConverter.__init__):
(_W3CTestConverter.output):
(_W3CTestConverter.add_webkit_prefix_to_unprefixed_properties):
(_W3CTestConverter.convert_style_data):
(_W3CTestConverter.convert_attributes_if_needed):
(_W3CTestConverter.handle_starttag):
(_W3CTestConverter.handle_endtag):
(_W3CTestConverter.handle_startendtag):
(_W3CTestConverter.handle_data):
(_W3CTestConverter.handle_entityref):
(_W3CTestConverter.handle_charref):
(_W3CTestConverter.handle_comment):
(_W3CTestConverter.handle_decl):
(_W3CTestConverter.handle_pi):
* Scripts/webkitpy/w3c/test_converter_unittest.py:
(W3CTestConverterTest):
(W3CTestConverterTest.fake_dir_path):
(W3CTestConverterTest.test_read_prefixed_property_list):
(verify_no_conversion_happened):
* Scripts/webkitpy/w3c/test_importer.py:
(TestImporter.import_tests):

git-svn-id: https://svn.webkit.org/repository/webkit/trunk@156074 268f45cc-cd09-0410-ab3c-d52691b4dbfc

Tools/ChangeLog
Tools/Scripts/webkitpy/w3c/test_converter.py
Tools/Scripts/webkitpy/w3c/test_converter_unittest.py
Tools/Scripts/webkitpy/w3c/test_importer.py

index f8814d9..ea017dc 100644 (file)
@@ -1,3 +1,47 @@
+2013-09-18  Bem Jones-Bey  <bjonesbe@adobe.com>
+
+        W3C Test Import script reformats test HTML
+        https://bugs.webkit.org/show_bug.cgi?id=119159
+
+        Reviewed by Dirk Pranke.
+
+        Completely rewrite the test conversion process to minimize
+        reformatting when adding prefixes, etc. This isn't 100% perfect, there
+        are still places where it will end up changing the formatting, but it
+        is much better than before. Most notably, the public interface to the
+        test converter has changed: now one calls a method instead of creating
+        an instance of the test converter class. This is because the test
+        converter class now has state, so one really needs a new instance for
+        each test.
+
+        Note that this also lays some simple groundwork for being able to use
+        a MockHost in the tests.
+
+        * Scripts/webkitpy/w3c/test_converter.py:
+        (convert_for_webkit):
+        (_W3CTestConverter):
+        (_W3CTestConverter.__init__):
+        (_W3CTestConverter.output):
+        (_W3CTestConverter.add_webkit_prefix_to_unprefixed_properties):
+        (_W3CTestConverter.convert_style_data):
+        (_W3CTestConverter.convert_attributes_if_needed):
+        (_W3CTestConverter.handle_starttag):
+        (_W3CTestConverter.handle_endtag):
+        (_W3CTestConverter.handle_startendtag):
+        (_W3CTestConverter.handle_data):
+        (_W3CTestConverter.handle_entityref):
+        (_W3CTestConverter.handle_charref):
+        (_W3CTestConverter.handle_comment):
+        (_W3CTestConverter.handle_decl):
+        (_W3CTestConverter.handle_pi):
+        * Scripts/webkitpy/w3c/test_converter_unittest.py:
+        (W3CTestConverterTest):
+        (W3CTestConverterTest.fake_dir_path):
+        (W3CTestConverterTest.test_read_prefixed_property_list):
+        (verify_no_conversion_happened):
+        * Scripts/webkitpy/w3c/test_importer.py:
+        (TestImporter.import_tests):
+
 2013-09-18  Filip Pizlo  <fpizlo@apple.com>
 
         Get rid of the jsc-test-list by moving all not-jsc-capable tests into js/dom
index 8114697..2bb0456 100644 (file)
@@ -32,28 +32,56 @@ import re
 
 from webkitpy.common.host import Host
 from webkitpy.common.webkit_finder import WebKitFinder
-from webkitpy.thirdparty.BeautifulSoup import BeautifulSoup, Tag
-
+from HTMLParser import HTMLParser
 
 _log = logging.getLogger(__name__)
 
 
-class W3CTestConverter(object):
+def convert_for_webkit(new_path, filename, host=Host()):
+    """ Converts a file's |contents| so it will function correctly in its |new_path| in Webkit.
+
+    Returns the list of modified properties and the modified text if the file was modifed, None otherwise."""
+    contents = host.filesystem.read_binary_file(filename)
+    converter = _W3CTestConverter(new_path, filename, host)
+    if filename.endswith('.css'):
+        return converter.add_webkit_prefix_to_unprefixed_properties(contents)
+    else:
+        converter.feed(contents)
+        converter.close()
+        return converter.output()
+
 
-    def __init__(self):
-        self._host = Host()
+class _W3CTestConverter(HTMLParser):
+    def __init__(self, new_path, filename, host=Host()):
+        HTMLParser.__init__(self)
+
+        self._host = host
         self._filesystem = self._host.filesystem
         self._webkit_root = WebKitFinder(self._filesystem).webkit_base()
 
+        self.converted_data = []
+        self.converted_properties = []
+        self.in_style_tag = False
+        self.style_data = []
+        self.filename = filename
+
+        resources_path = self.path_from_webkit_root('LayoutTests', 'resources')
+        resources_relpath = self._filesystem.relpath(resources_path, new_path)
+        self.new_test_harness_path = resources_relpath
+
         # These settings might vary between WebKit and Blink
         self._css_property_file = self.path_from_webkit_root('Source', 'WebCore', 'css', 'CSSPropertyNames.in')
         self._css_property_split_string = '='
 
-        self.prefixed_properties = self.read_webkit_prefixed_css_property_list()
+        self.test_harness_re = re.compile('/resources/testharness')
 
+        self.prefixed_properties = self.read_webkit_prefixed_css_property_list()
         prop_regex = '([\s{]|^)(' + "|".join(prop.replace('-webkit-', '') for prop in self.prefixed_properties) + ')(\s+:|:)'
         self.prop_re = re.compile(prop_regex)
 
+    def output(self):
+        return (self.converted_properties, ''.join(self.converted_data))
+
     def path_from_webkit_root(self, *comps):
         return self._filesystem.abspath(self._filesystem.join(self._webkit_root, *comps))
 
@@ -78,104 +106,7 @@ class W3CTestConverter(object):
         # Ignore any prefixed properties for which an unprefixed version is supported
         return [prop for prop in prefixed_properties if prop not in unprefixed_properties]
 
-    def convert_for_webkit(self, new_path, filename):
-        """ Converts a file's |contents| so it will function correctly in its |new_path| in Webkit.
-
-        Returns the list of modified properties and the modified text if the file was modifed, None otherwise."""
-        contents = self._filesystem.read_binary_file(filename)
-        if filename.endswith('.css'):
-            return self.convert_css(contents, filename)
-        return self.convert_html(new_path, contents, filename)
-
-    def convert_css(self, contents, filename):
-        return self.add_webkit_prefix_to_unprefixed_properties(contents, filename)
-
-    def convert_html(self, new_path, contents, filename):
-        doc = BeautifulSoup(contents)
-        did_modify_paths = self.convert_testharness_paths(doc, new_path, filename)
-        converted_properties_and_content = self.convert_prefixed_properties(doc, filename)
-        return converted_properties_and_content if (did_modify_paths or converted_properties_and_content[0]) else None
-
-    def convert_testharness_paths(self, doc, new_path, filename):
-        """ Update links to testharness.js in the BeautifulSoup |doc| to point to the copy in |new_path|.
-
-        Returns whether the document was modified."""
-
-        # Look for the W3C-style path to any testharness files - scripts (.js) or links (.css)
-        pattern = re.compile('/resources/testharness')
-        script_tags = doc.findAll(src=pattern)
-        link_tags = doc.findAll(href=pattern)
-        testharness_tags = script_tags + link_tags
-
-        if not testharness_tags:
-            return False
-
-        resources_path = self.path_from_webkit_root('LayoutTests', 'resources')
-        resources_relpath = self._filesystem.relpath(resources_path, new_path)
-
-        for tag in testharness_tags:
-            # FIXME: We need to handle img, audio, video tags also.
-            attr = 'src'
-            if tag.name != 'script':
-                attr = 'href'
-
-            if not attr in tag.attrMap:
-                # FIXME: Figure out what to do w/ invalid tags. For now, we return False
-                # and leave the document unmodified, which means that it'll probably fail to run.
-                _log.error("Missing an attr in %s" % filename)
-                return False
-
-            old_path = tag[attr]
-            new_tag = Tag(doc, tag.name, tag.attrs)
-            new_tag[attr] = re.sub(pattern, resources_relpath + '/testharness', old_path)
-
-            self.replace_tag(tag, new_tag)
-
-        return True
-
-    def convert_prefixed_properties(self, doc, filename):
-        """ Searches a BeautifulSoup |doc| for any CSS properties requiring the -webkit- prefix and converts them.
-
-        Returns the list of converted properties and the modified document as a string """
-
-        converted_properties = []
-
-        # Look for inline and document styles.
-        inline_styles = doc.findAll(style=re.compile('.*'))
-        style_tags = doc.findAll('style')
-        all_styles = inline_styles + style_tags
-
-        for tag in all_styles:
-
-            # Get the text whether in a style tag or style attribute.
-            style_text = ''
-            if tag.name == 'style':
-                if not tag.contents:
-                    continue
-                style_text = tag.contents[0]
-            else:
-                style_text = tag['style']
-
-            updated_style_text = self.add_webkit_prefix_to_unprefixed_properties(style_text, filename)
-
-            # Rewrite tag only if changes were made.
-            if updated_style_text[0]:
-                converted_properties.extend(list(updated_style_text[0]))
-
-                new_tag = Tag(doc, tag.name, tag.attrs)
-                new_tag.insert(0, updated_style_text[1])
-
-                self.replace_tag(tag, new_tag)
-
-        # FIXME: Doing the replace in the parsed document and then writing it back out
-        # is normalizing the HTML, which may in fact alter the intent of some tests.
-        # We should probably either just do basic string-replaces, or have some other
-        # way of flagging tests that are sensitive to being rewritten.
-        # https://bugs.webkit.org/show_bug.cgi?id=119159
-
-        return (converted_properties, doc.prettify())
-
-    def add_webkit_prefix_to_unprefixed_properties(self, text, filename):
+    def add_webkit_prefix_to_unprefixed_properties(self, text):
         """ Searches |text| for instances of properties requiring the -webkit- prefix and adds the prefix to them.
 
         Returns the list of converted properties and the modified text."""
@@ -193,9 +124,65 @@ class W3CTestConverter(object):
             _log.info('  converting %s', prop)
 
         # FIXME: Handle the JS versions of these properties and GetComputedStyle, too.
-        return (converted_properties, text)
+        return (converted_properties, ''.join(text_chunks))
+
+    def convert_style_data(self, data):
+        converted = self.add_webkit_prefix_to_unprefixed_properties(data)
+        if converted[0]:
+            self.converted_properties.extend(list(converted[0]))
+        return converted[1]
+
+    def convert_attributes_if_needed(self, tag, attrs):
+        converted = self.get_starttag_text()
+        if tag in ('script', 'link'):
+            attr_name = 'src'
+            if tag != 'script':
+                attr_name = 'href'
+            for attr in attrs:
+                if attr[0] == attr_name:
+                    new_path = re.sub(self.test_harness_re, self.new_test_harness_path + '/testharness', attr[1])
+                    converted = re.sub(attr[1], new_path, converted)
+
+        for attr in attrs:
+            if attr[0] == 'style':
+                new_style = self.convert_style_data(attr[1])
+                converted = re.sub(attr[1], new_style, converted)
+
+        self.converted_data.append(converted)
+
+    def handle_starttag(self, tag, attrs):
+        if tag == 'style':
+            self.in_style_tag = True
+        self.convert_attributes_if_needed(tag, attrs)
+
+    def handle_endtag(self, tag):
+        if tag == 'style':
+            self.converted_data.append(self.convert_style_data(''.join(self.style_data)))
+            self.in_style_tag = False
+            self.style_data = []
+        self.converted_data.extend(['</', tag, '>'])
+
+    def handle_startendtag(self, tag, attrs):
+        self.convert_attributes_if_needed(tag, attrs)
+
+    def handle_data(self, data):
+        if self.in_style_tag:
+            self.style_data.append(data)
+        else:
+            self.converted_data.append(data)
+
+    def handle_entityref(self, name):
+        self.converted_data.extend(['&', name, ';'])
+
+    def handle_charref(self, name):
+        self.converted_data.extend(['&#', name, ';'])
+
+    def handle_comment(self, data):
+        self.converted_data.extend(['<!-- ', data, ' -->'])
+
+    def handle_decl(self, decl):
+        self.converted_data.extend(['<!', decl, '>'])
+
+    def handle_pi(self, data):
+        self.converted_data.extend(['<?', data, '>'])
 
-    def replace_tag(self, old_tag, new_tag):
-        index = old_tag.parent.contents.index(old_tag)
-        old_tag.parent.insert(index, new_tag)
-        old_tag.extract()
index 5e74c2a..d20e189 100644 (file)
@@ -31,23 +31,29 @@ import os
 import re
 import unittest2 as unittest
 
+from webkitpy.common.host import Host
 from webkitpy.common.system.outputcapture import OutputCapture
+from webkitpy.common.webkit_finder import WebKitFinder
 from webkitpy.thirdparty.BeautifulSoup import BeautifulSoup
-from webkitpy.w3c.test_converter import W3CTestConverter
-
+from webkitpy.w3c.test_converter import _W3CTestConverter
 
 DUMMY_FILENAME = 'dummy.html'
+DUMMY_PATH = 'dummy/testharness/path'
 
 class W3CTestConverterTest(unittest.TestCase):
 
-    def fake_dir_path(self, converter, dirname):
-        return converter.path_from_webkit_root("LayoutTests", "css", dirname)
+    # FIXME: When we move to using a MockHost, this method should be removed, since
+    #        then we can just pass in a dummy dir path
+    def fake_dir_path(self, dirname):
+        filesystem = Host().filesystem
+        webkit_root = WebKitFinder(filesystem).webkit_base()
+        return filesystem.abspath(filesystem.join(webkit_root, "LayoutTests", "css", dirname))
 
     def test_read_prefixed_property_list(self):
         """ Tests that the current list of properties requiring the -webkit- prefix load correctly """
 
         # FIXME: We should be passing in a MockHost here ...
-        converter = W3CTestConverter()
+        converter = _W3CTestConverter(DUMMY_PATH, DUMMY_FILENAME)
         prop_list = converter.prefixed_properties
         self.assertTrue(prop_list, 'No prefixed properties found')
 
@@ -72,16 +78,18 @@ CONTENT OF TEST
 </body>
 </html>
 """
-        converter = W3CTestConverter()
+        converter = _W3CTestConverter(DUMMY_PATH, DUMMY_FILENAME)
 
         oc = OutputCapture()
         oc.capture_output()
         try:
-            converted = converter.convert_html('/nothing/to/convert', test_html, DUMMY_FILENAME)
+            converter.feed(test_html)
+            converter.close()
+            converted = converter.output()
         finally:
             oc.restore_output()
 
-        self.verify_no_conversion_happened(converted)
+        self.verify_no_conversion_happened(converted, test_html)
 
     def test_convert_for_webkit_harness_only(self):
         """ Tests convert_for_webkit() using a basic JS test that uses testharness.js only and has no prefixed properties """
@@ -91,10 +99,11 @@ CONTENT OF TEST
 <script src="/resources/testharness.js"></script>
 </head>
 """
-        converter = W3CTestConverter()
-        fake_dir_path = self.fake_dir_path(converter, "harnessonly")
-
-        converted = converter.convert_html(fake_dir_path, test_html, DUMMY_FILENAME)
+        fake_dir_path = self.fake_dir_path("harnessonly")
+        converter = _W3CTestConverter(fake_dir_path, DUMMY_FILENAME)
+        converter.feed(test_html)
+        converter.close()
+        converted = converter.output()
 
         self.verify_conversion_happened(converted)
         self.verify_test_harness_paths(converter, converted[1], fake_dir_path, 1, 1)
@@ -118,14 +127,16 @@ CONTENT OF TEST
 </body>
 </html>
 """
-        converter = W3CTestConverter()
-        fake_dir_path = self.fake_dir_path(converter, 'harnessandprops')
+        fake_dir_path = self.fake_dir_path('harnessandprops')
+        converter = _W3CTestConverter(fake_dir_path, DUMMY_FILENAME)
         test_content = self.generate_test_content(converter.prefixed_properties, 1, test_html)
 
         oc = OutputCapture()
         oc.capture_output()
         try:
-            converted = converter.convert_html(fake_dir_path, test_content[1], DUMMY_FILENAME)
+            converter.feed(test_content[1])
+            converter.close()
+            converted = converter.output()
         finally:
             oc.restore_output()
 
@@ -153,14 +164,16 @@ CONTENT OF TEST
 </body>
 </html>
 """
-        converter = W3CTestConverter()
-        fake_dir_path = self.fake_dir_path(converter, 'harnessandprops')
+        fake_dir_path = self.fake_dir_path('harnessandprops')
+        converter = _W3CTestConverter(fake_dir_path, DUMMY_FILENAME)
 
         oc = OutputCapture()
         oc.capture_output()
         try:
             test_content = self.generate_test_content(converter.prefixed_properties, 2, test_html)
-            converted = converter.convert_html(fake_dir_path, test_content[1], DUMMY_FILENAME)
+            converter.feed(test_content[1])
+            converter.close()
+            converted = converter.output()
         finally:
             oc.restore_output()
 
@@ -177,20 +190,20 @@ CONTENT OF TEST
 <script src="/resources/testharnessreport.js"></script>
 </head>
 """
-        converter = W3CTestConverter()
+        fake_dir_path = self.fake_dir_path('testharnesspaths')
+        converter = _W3CTestConverter(fake_dir_path, DUMMY_FILENAME)
 
-        fake_dir_path = self.fake_dir_path(converter, 'testharnesspaths')
-
-        doc = BeautifulSoup(test_html)
         oc = OutputCapture()
         oc.capture_output()
         try:
-            converted = converter.convert_testharness_paths(doc, fake_dir_path, DUMMY_FILENAME)
+            converter.feed(test_html)
+            converter.close()
+            converted = converter.output()
         finally:
             oc.restore_output()
 
         self.verify_conversion_happened(converted)
-        self.verify_test_harness_paths(converter, doc, fake_dir_path, 2, 1)
+        self.verify_test_harness_paths(converter, converted[1], fake_dir_path, 2, 1)
 
     def test_convert_prefixed_properties(self):
         """ Tests convert_prefixed_properties() file that has 20 properties requiring the -webkit- prefix:
@@ -255,14 +268,15 @@ CONTENT OF TEST
 ]]></style>
 </html>
 """
-        converter = W3CTestConverter()
-
+        converter = _W3CTestConverter(DUMMY_PATH, DUMMY_FILENAME)
         test_content = self.generate_test_content(converter.prefixed_properties, 20, test_html)
 
         oc = OutputCapture()
         oc.capture_output()
         try:
-            converted = converter.convert_prefixed_properties(BeautifulSoup(test_content[1]), DUMMY_FILENAME)
+            converter.feed(test_content[1])
+            converter.close()
+            converted = converter.output()
         finally:
             oc.restore_output()
 
@@ -272,8 +286,8 @@ CONTENT OF TEST
     def verify_conversion_happened(self, converted):
         self.assertTrue(converted, "conversion didn't happen")
 
-    def verify_no_conversion_happened(self, converted):
-        self.assertEqual(converted, None, 'test should not have been converted')
+    def verify_no_conversion_happened(self, converted, original):
+        self.assertEqual(converted[1], original, 'test should not have been converted')
 
     def verify_test_harness_paths(self, converter, converted, test_path, num_src_paths, num_href_paths):
         if isinstance(converted, basestring):
index 1f4e3d8..668a01b 100644 (file)
@@ -96,7 +96,7 @@ from webkitpy.common.host import Host
 from webkitpy.common.webkit_finder import WebKitFinder
 from webkitpy.common.system.executive import ScriptError
 from webkitpy.w3c.test_parser import TestParser
-from webkitpy.w3c.test_converter import W3CTestConverter
+from webkitpy.w3c.test_converter import convert_for_webkit
 
 
 TEST_STATUS_UNKNOWN = 'unknown'
@@ -286,7 +286,6 @@ class TestImporter(object):
                     'reftests': reftests, 'jstests': jstests, 'total_tests': total_tests})
 
     def import_tests(self):
-        converter = W3CTestConverter()
         total_imported_tests = 0
         total_imported_reftests = 0
         total_imported_jstests = 0
@@ -343,7 +342,7 @@ class TestImporter(object):
                 # FIXME: Eventually, so should js when support is added for this type of conversion
                 mimetype = mimetypes.guess_type(orig_filepath)
                 if 'html' in str(mimetype[0]) or 'xml' in str(mimetype[0])  or 'css' in str(mimetype[0]):
-                    converted_file = converter.convert_for_webkit(new_path, filename=orig_filepath)
+                    converted_file = convert_for_webkit(new_path, filename=orig_filepath)
 
                     if not converted_file:
                         shutil.copyfile(orig_filepath, new_filepath)  # The file was unmodified.