2010-08-30 Adam Barth <abarth@webkit.org>
author abarth@webkit.org <abarth@webkit.org@268f45cc-cd09-0410-ab3c-d52691b4dbfc>
Tue, 31 Aug 2010 02:14:26 +0000 (02:14 +0000)
committer abarth@webkit.org <abarth@webkit.org@268f45cc-cd09-0410-ab3c-d52691b4dbfc>
Tue, 31 Aug 2010 02:14:26 +0000 (02:14 +0000)
        Reviewed by Eric Seidel.

        Implement CDATASection state for the HTMLTokenizer
        https://bugs.webkit.org/show_bug.cgi?id=44923

        In the spec, this state refers back to the HTMLTreeBuilder from the
        HTMLTokenizer.  Rather than introduce that (backwards) dependency, we
        have the tree builder set a bit on the tokenizer.

        * html/parser/HTMLTokenizer.cpp:
        (WebCore::HTMLTokenizer::reset):
        (WebCore::HTMLTokenizer::nextToken):
        * html/parser/HTMLTokenizer.h:
        (WebCore::HTMLTokenizer::shouldAllowCDATA):
        (WebCore::HTMLTokenizer::setShouldAllowCDATA):
        * html/parser/HTMLTreeBuilder.cpp:
        (WebCore::HTMLTreeBuilder::constructTreeFromToken):
2010-08-30  Adam Barth  <abarth@webkit.org>

        Reviewed by Eric Seidel.

        Implement CDATASection state for the HTMLTokenizer
        https://bugs.webkit.org/show_bug.cgi?id=44923

        Large test progression.

        * html5lib/runner-expected.txt:

git-svn-id: https://svn.webkit.org/repository/webkit/trunk@66447 268f45cc-cd09-0410-ab3c-d52691b4dbfc

LayoutTests/ChangeLog
LayoutTests/html5lib/runner-expected.txt
WebCore/ChangeLog
WebCore/html/parser/HTMLTokenizer.cpp
WebCore/html/parser/HTMLTokenizer.h
WebCore/html/parser/HTMLTreeBuilder.cpp

index a1339a5..0d72274 100644 (file)
@@ -2,6 +2,17 @@
 
         Reviewed by Eric Seidel.
 
+        Implement CDATASection state for the HTMLTokenizer
+        https://bugs.webkit.org/show_bug.cgi?id=44923
+
+        Large test progression.
+
+        * html5lib/runner-expected.txt:
+
+2010-08-30  Adam Barth  <abarth@webkit.org>
+
+        Reviewed by Eric Seidel.
+
         Enable HTML5lib's test_innerHTML01.dat
         https://bugs.webkit.org/show_bug.cgi?id=44919
 
index 3d15627..d12da74 100644 (file)
@@ -172,341 +172,8 @@ Expected:
 |     <keygen>
 resources/tests20.dat: PASS
 
-resources/tests21.dat:
-1
-2
-4
-5
-6
-7
-8
-9
-10
-11
-12
-14
-15
-16
-17
-18
-19
-20
-21
-22
-
-Test 1 of 22 in resources/tests21.dat failed. Input:
-<svg><![CDATA[foo]]>
-Got:
-| <html>
-|   <head>
-|   <body>
-|     <svg svg>
-|       <!-- [CDATA[foo]] -->
-Expected:
-| <html>
-|   <head>
-|   <body>
-|     <svg svg>
-|       "foo"
-
-Test 2 of 22 in resources/tests21.dat failed. Input:
-<math><![CDATA[foo]]>
-Got:
-| <html>
-|   <head>
-|   <body>
-|     <math math>
-|       <!-- [CDATA[foo]] -->
-Expected:
-| <html>
-|   <head>
-|   <body>
-|     <math math>
-|       "foo"
-
-Test 4 of 22 in resources/tests21.dat failed. Input:
-<svg><![CDATA[foo
-Got:
-| <html>
-|   <head>
-|   <body>
-|     <svg svg>
-|       <!-- [CDATA[foo -->
-Expected:
-| <html>
-|   <head>
-|   <body>
-|     <svg svg>
-|       "foo"
-
-Test 5 of 22 in resources/tests21.dat failed. Input:
-<svg><![CDATA[foo
-Got:
-| <html>
-|   <head>
-|   <body>
-|     <svg svg>
-|       <!-- [CDATA[foo -->
-Expected:
-| <html>
-|   <head>
-|   <body>
-|     <svg svg>
-|       "foo"
-
-Test 6 of 22 in resources/tests21.dat failed. Input:
-<svg><![CDATA[
-Got:
-| <html>
-|   <head>
-|   <body>
-|     <svg svg>
-|       <!-- [CDATA[ -->
-Expected:
-| <html>
-|   <head>
-|   <body>
-|     <svg svg>
-
-Test 7 of 22 in resources/tests21.dat failed. Input:
-<svg><![CDATA[]]>
-Got:
-| <html>
-|   <head>
-|   <body>
-|     <svg svg>
-|       <!-- [CDATA[]] -->
-Expected:
-| <html>
-|   <head>
-|   <body>
-|     <svg svg>
-
-Test 8 of 22 in resources/tests21.dat failed. Input:
-<svg><![CDATA[]] >]]>
-Got:
-| <html>
-|   <head>
-|   <body>
-|     <svg svg>
-|       <!-- [CDATA[]]  -->
-|       "]]>"
-Expected:
-| <html>
-|   <head>
-|   <body>
-|     <svg svg>
-|       "]] >"
-
-Test 9 of 22 in resources/tests21.dat failed. Input:
-<svg><![CDATA[]] >]]>
-Got:
-| <html>
-|   <head>
-|   <body>
-|     <svg svg>
-|       <!-- [CDATA[]]  -->
-|       "]]>"
-Expected:
-| <html>
-|   <head>
-|   <body>
-|     <svg svg>
-|       "]] >"
-
-Test 10 of 22 in resources/tests21.dat failed. Input:
-<svg><![CDATA[]]
-Got:
-| <html>
-|   <head>
-|   <body>
-|     <svg svg>
-|       <!-- [CDATA[]] -->
-Expected:
-| <html>
-|   <head>
-|   <body>
-|     <svg svg>
-|       "]]"
-
-Test 11 of 22 in resources/tests21.dat failed. Input:
-<svg><![CDATA[]
-Got:
-| <html>
-|   <head>
-|   <body>
-|     <svg svg>
-|       <!-- [CDATA[] -->
-Expected:
-| <html>
-|   <head>
-|   <body>
-|     <svg svg>
-|       "]"
+resources/tests21.dat: PASS
 
-Test 12 of 22 in resources/tests21.dat failed. Input:
-<svg><![CDATA[]>a
-Got:
-| <html>
-|   <head>
-|   <body>
-|     <svg svg>
-|       <!-- [CDATA[] -->
-|       "a"
-Expected:
-| <html>
-|   <head>
-|   <body>
-|     <svg svg>
-|       "]>a"
-
-Test 14 of 22 in resources/tests21.dat failed. Input:
-<svg><![CDATA[<svg>]]>
-Got:
-| <html>
-|   <head>
-|   <body>
-|     <svg svg>
-|       <!-- [CDATA[<svg -->
-|       "]]>"
-Expected:
-| <html>
-|   <head>
-|   <body>
-|     <svg svg>
-|       "<svg>"
-
-Test 15 of 22 in resources/tests21.dat failed. Input:
-<svg><![CDATA[</svg>a]]>
-Got:
-| <html>
-|   <head>
-|   <body>
-|     <svg svg>
-|       <!-- [CDATA[</svg -->
-|       "a]]>"
-Expected:
-| <html>
-|   <head>
-|   <body>
-|     <svg svg>
-|       "</svg>a"
-
-Test 16 of 22 in resources/tests21.dat failed. Input:
-<svg><![CDATA[<svg>a
-Got:
-| <html>
-|   <head>
-|   <body>
-|     <svg svg>
-|       <!-- [CDATA[<svg -->
-|       "a"
-Expected:
-| <html>
-|   <head>
-|   <body>
-|     <svg svg>
-|       "<svg>a"
-
-Test 17 of 22 in resources/tests21.dat failed. Input:
-<svg><![CDATA[</svg>a
-Got:
-| <html>
-|   <head>
-|   <body>
-|     <svg svg>
-|       <!-- [CDATA[</svg -->
-|       "a"
-Expected:
-| <html>
-|   <head>
-|   <body>
-|     <svg svg>
-|       "</svg>a"
-
-Test 18 of 22 in resources/tests21.dat failed. Input:
-<svg><![CDATA[<svg>]]><path>
-Got:
-| <html>
-|   <head>
-|   <body>
-|     <svg svg>
-|       <!-- [CDATA[<svg -->
-|       "]]>"
-|       <svg path>
-Expected:
-| <html>
-|   <head>
-|   <body>
-|     <svg svg>
-|       "<svg>"
-|       <svg path>
-
-Test 19 of 22 in resources/tests21.dat failed. Input:
-<svg><![CDATA[<svg>]]></path>
-Got:
-| <html>
-|   <head>
-|   <body>
-|     <svg svg>
-|       <!-- [CDATA[<svg -->
-|       "]]>"
-Expected:
-| <html>
-|   <head>
-|   <body>
-|     <svg svg>
-|       "<svg>"
-
-Test 20 of 22 in resources/tests21.dat failed. Input:
-<svg><![CDATA[<svg>]]><!--path-->
-Got:
-| <html>
-|   <head>
-|   <body>
-|     <svg svg>
-|       <!-- [CDATA[<svg -->
-|       "]]>"
-|       <!-- path -->
-Expected:
-| <html>
-|   <head>
-|   <body>
-|     <svg svg>
-|       "<svg>"
-|       <!-- path -->
-
-Test 21 of 22 in resources/tests21.dat failed. Input:
-<svg><![CDATA[<svg>]]>path
-Got:
-| <html>
-|   <head>
-|   <body>
-|     <svg svg>
-|       <!-- [CDATA[<svg -->
-|       "]]>path"
-Expected:
-| <html>
-|   <head>
-|   <body>
-|     <svg svg>
-|       "<svg>path"
-
-Test 22 of 22 in resources/tests21.dat failed. Input:
-<svg><![CDATA[<!--svg-->]]>
-Got:
-| <html>
-|   <head>
-|   <body>
-|     <svg svg>
-|       <!-- [CDATA[<!--svg-- -->
-|       "]]>"
-Expected:
-| <html>
-|   <head>
-|   <body>
-|     <svg svg>
-|       "<!--svg-->"
 resources/tests_innerHTML_1.dat: PASS
 
 resources/webkit01.dat: PASS
index b16b560..f4aaaa0 100644 (file)
@@ -2,6 +2,26 @@
 
         Reviewed by Eric Seidel.
 
+        Implement CDATASection state for the HTMLTokenizer
+        https://bugs.webkit.org/show_bug.cgi?id=44923
+
+        In the spec, this state refers back to the HTMLTreeBuilder from the
+        HTMLTokenizer.  Rather than introduce that (backwards) dependency, we
+        have the tree builder set a bit on the tokenizer.
+
+        * html/parser/HTMLTokenizer.cpp:
+        (WebCore::HTMLTokenizer::reset):
+        (WebCore::HTMLTokenizer::nextToken):
+        * html/parser/HTMLTokenizer.h:
+        (WebCore::HTMLTokenizer::shouldAllowCDATA):
+        (WebCore::HTMLTokenizer::setShouldAllowCDATA):
+        * html/parser/HTMLTreeBuilder.cpp:
+        (WebCore::HTMLTreeBuilder::constructTreeFromToken):
+
+2010-08-30  Adam Barth  <abarth@webkit.org>
+
+        Reviewed by Eric Seidel.
+
         Enable HTML5lib's test_innerHTML01.dat
         https://bugs.webkit.org/show_bug.cgi?id=44919
 
index 4a8000c..5791842 100644 (file)
@@ -67,6 +67,12 @@ inline void advanceStringAndASSERTIgnoringCase(SegmentedString& source, const ch
         source.advanceAndASSERTIgnoringCase(*expectedCharacters++);
 }
 
+inline void advanceStringAndASSERT(SegmentedString& source, const char* expectedCharacters)
+{
+    while (*expectedCharacters)
+        source.advanceAndASSERT(*expectedCharacters++);
+}
+
 inline bool vectorEqualsString(const Vector<UChar, 32>& vector, const String& string)
 {
     if (vector.size() != string.length())
@@ -113,6 +119,7 @@ void HTMLTokenizer::reset()
     m_lineNumber = 0;
     m_skipLeadingNewLineForListing = false;
     m_forceNullCharacterReplacement = false;
+    m_shouldAllowCDATA = false;
     m_additionalAllowedCharacter = '\0';
 }
 
@@ -1111,6 +1118,7 @@ bool HTMLTokenizer::nextToken(SegmentedString& source, HTMLToken& token)
     BEGIN_STATE(MarkupDeclarationOpenState) {
         DEFINE_STATIC_LOCAL(String, dashDashString, ("--"));
         DEFINE_STATIC_LOCAL(String, doctypeString, ("doctype"));
+        DEFINE_STATIC_LOCAL(String, cdataString, ("[CDATA["));
         if (cc == '-') {
             SegmentedString::LookAheadResult result = source.lookAhead(dashDashString);
             if (result == SegmentedString::DidMatch) {
@@ -1127,10 +1135,14 @@ bool HTMLTokenizer::nextToken(SegmentedString& source, HTMLToken& token)
                 SWITCH_TO(DOCTYPEState);
             } else if (result == SegmentedString::NotEnoughCharacters)
                 return haveBufferedCharacterToken();
+        } else if (cc == '[' && shouldAllowCDATA()) {
+            SegmentedString::LookAheadResult result = source.lookAhead(cdataString);
+            if (result == SegmentedString::DidMatch) {
+                advanceStringAndASSERT(source, "[CDATA[");
+                SWITCH_TO(CDATASectionState);
+            } else if (result == SegmentedString::NotEnoughCharacters)
+                return haveBufferedCharacterToken();
         }
-        notImplemented();
-        // FIXME: We're still missing the bits about the insertion mode being in foreign content:
-        // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#markup-declaration-open-state
         parseError();
         RECONSUME_IN(BogusCommentState);
     }
@@ -1605,9 +1617,41 @@ bool HTMLTokenizer::nextToken(SegmentedString& source, HTMLToken& token)
     END_STATE()
 
     BEGIN_STATE(CDATASectionState) {
-        notImplemented();
-        ADVANCE_TO(CDATASectionState);
-        // FIXME: Handle EOF properly.
+        if (cc == ']')
+            ADVANCE_TO(CDATASectionRightSquareBracketState);
+        else if (cc == InputStreamPreprocessor::endOfFileMarker)
+            RECONSUME_IN(DataState);
+        else {
+            bufferCharacter(cc);
+            ADVANCE_TO(CDATASectionState);
+        }
+    }
+    END_STATE()
+
+    BEGIN_STATE(CDATASectionRightSquareBracketState) {
+        if (cc == ']')
+            ADVANCE_TO(CDATASectionDoubleRightSquareBracketState);
+        else {
+            // A lone ']' is ordinary content, not the start of "]]>".
+            bufferCharacter(']');
+            RECONSUME_IN(CDATASectionState);
+        }
+    }
+    END_STATE()
+
+    BEGIN_STATE(CDATASectionDoubleRightSquareBracketState) {
+        if (cc == '>')
+            ADVANCE_TO(DataState);
+        else if (cc == ']') {
+            // Third ']' in a row (e.g. "]]]>"): the oldest one is content and
+            // the latest two may still begin the "]]>" terminator.
+            bufferCharacter(']');
+            ADVANCE_TO(CDATASectionDoubleRightSquareBracketState);
+        } else {
+            bufferCharacter(']');
+            bufferCharacter(']');
+            RECONSUME_IN(CDATASectionState);
+        }
     }
     END_STATE()
 
index 2b93e15..bab77f3 100644 (file)
@@ -114,6 +114,9 @@ public:
         AfterDOCTYPESystemIdentifierState,
         BogusDOCTYPEState,
         CDATASectionState,
+        // These CDATA states are not in the HTML5 spec, but we use them internally.
+        CDATASectionRightSquareBracketState,
+        CDATASectionDoubleRightSquareBracketState,
     };
 
     static PassOwnPtr<HTMLTokenizer> create() { return adoptPtr(new HTMLTokenizer); }
@@ -139,6 +142,9 @@ public:
     bool forceNullCharacterReplacement() const { return m_forceNullCharacterReplacement; }
     void setForceNullCharacterReplacement(bool value) { m_forceNullCharacterReplacement = value; }
 
+    bool shouldAllowCDATA() const { return m_shouldAllowCDATA; }
+    void setShouldAllowCDATA(bool value) { m_shouldAllowCDATA = value; }
+
     bool shouldSkipNullCharacters() const
     {
         return !m_forceNullCharacterReplacement
@@ -270,6 +276,7 @@ private:
 
     bool m_skipLeadingNewLineForListing;
     bool m_forceNullCharacterReplacement;
+    bool m_shouldAllowCDATA;
 
     // http://www.whatwg.org/specs/web-apps/current-work/#temporary-buffer
     Vector<UChar, 32> m_temporaryBuffer;
index 44d492e..07db2f3 100644 (file)
@@ -501,6 +501,7 @@ void HTMLTreeBuilder::constructTreeFromToken(HTMLToken& rawToken)
     // the U+0000 characters into replacement characters has compatibility
     // problems.
     m_tokenizer->setForceNullCharacterReplacement(m_insertionMode == TextMode || m_insertionMode == InForeignContentMode);
+    m_tokenizer->setShouldAllowCDATA(m_insertionMode == InForeignContentMode && m_tree.currentElement()->namespaceURI() != xhtmlNamespaceURI);
 }
 
 void HTMLTreeBuilder::processToken(AtomicHTMLToken& token)