2010-07-02 Eric Seidel <eric@webkit.org>
author eric@webkit.org <eric@webkit.org@268f45cc-cd09-0410-ab3c-d52691b4dbfc>
Sun, 4 Jul 2010 21:15:44 +0000 (21:15 +0000)
committer eric@webkit.org <eric@webkit.org@268f45cc-cd09-0410-ab3c-d52691b4dbfc>
Sun, 4 Jul 2010 21:15:44 +0000 (21:15 +0000)
        Reviewed by Adam Barth.

        HTMLTreeBuilder needs an adoption agency
        https://bugs.webkit.org/show_bug.cgi?id=41453

        Added new adoption01 suite for testing adoption agency
        bugs.  Right now only the simplest adoption test passes.
        I'll be adding more in future commits.

        * html5lib/resources/adoption01.dat: Added.
        * html5lib/runner-expected-html5.txt:
        * html5lib/runner-expected.txt:
        * html5lib/runner.html:
2010-07-01  Eric Seidel  <eric@webkit.org>

        Reviewed by Adam Barth.

        HTMLTreeBuilder needs an adoption agency
        https://bugs.webkit.org/show_bug.cgi?id=41453

        This changes some test results, but only makes the simplest
        adoption agency cases pass.  I think the code is likely
        very close, but further iteration to make this change larger
        seems counter-productive.  I recommend we check in this
        progression and work from here.

        * dom/ContainerNode.cpp:
        (WebCore::ContainerNode::addChildCommon):
         - Make sure callers don't assume this will reparent.
        (WebCore::ContainerNode::parserAddChild):
         - Update comment to document lack of reparenting behavior.
        * html/HTMLElementStack.cpp:
        (WebCore::HTMLElementStack::ElementRecord::ElementRecord):
        (WebCore::HTMLElementStack::ElementRecord::~ElementRecord):
        (WebCore::HTMLElementStack::ElementRecord::replaceElement):
        (WebCore::HTMLElementStack::ElementRecord::isAbove):
         - Added for debugging.
        (WebCore::HTMLElementStack::pushHTMLHtmlElement):
        (WebCore::HTMLElementStack::insertAbove):
         - Needed for the adoption agency.
        (WebCore::HTMLElementStack::topRecord):
        (WebCore::HTMLElementStack::bottom):
        (WebCore::HTMLElementStack::removeHTMLHeadElement):
        (WebCore::HTMLElementStack::remove):
        (WebCore::HTMLElementStack::find):
        (WebCore::HTMLElementStack::topmost):
        (WebCore::HTMLElementStack::contains):
        (WebCore::HTMLElementStack::htmlElement):
        (WebCore::HTMLElementStack::headElement):
        (WebCore::HTMLElementStack::bodyElement):
        (WebCore::HTMLElementStack::pushCommon):
        (WebCore::HTMLElementStack::removeNonTopCommon):
         - Fix the name to match top/bottom.
        * html/HTMLElementStack.h:
        (WebCore::HTMLElementStack::ElementRecord::element):
        (WebCore::HTMLElementStack::ElementRecord::next):
        (WebCore::HTMLElementStack::ElementRecord::releaseNext):
        (WebCore::HTMLElementStack::ElementRecord::setNext):
        * html/HTMLFormattingElementList.cpp:
        (WebCore::HTMLFormattingElementList::closestElementInScopeWithName):
        (WebCore::HTMLFormattingElementList::contains):
        (WebCore::HTMLFormattingElementList::find):
        (WebCore::HTMLFormattingElementList::remove):
        * html/HTMLFormattingElementList.h:
        (WebCore::HTMLFormattingElementList::isEmpty):
        (WebCore::HTMLFormattingElementList::size):
        * html/HTMLTreeBuilder.cpp:
        (WebCore::HTMLTreeBuilder::processStartTag):
        (WebCore::HTMLTreeBuilder::furthestBlockForFormattingElement):
         - Part of the Adoption Agency algorithm.
        (WebCore::HTMLTreeBuilder::findFosterParentFor):
         - Used to move mis-nested content out of tables.
           This doesn't seem to work quite right yet.
        (WebCore::HTMLTreeBuilder::reparentChildren):
        (WebCore::HTMLTreeBuilder::callTheAdoptionAgency):
         - The ridiculously long/complicated adoption agency algorithm from HTML5.
        (WebCore::HTMLTreeBuilder::processEndTag):
        * html/HTMLTreeBuilder.h:

git-svn-id: https://svn.webkit.org/repository/webkit/trunk@62468 268f45cc-cd09-0410-ab3c-d52691b4dbfc

14 files changed:
LayoutTests/ChangeLog
LayoutTests/html5lib/resources/adoption01.dat [new file with mode: 0644]
LayoutTests/html5lib/runner-expected-html5.txt
LayoutTests/html5lib/runner-expected.txt
LayoutTests/html5lib/runner.html
WebCore/ChangeLog
WebCore/dom/ContainerNode.cpp
WebCore/html/HTMLElementStack.cpp
WebCore/html/HTMLElementStack.h
WebCore/html/HTMLFormattingElementList.cpp
WebCore/html/HTMLFormattingElementList.h
WebCore/html/HTMLTreeBuilder.cpp
WebCore/html/HTMLTreeBuilder.h
WebCore/page/Frame.cpp

index ba834be..214ec0f 100644 (file)
@@ -1,3 +1,19 @@
+2010-07-02  Eric Seidel  <eric@webkit.org>
+
+        Reviewed by Adam Barth.
+
+        HTMLTreeBuilder needs an adoption agency
+        https://bugs.webkit.org/show_bug.cgi?id=41453
+
+        Added new adoption01 suite for testing adoption agency
+        bugs.  Right now only the simplest adoption test passes.
+        I'll be adding more in future commits.
+
+        * html5lib/resources/adoption01.dat: Added.
+        * html5lib/runner-expected-html5.txt:
+        * html5lib/runner-expected.txt:
+        * html5lib/runner.html:
+
 2010-07-04  Robert Hogan  <robert@webkit.org>
 
         [Qt] Triage failing HTTP tests in Qt Skipped list
diff --git a/LayoutTests/html5lib/resources/adoption01.dat b/LayoutTests/html5lib/resources/adoption01.dat
new file mode 100644 (file)
index 0000000..674f7af
--- /dev/null
@@ -0,0 +1,84 @@
+#data
+<a><p></a></p>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <a>
+|     <p>
+|       <a>
+
+#data
+<a>1<p>2</a>3</p>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <a>
+|       "1"
+|     <p>
+|       <a>
+|         "2"
+|       "3"
+
+#data
+<a>1<button>2</a>3</button>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <a>
+|       "1"
+|       <button>
+|         "2"
+|     "3"
+
+#data
+<a>1<b>2</a>3</b>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <a>
+|       "1"
+|       <b>
+|         "2"
+|     <b>
+|       "3"
+
+#data
+<a>1<div>2<div>3</a>4</div>5</div>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <a>
+|       "1"
+|     <div>
+|       <a>
+|         "2"
+|       <div>
+|         <a>
+|           "3"
+|         "4"
+|       "5"
+
+#data
+<table><a>1<p>2</a>3</p>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <a>
+|       "1"
+|     <p>
+|       <a>
+|         "2"
+|       "3"
+|     <table>
index fac285f..5a3b8d8 100644 (file)
@@ -129,7 +129,7 @@ Got:
 |         <a>
 |           <a>
 |             "Y"
-|             "Z"
+|           "Z"
 Expected:
 | <html>
 |   <head>
@@ -388,7 +388,8 @@ Got:
 |   <head>
 |   <body>
 |     <b>
-|       <p>
+|     <p>
+|       <b>
 |         "TEST"
 Expected:
 | <html>
@@ -435,7 +436,7 @@ Got:
 |         <font>
 |           <b>
 |             "cruel"
-|             "world"
+|         "world"
 Expected:
 | <html>
 |   <head>
@@ -525,9 +526,10 @@ Got:
 |       <b>
 |         <cite>
 |           "B"
-|           <div>
-|             "C"
-|             "D"
+|       <div>
+|         <b>
+|           "C"
+|           "D"
 Expected:
 | <html>
 |   <head>
@@ -561,9 +563,13 @@ Got:
 |                           <cite>
 |                             <i>
 |                               <i>
-|                                 <div>
-|                                   "X"
-|                                   "TEST"
+|             <i>
+|               <i>
+|                 <i>
+|                   <div>
+|                     <b>
+|                       "X"
+|                       "TEST"
 Expected:
 | <html>
 |   <head>
@@ -697,7 +703,9 @@ Got:
 |         <b>
 |           <i>
 |             " ghi "
-|             <p>
+|         <i>
+|           <p>
+|             <b>
 |               " jkl "
 Expected:
 | <html>
@@ -727,7 +735,9 @@ Got:
 |         <b>
 |           <i>
 |             " ghi "
-|             <p>
+|         <i>
+|           <p>
+|             <b>
 |               " jkl "
 |               " mno"
 Expected:
@@ -759,7 +769,10 @@ Got:
 |         <b>
 |           <i>
 |             " ghi "
-|             <p>
+|         <i>
+|         <p>
+|           <i>
+|             <b>
 |               " jkl "
 |               " mno "
 Expected:
@@ -792,7 +805,10 @@ Got:
 |         <b>
 |           <i>
 |             " ghi "
-|             <p>
+|         <i>
+|         <p>
+|           <i>
+|             <b>
 |               " jkl "
 |               " mno "
 |               " pqr"
@@ -827,7 +843,10 @@ Got:
 |         <b>
 |           <i>
 |             " ghi "
-|             <p>
+|         <i>
+|         <p>
+|           <i>
+|             <b>
 |               " jkl "
 |               " mno "
 |               " pqr "
@@ -862,11 +881,14 @@ Got:
 |         <b>
 |           <i>
 |             " ghi "
-|             <p>
+|         <i>
+|         <p>
+|           <i>
+|             <b>
 |               " jkl "
 |               " mno "
 |               " pqr "
-|             " stu"
+|         " stu"
 Expected:
 | <html>
 |   <head>
@@ -1012,10 +1034,9 @@ Got:
 |     <strike>
 |       <strike>
 |         <code>
-|           <code>
-|             <code>
-|               <code>
-|                 <strike>
+|       <code>
+|         <code>
+|           <strike>
 Expected:
 | <html>
 |   <head>
@@ -1251,7 +1272,7 @@ Got:
 |                   <b>
 |                     <b>
 |                       <em>
-|                         <li>
+|                     <li>
 Expected:
 | <html>
 |   <head>
@@ -1900,8 +1921,8 @@ Got:
 |     <nobr>
 |       <nobr>
 |         <nobr>
+|         <nobr>
 |           <nobr>
-|             <nobr>
 Expected:
 | <!DOCTYPE html>
 | <html>
@@ -3100,7 +3121,8 @@ Got:
 |     <b>
 |       "a"
 |       <div>
-|       <div>
+|     <div>
+|       <b>
 |         "y"
 Expected:
 | <html>
@@ -3120,7 +3142,8 @@ Got:
 |   <head>
 |   <body>
 |     <a>
-|       <div>
+|     <div>
+|       <a>
 |         <p>
 Expected:
 | <html>
@@ -5488,4 +5511,128 @@ Expected:
 resources/entities02.dat: PASS
 
 resources/comments01.dat: PASS
+
+resources/adoption01.dat:
+2
+3
+4
+5
+6
+
+Test 2 of 6 in resources/adoption01.dat failed. Input:
+<a>1<p>2</a>3</p>
+Got:
+| <html>
+|   <head>
+|   <body>
+|     <a>
+|       "1"
+|     <p>
+|       <a>
+|         "2"
+|         "3"
+Expected:
+| <html>
+|   <head>
+|   <body>
+|     <a>
+|       "1"
+|     <p>
+|       <a>
+|         "2"
+|       "3"
+
+Test 3 of 6 in resources/adoption01.dat failed. Input:
+<a>1<button>2</a>3</button>
+Got:
+| <html>
+|   <head>
+|   <body>
+|     <a>
+|       "1"
+|       <a>
+|         <button>
+|           "2"
+|           "3"
+Expected:
+| <html>
+|   <head>
+|   <body>
+|     <a>
+|       "1"
+|       <button>
+|         "2"
+|     "3"
+
+Test 4 of 6 in resources/adoption01.dat failed. Input:
+<a>1<b>2</a>3</b>
+Got:
+| <html>
+|   <head>
+|   <body>
+|     <a>
+|       "1"
+|       <a>
+|         <b>
+|           "2"
+|       "3"
+Expected:
+| <html>
+|   <head>
+|   <body>
+|     <a>
+|       "1"
+|       <b>
+|         "2"
+|     <b>
+|       "3"
+
+Test 5 of 6 in resources/adoption01.dat failed. Input:
+<a>1<div>2<div>3</a>4</div>5</div>
+Got:
+| <html>
+|   <head>
+|   <body>
+|     <a>
+|       "1"
+|     <div>
+|       <a>
+|         "2"
+|         <div>
+|           "3"
+|           "4"
+|     "5"
+Expected:
+| <html>
+|   <head>
+|   <body>
+|     <a>
+|       "1"
+|     <div>
+|       <a>
+|         "2"
+|       <div>
+|         <a>
+|           "3"
+|         "4"
+|       "5"
+
+Test 6 of 6 in resources/adoption01.dat failed. Input:
+<table><a>1<p>2</a>3</p>
+Got:
+| <html>
+|   <head>
+|   <body>
+|     <table>
+Expected:
+| <html>
+|   <head>
+|   <body>
+|     <a>
+|       "1"
+|     <p>
+|       <a>
+|         "2"
+|       "3"
+|     <table>
 #EOF
index 97a2ceb..d9be58e 100644 (file)
@@ -4890,3 +4890,48 @@ Expected:
 resources/entities02.dat: PASS
 
 resources/comments01.dat: PASS
+
+resources/adoption01.dat:
+1
+6
+
+Test 1 of 6 in resources/adoption01.dat failed. Input:
+<a><p></a></p>
+Got:
+| <html>
+|   <head>
+|   <body>
+|     <a>
+|     <p>
+Expected:
+| <html>
+|   <head>
+|   <body>
+|     <a>
+|     <p>
+|       <a>
+
+Test 6 of 6 in resources/adoption01.dat failed. Input:
+<table><a>1<p>2</a>3</p>
+Got:
+| <html>
+|   <head>
+|   <body>
+|     <a>
+|       "1"
+|       <p>
+|         "2"
+|         "3"
+|     <table>
+|       <tbody>
+Expected:
+| <html>
+|   <head>
+|   <body>
+|     <a>
+|       "1"
+|     <p>
+|       <a>
+|         "2"
+|       "3"
+|     <table>
index 9f74568..f3052f0 100644 (file)
@@ -58,7 +58,8 @@ var test_files = [
         'resources/scriptdata01.dat',
         'resources/entities01.dat',
         'resources/entities02.dat',
-        'resources/comments01.dat'
+        'resources/comments01.dat',
+        'resources/adoption01.dat'
     ],
     tests = [],
     iframe = document.getElementsByTagName("iframe")[0],
index 418f985..701b25a 100644 (file)
@@ -1,3 +1,69 @@
+2010-07-01  Eric Seidel  <eric@webkit.org>
+
+        Reviewed by Adam Barth.
+
+        HTMLTreeBuilder needs an adoption agency
+        https://bugs.webkit.org/show_bug.cgi?id=41453
+
+        This changes some test results, but only makes the simplest
+        adoption agency cases pass.  I think the code is likely
+        very close, but further iteration to make this change larger
+        seems counter-productive.  I recommend we check in this
+        progression and work from here.
+
+        * dom/ContainerNode.cpp:
+        (WebCore::ContainerNode::addChildCommon):
+         - Make sure callers don't assume this will reparent.
+        (WebCore::ContainerNode::parserAddChild):
+         - Update comment to document lack of reparenting behavior.
+        * html/HTMLElementStack.cpp:
+        (WebCore::HTMLElementStack::ElementRecord::ElementRecord):
+        (WebCore::HTMLElementStack::ElementRecord::~ElementRecord):
+        (WebCore::HTMLElementStack::ElementRecord::replaceElement):
+        (WebCore::HTMLElementStack::ElementRecord::isAbove):
+         - Added for debugging.
+        (WebCore::HTMLElementStack::pushHTMLHtmlElement):
+        (WebCore::HTMLElementStack::insertAbove):
+         - Needed for the adoption agency.
+        (WebCore::HTMLElementStack::topRecord):
+        (WebCore::HTMLElementStack::bottom):
+        (WebCore::HTMLElementStack::removeHTMLHeadElement):
+        (WebCore::HTMLElementStack::remove):
+        (WebCore::HTMLElementStack::find):
+        (WebCore::HTMLElementStack::topmost):
+        (WebCore::HTMLElementStack::contains):
+        (WebCore::HTMLElementStack::htmlElement):
+        (WebCore::HTMLElementStack::headElement):
+        (WebCore::HTMLElementStack::bodyElement):
+        (WebCore::HTMLElementStack::pushCommon):
+        (WebCore::HTMLElementStack::removeNonTopCommon):
+         - Fix the name to match top/bottom.
+        * html/HTMLElementStack.h:
+        (WebCore::HTMLElementStack::ElementRecord::element):
+        (WebCore::HTMLElementStack::ElementRecord::next):
+        (WebCore::HTMLElementStack::ElementRecord::releaseNext):
+        (WebCore::HTMLElementStack::ElementRecord::setNext):
+        * html/HTMLFormattingElementList.cpp:
+        (WebCore::HTMLFormattingElementList::closestElementInScopeWithName):
+        (WebCore::HTMLFormattingElementList::contains):
+        (WebCore::HTMLFormattingElementList::find):
+        (WebCore::HTMLFormattingElementList::remove):
+        * html/HTMLFormattingElementList.h:
+        (WebCore::HTMLFormattingElementList::isEmpty):
+        (WebCore::HTMLFormattingElementList::size):
+        * html/HTMLTreeBuilder.cpp:
+        (WebCore::HTMLTreeBuilder::processStartTag):
+        (WebCore::HTMLTreeBuilder::furthestBlockForFormattingElement):
+         - Part of the Adoption Agency algorithm.
+        (WebCore::HTMLTreeBuilder::findFosterParentFor):
+         - Used to move mis-nested content out of tables.
+           This doesn't seem to work quite right yet.
+        (WebCore::HTMLTreeBuilder::reparentChildren):
+        (WebCore::HTMLTreeBuilder::callTheAdoptionAgency):
+         - The ridiculously long/complicated adoption agency algorithm from HTML5.
+        (WebCore::HTMLTreeBuilder::processEndTag):
+        * html/HTMLTreeBuilder.h:
+
 2010-07-04  Justin Schuh  <jschuh@chromium.org>
 
         Reviewed by Darin Adler.
index 4c4f208..6539e5b 100644 (file)
@@ -539,6 +539,7 @@ bool ContainerNode::appendChild(PassRefPtr<Node> newChild, ExceptionCode& ec, bo
 
 void ContainerNode::addChildCommon(Node* newChild)
 {
+    ASSERT(!newChild->parent()); // Use appendChild if you need to handle reparenting.
     forbidEventDispatch();
     Node* last = m_lastChild;
     // FIXME: This method should take a PassRefPtr.
@@ -555,7 +556,7 @@ void ContainerNode::parserAddChild(PassRefPtr<Node> newChild)
 {
     ASSERT(newChild);
     // This function is only used during parsing.
-    // It does not send any DOM mutation events.
+    // It does not send any DOM mutation events or handle reparenting.
 
     addChildCommon(newChild.get());
 }
index 70761c6..3865897 100644 (file)
@@ -38,23 +38,32 @@ namespace WebCore {
 
 using namespace HTMLNames;
 
-class HTMLElementStack::ElementRecord : public Noncopyable {
-public:
-    ElementRecord(PassRefPtr<Element> element, PassOwnPtr<ElementRecord> next)
-        : m_element(element)
-        , m_next(next)
-    {
-    }
+HTMLElementStack::ElementRecord::ElementRecord(PassRefPtr<Element> element, PassOwnPtr<ElementRecord> next)
+    : m_element(element)
+    , m_next(next)
+{
+    ASSERT(m_element);
+}
 
-    Element* element() const { return m_element.get(); }
-    ElementRecord* next() const { return m_next.get(); }
-    PassOwnPtr<ElementRecord> releaseNext() { return m_next.release(); }
-    void setNext(PassOwnPtr<ElementRecord> next) { m_next = next; }
+HTMLElementStack::ElementRecord::~ElementRecord()
+{
+}
+
+void HTMLElementStack::ElementRecord::replaceElement(PassRefPtr<Element> element)
+{
+    ASSERT(element);
+    // FIXME: Should this call finishParsingChildren?
+    m_element = element;
+}
 
-private:
-    RefPtr<Element> m_element;
-    OwnPtr<ElementRecord> m_next;
-};
+bool HTMLElementStack::ElementRecord::isAbove(ElementRecord* other) const
+{
+    for (ElementRecord* below = next(); below; below = below->next()) {
+        if (below == other)
+            return true;
+    }
+    return false;
+}
 
 HTMLElementStack::HTMLElementStack()
     : m_htmlElement(0)
@@ -104,6 +113,7 @@ void HTMLElementStack::popUntil(Element* element)
 
 void HTMLElementStack::pushHTMLHtmlElement(PassRefPtr<Element> element)
 {
+    ASSERT(!m_top); // <html> should always be the bottom of the stack.
     ASSERT(element->hasTagName(HTMLNames::htmlTag));
     ASSERT(!m_htmlElement);
     m_htmlElement = element.get();
@@ -135,11 +145,46 @@ void HTMLElementStack::push(PassRefPtr<Element> element)
     pushCommon(element);
 }
 
+void HTMLElementStack::insertAbove(PassRefPtr<Element> element, ElementRecord* recordBelow)
+{
+    ASSERT(element);
+    ASSERT(recordBelow);
+    ASSERT(m_top);
+    ASSERT(!element->hasTagName(HTMLNames::htmlTag));
+    ASSERT(!element->hasTagName(HTMLNames::headTag));
+    ASSERT(!element->hasTagName(HTMLNames::bodyTag));
+    ASSERT(m_htmlElement);
+    if (recordBelow == m_top) {
+        push(element);
+        return;
+    }
+
+    for (ElementRecord* recordAbove = m_top.get(); recordAbove; recordAbove = recordAbove->next()) {
+        if (recordAbove->next() != recordBelow)
+            continue;
+
+        recordAbove->setNext(new ElementRecord(element, recordAbove->releaseNext()));
+        recordAbove->next()->element()->beginParsingChildren();
+        return;
+    }
+    ASSERT_NOT_REACHED();
+}
+
+HTMLElementStack::ElementRecord* HTMLElementStack::topRecord() const
+{
+    return m_top.get();
+}
+
 Element* HTMLElementStack::top() const
 {
     return m_top->element();
 }
 
+Element* HTMLElementStack::bottom() const
+{
+    return htmlElement();
+}
+
 void HTMLElementStack::removeHTMLHeadElement(Element* element)
 {
     ASSERT(m_headElement == element);
@@ -148,7 +193,7 @@ void HTMLElementStack::removeHTMLHeadElement(Element* element)
         return;
     }
     m_headElement = 0;
-    removeNonFirstCommon(element);
+    removeNonTopCommon(element);
 }
 
 void HTMLElementStack::remove(Element* element)
@@ -158,45 +203,58 @@ void HTMLElementStack::remove(Element* element)
         pop();
         return;
     }
-    removeNonFirstCommon(element);
+    removeNonTopCommon(element);
 }
 
-bool HTMLElementStack::contains(Element* element) const
+HTMLElementStack::ElementRecord* HTMLElementStack::find(Element* element) const
 {
     for (ElementRecord* pos = m_top.get(); pos; pos = pos->next()) {
         if (pos->element() == element)
-            return true;
+            return pos;
     }
-    return false;
+    return 0;
+}
+
+HTMLElementStack::ElementRecord* HTMLElementStack::topmost(const AtomicString& tagName) const
+{
+    for (ElementRecord* pos = m_top.get(); pos; pos = pos->next()) {
+        if (pos->element()->hasLocalName(tagName))
+            return pos;
+    }
+    return 0;
+}
+
+bool HTMLElementStack::contains(Element* element) const
+{
+    return !!find(element);
 }
 
 namespace {
 
-inline bool isScopeMarker(const Element* element)
+inline bool isScopeMarker(Element* element)
 {
     return element->hasTagName(appletTag)
+        || element->hasTagName(buttonTag)
         || element->hasTagName(captionTag)
-        || element->hasTagName(appletTag)
         || element->hasTagName(htmlTag)
+        || element->hasTagName(marqueeTag)
+        || element->hasTagName(objectTag)
         || element->hasTagName(tableTag)
         || element->hasTagName(tdTag)
         || element->hasTagName(thTag)
-        || element->hasTagName(buttonTag)
-        || element->hasTagName(marqueeTag)
-        || element->hasTagName(objectTag)
 #if ENABLE(SVG_FOREIGN_OBJECT)
         || element->hasTagName(SVGNames::foreignObjectTag)
 #endif
         ;
 }
 
-inline bool isListItemScopeMarker(const Element* element)
+inline bool isListItemScopeMarker(Element* element)
 {
     return isScopeMarker(element)
         || element->hasTagName(olTag)
         || element->hasTagName(ulTag);
 }
-inline bool isTableScopeMarker(const Element* element)
+inline bool isTableScopeMarker(Element* element)
 {
     return element->hasTagName(htmlTag)
         || element->hasTagName(tableTag);
@@ -204,7 +262,7 @@ inline bool isTableScopeMarker(const Element* element)
 
 }
 
-template <bool isMarker(const Element*)>
+template <bool isMarker(Element*)>
 bool inScopeCommon(HTMLElementStack::ElementRecord* top, const AtomicString& targetTag)
 {
     for (HTMLElementStack::ElementRecord* pos = top; pos; pos = pos->next()) {
@@ -246,19 +304,19 @@ bool HTMLElementStack::inTableScope(const AtomicString& targetTag) const
     return inScopeCommon<isTableScopeMarker>(m_top.get(), targetTag);
 }
 
-Element* HTMLElementStack::htmlElement()
+Element* HTMLElementStack::htmlElement() const
 {
     ASSERT(m_htmlElement);
     return m_htmlElement;
 }
 
-Element* HTMLElementStack::headElement()
+Element* HTMLElementStack::headElement() const
 {
     ASSERT(m_headElement);
     return m_headElement;
 }
 
-Element* HTMLElementStack::bodyElement()
+Element* HTMLElementStack::bodyElement() const
 {
     ASSERT(m_bodyElement);
     return m_bodyElement;
@@ -266,6 +324,7 @@ Element* HTMLElementStack::bodyElement()
 
 void HTMLElementStack::pushCommon(PassRefPtr<Element> element)
 {
+    ASSERT(m_htmlElement);
     m_top.set(new ElementRecord(element, m_top.release()));
     top()->beginParsingChildren();
 }
@@ -279,13 +338,12 @@ void HTMLElementStack::popCommon()
     m_top = m_top->releaseNext();
 }
 
-void HTMLElementStack::removeNonFirstCommon(Element* element)
+void HTMLElementStack::removeNonTopCommon(Element* element)
 {
     ASSERT(!element->hasTagName(HTMLNames::htmlTag));
     ASSERT(!element->hasTagName(HTMLNames::bodyTag));
-    ElementRecord* pos = m_top.get();
-    ASSERT(pos->element() != element);
-    while (pos->next()) {
+    ASSERT(top() != element);
+    for (ElementRecord* pos = m_top.get(); pos; pos = pos->next()) {
         if (pos->next()->element() == element) {
             // FIXME: Is it OK to call finishParsingChildren()
             // when the children aren't actually finished?
index 8ca7c81..df290d4 100644 (file)
 #ifndef HTMLElementStack_h
 #define HTMLElementStack_h
 
-#include <wtf/Forward.h>
 #include <wtf/Noncopyable.h>
 #include <wtf/OwnPtr.h>
+#include <wtf/PassOwnPtr.h>
+#include <wtf/RefPtr.h>
 
 namespace WebCore {
 
 class AtomicString;
 class Element;
 
+// NOTE: The HTML5 spec uses a backwards (grows downward) stack.  We're using
+// more standard (grows upwards) stack terminology here.
 class HTMLElementStack : public Noncopyable {
 public:
     HTMLElementStack();
     ~HTMLElementStack();
 
+    class ElementRecord : public Noncopyable {
+    public:
+        ~ElementRecord(); // Public for ~PassOwnPtr()
+    
+        Element* element() const { return m_element.get(); }
+        void replaceElement(PassRefPtr<Element>);
+
+        bool isAbove(ElementRecord*) const;
+
+        ElementRecord* next() const { return m_next.get(); }
+
+    private:
+        friend class HTMLElementStack;
+
+        ElementRecord(PassRefPtr<Element>, PassOwnPtr<ElementRecord>);
+
+        PassOwnPtr<ElementRecord> releaseNext() { return m_next.release(); }
+        void setNext(PassOwnPtr<ElementRecord> next) { m_next = next; }
+
+        RefPtr<Element> m_element;
+        OwnPtr<ElementRecord> m_next;
+    };
+    
     Element* top() const;
+    ElementRecord* topRecord() const;
+    Element* bottom() const;
+    ElementRecord* find(Element*) const;
+    ElementRecord* topmost(const AtomicString& tagName) const;
+
+    void insertAbove(PassRefPtr<Element>, ElementRecord*);
 
     void push(PassRefPtr<Element>);
     void pushHTMLHtmlElement(PassRefPtr<Element>);
@@ -63,16 +95,14 @@ public:
     bool inListItemScope(const AtomicString& tagName) const;
     bool inTableScope(const AtomicString& tagName) const;
 
-    Element* htmlElement();
-    Element* headElement();
-    Element* bodyElement();
+    Element* htmlElement() const;
+    Element* headElement() const;
+    Element* bodyElement() const;
 
-    // Public so free functions can use it, but defined privately.
-    class ElementRecord;
 private:
     void pushCommon(PassRefPtr<Element>);
     void popCommon();
-    void removeNonFirstCommon(Element*);
+    void removeNonTopCommon(Element*);
 
     OwnPtr<ElementRecord> m_top;
 
index e70de42..f8e1dd5 100644 (file)
@@ -64,6 +64,16 @@ void HTMLFormattingElementList::Entry::replaceElement(PassRefPtr<Element> elemen
     m_element = element;
 }
 
+bool HTMLFormattingElementList::Entry::operator==(const Entry& other) const
+{
+    return element() == other.element();
+}
+
+bool HTMLFormattingElementList::Entry::operator!=(const Entry& other) const
+{
+    return element() != other.element();
+}
+
 HTMLFormattingElementList::HTMLFormattingElementList()
 {
 }
@@ -72,11 +82,45 @@ HTMLFormattingElementList::~HTMLFormattingElementList()
 {
 }
 
+Element* HTMLFormattingElementList::closestElementInScopeWithName(const AtomicString& targetName)
+{
+    for (unsigned i = 1; i <= m_entries.size(); ++i) {
+        const Entry& entry = m_entries[m_entries.size() - i];
+        if (entry.isMarker())
+            return 0;
+        if (entry.element()->hasLocalName(targetName))
+            return entry.element();
+    }
+    return 0;
+}
+
+bool HTMLFormattingElementList::contains(Element* element)
+{
+    return !!find(element);
+}
+
+HTMLFormattingElementList::Entry* HTMLFormattingElementList::find(Element* element)
+{
+    size_t index = m_entries.find(element);
+    if (index != notFound) {
+        // This is somewhat of a hack, and is why this method can't be const.
+        return &m_entries[index];
+    }
+    return 0;
+}
+
 void HTMLFormattingElementList::append(Element* element)
 {
     m_entries.append(element);
 }
 
+void HTMLFormattingElementList::remove(Element* element)
+{
+    size_t index = m_entries.find(element);
+    if (index != notFound)
+        m_entries.remove(index);
+}
+
 void HTMLFormattingElementList::clearToLastMarker()
 {
     while (m_entries.size() && !m_entries.last().isMarker())
index a9c56e6..d613a86 100644 (file)
@@ -32,6 +32,7 @@
 
 namespace WebCore {
 
+class AtomicString;
 class Element;
 
 // This may end up merged into HTMLElementStack.
@@ -40,12 +41,6 @@ public:
     HTMLFormattingElementList();
     ~HTMLFormattingElementList();
 
-    bool isEmpty() const { return !size(); }
-    size_t size() const { return m_entries.size(); }
-
-    void append(Element*);
-    void clearToLastMarker();
-
     // Ideally Entry would be private, but HTMLTreeBuilder has to coordinate
     // between the HTMLFormattingElementList and HTMLElementStack and needs
     // access to Entry::isMarker() and Entry::replaceElement() to do so.
@@ -61,10 +56,25 @@ public:
         Element* element() const;
         void replaceElement(PassRefPtr<Element>);
 
+        // Needed for use with Vector.
+        bool operator==(const Entry&) const;
+        bool operator!=(const Entry&) const;
+
     private:
         RefPtr<Element> m_element;
     };
 
+    bool isEmpty() const { return !size(); }
+    size_t size() const { return m_entries.size(); }
+
+    Element* closestElementInScopeWithName(const AtomicString&);
+
+    Entry* find(Element*);
+    bool contains(Element*);
+    void append(Element*);
+    void remove(Element*);
+    void clearToLastMarker();
+
     const Entry& operator[](size_t i) const { return m_entries[i]; }
     Entry& operator[](size_t i) { return m_entries[i]; }
 
index 24556c1..160dd48 100644 (file)
 #include "DocumentType.h"
 #include "Element.h"
 #include "Frame.h"
+#include "HTMLDocument.h"
 #include "HTMLElementFactory.h"
+#include "HTMLHtmlElement.h"
+#include "HTMLNames.h"
 #include "HTMLScriptElement.h"
-#include "HTMLTokenizer.h"
 #include "HTMLToken.h"
-#include "HTMLDocument.h"
-#include "HTMLHtmlElement.h"
+#include "HTMLTokenizer.h"
 #include "LegacyHTMLDocumentParser.h"
-#include "HTMLNames.h"
 #include "LegacyHTMLTreeBuilder.h"
 #include "NotImplemented.h"
-#include "Settings.h"
+#include "SVGNames.h"
 #include "ScriptController.h"
+#include "Settings.h"
 #include "Text.h"
 #include <wtf/UnusedParam.h>
 
@@ -64,6 +65,132 @@ bool shouldUseLegacyTreeBuilder(Document* document)
     return !document->settings() || !document->settings()->html5TreeBuilderEnabled();
 }
 
+// Returns true when tagName matches one of the six numbered heading tags
+// (h1 through h6); false for every other name.
+bool isNumberedHeaderTag(const AtomicString& tagName)
+{
+    if (tagName == h1Tag || tagName == h2Tag || tagName == h3Tag)
+        return true;
+    return tagName == h4Tag || tagName == h5Tag || tagName == h6Tag;
+}
+
+// http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#special
+// Returns true when tagName is one of the tag names listed below, which is
+// this file's encoding of the "special" category linked above.
+// Most entries compare against a named tag constant; bgsound, command,
+// details and figure are compared against string literals instead
+// (presumably because no tag constant exists for them yet -- TODO confirm).
+// The numbered headings are covered by delegating to isNumberedHeaderTag().
+// NOTE(review): buttonTag appears both here and in isScopingTag(); confirm
+// against the spec revision being targeted.
+bool isSpecialTag(const AtomicString& tagName)
+{
+    return tagName == addressTag
+        || tagName == articleTag
+        || tagName == asideTag
+        || tagName == baseTag
+        || tagName == basefontTag
+        || tagName == "bgsound"
+        || tagName == blockquoteTag
+        || tagName == bodyTag
+        || tagName == brTag
+        || tagName == buttonTag
+        || tagName == centerTag
+        || tagName == colTag
+        || tagName == colgroupTag
+        || tagName == "command"
+        || tagName == ddTag
+        || tagName == "details"
+        || tagName == dirTag
+        || tagName == divTag
+        || tagName == dlTag
+        || tagName == dtTag
+        || tagName == embedTag
+        || tagName == fieldsetTag
+        || tagName == "figure"
+        || tagName == footerTag
+        || tagName == formTag
+        || tagName == frameTag
+        || tagName == framesetTag
+        || isNumberedHeaderTag(tagName)
+        || tagName == headTag
+        || tagName == headerTag
+        || tagName == hgroupTag
+        || tagName == hrTag
+        || tagName == iframeTag
+        || tagName == imgTag
+        || tagName == inputTag
+        || tagName == isindexTag
+        || tagName == liTag
+        || tagName == linkTag
+        || tagName == listingTag
+        || tagName == menuTag
+        || tagName == metaTag
+        || tagName == navTag
+        || tagName == noembedTag
+        || tagName == noframesTag
+        || tagName == noscriptTag
+        || tagName == olTag
+        || tagName == pTag
+        || tagName == paramTag
+        || tagName == plaintextTag
+        || tagName == preTag
+        || tagName == scriptTag
+        || tagName == sectionTag
+        || tagName == selectTag
+        || tagName == styleTag
+        || tagName == tbodyTag
+        || tagName == textareaTag
+        || tagName == tfootTag
+        || tagName == theadTag
+        || tagName == titleTag
+        || tagName == trTag
+        || tagName == ulTag
+        || tagName == wbrTag
+        || tagName == xmpTag;
+}
+
+// http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#scoping
+// Same as isScopingTag in LegacyHTMLTreeBuilder.cpp
+// and isScopeMarker in HTMLElementStack.cpp
+// Returns true when tagName is one of the scoping tag names tested below.
+// The SVG foreignObject check is only compiled in when
+// ENABLE(SVG_FOREIGN_OBJECT) is set.
+bool isScopingTag(const AtomicString& tagName)
+{
+    if (tagName == appletTag || tagName == buttonTag || tagName == captionTag)
+        return true;
+#if ENABLE(SVG_FOREIGN_OBJECT)
+    if (tagName == SVGNames::foreignObjectTag)
+        return true;
+#endif
+    if (tagName == htmlTag || tagName == marqueeTag || tagName == objectTag)
+        return true;
+    return tagName == tableTag || tagName == tdTag || tagName == thTag;
+}
+
+// Returns true when tagName is one of the formatting tag names tested below.
+// The anchor tag is deliberately not part of this list; isFormattingTag()
+// adds it on top of this helper.
+bool isNonAnchorFormattingTag(const AtomicString& tagName)
+{
+    if (tagName == bTag || tagName == bigTag || tagName == codeTag || tagName == emTag)
+        return true;
+    if (tagName == fontTag || tagName == iTag || tagName == nobrTag || tagName == sTag)
+        return true;
+    if (tagName == smallTag || tagName == strikeTag || tagName == strongTag)
+        return true;
+    return tagName == ttTag || tagName == uTag;
+}
+
+// http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#formatting
+// Returns true for the anchor tag and for every name accepted by
+// isNonAnchorFormattingTag().
+bool isFormattingTag(const AtomicString& tagName)
+{
+    if (tagName == aTag)
+        return true;
+    return isNonAnchorFormattingTag(tagName);
+}
+
+// http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#phrasing
+// A tag name is treated as phrasing exactly when it is rejected by
+// isSpecialTag(), isScopingTag() and isFormattingTag().
+bool isPhrasingTag(const AtomicString& tagName)
+{
+    return !(isSpecialTag(tagName) || isScopingTag(tagName) || isFormattingTag(tagName));
+}
+
 } // namespace
 
 HTMLTreeBuilder::HTMLTreeBuilder(HTMLTokenizer* tokenizer, HTMLDocument* document, bool reportErrors)
@@ -419,7 +546,7 @@ void HTMLTreeBuilder::processStartTag(AtomicHTMLToken& token)
             insertElement(token);
             return;
         }
-        if (token.name() == h1Tag || token.name() == h2Tag || token.name() == h3Tag || token.name() == h4Tag || token.name() == h5Tag || token.name() == h6Tag) {
+        if (isNumberedHeaderTag(token.name())) {
             processFakePEndTagIfPInScope();
             notImplemented();
             insertElement(token);
@@ -470,7 +597,7 @@ void HTMLTreeBuilder::processStartTag(AtomicHTMLToken& token)
             insertFormattingElement(token);
             return;
         }
-        if (token.name() == bTag || token.name() == bigTag || token.name() == codeTag || token.name() == emTag || token.name() == fontTag || token.name() == iTag || token.name() == sTag || token.name() == smallTag || token.name() == strikeTag || token.name() == strongTag || token.name() == ttTag || token.name() == uTag) {
+        if (isNonAnchorFormattingTag(token.name())) {
             reconstructTheActiveFormattingElements();
             insertFormattingElement(token);
             return;
@@ -673,6 +800,160 @@ bool HTMLTreeBuilder::processBodyEndTagForInBody(AtomicHTMLToken& token)
     return true;
 }
 
+// FIXME: This probably belongs on HTMLElementStack.
+// Walks the stack of open elements from topRecord() along next() until the
+// record holding formattingElement is reached. Returns the most recently
+// visited record whose tag is scoping or special, or 0 when no such record
+// was seen before formattingElement. The caller must guarantee that
+// formattingElement is on the stack; running off the end is asserted against.
+HTMLElementStack::ElementRecord* HTMLTreeBuilder::furthestBlockForFormattingElement(Element* formattingElement)
+{
+    HTMLElementStack::ElementRecord* candidate = 0;
+    HTMLElementStack::ElementRecord* current = m_openElements.topRecord();
+    while (current) {
+        Element* stackElement = current->element();
+        if (stackElement == formattingElement)
+            return candidate;
+        // Anything that is neither phrasing nor formatting is special or scoping.
+        const AtomicString& localName = stackElement->localName();
+        if (isSpecialTag(localName) || isScopingTag(localName))
+            candidate = current;
+        current = current->next();
+    }
+    ASSERT_NOT_REACHED();
+    return 0;
+}
+
+// Attaches element at its foster-parent position:
+//  - no table record on the stack of open elements: append to the bottom
+//    element of the stack (only expected while parsing a fragment);
+//  - the table has a parent: insert element into that parent, immediately
+//    before the table;
+//  - otherwise: append to the element of the record that follows the
+//    table's record on the stack.
+void HTMLTreeBuilder::findFosterParentFor(Element* element)
+{
+    HTMLElementStack::ElementRecord* tableRecord = m_openElements.topmost(tableTag.localName());
+    if (!tableRecord) {
+        ASSERT(m_isParsingFragment);
+        // The bottom of the stack is the <html> element.
+        m_openElements.bottom()->parserAddChild(element);
+        return;
+    }
+
+    Element* table = tableRecord->element();
+    if (!table->parent()) {
+        tableRecord->next()->element()->parserAddChild(element);
+        return;
+    }
+
+    // FIXME: We need an insertElement which does not send mutation events.
+    ExceptionCode ec = 0;
+    table->parent()->insertBefore(element, table, ec);
+    ASSERT(!ec);
+}
+
+// FIXME: This should have a witty name.
+// FIXME: This must be implemented in many other places in WebCore.
+// Moves every child of oldParent onto newParent, in sibling order, with one
+// appendChild() call per child. Any exception code reported by appendChild()
+// is treated as a programming error and asserted against.
+void HTMLTreeBuilder::reparentChildren(Element* oldParent, Element* newParent)
+{
+    Node* child = oldParent->firstChild();
+    while (child) {
+        // Capture the next sibling before the child is handed to appendChild().
+        Node* nextChild = child->nextSibling();
+        // Start from a known value so the ASSERT never reads an indeterminate
+        // ExceptionCode, matching the initialization in findFosterParentFor().
+        ExceptionCode ec = 0;
+        newParent->appendChild(child, ec);
+        ASSERT(!ec);
+        child = nextChild;
+    }
+}
+
+// http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#parsing-main-inbody
+// Runs the adoption agency algorithm for the formatting end tag in token.
+// The numbered comments follow the step numbers of the algorithm linked
+// above. The outer loop only terminates through one of the return
+// statements in steps 1 and 3. Bookmark handling (steps 5, 6.4 and 11) is
+// not implemented yet.
+void HTMLTreeBuilder::callTheAdoptionAgency(AtomicHTMLToken& token)
+{
+    while (1) {
+        // 1.
+        Element* formattingElement = m_activeFormattingElements.closestElementInScopeWithName(token.name());
+        if (!formattingElement) {
+            parseError(token);
+            notImplemented(); // Check the stack of open elements for a more specific parse error.
+            return;
+        }
+        // Look the element up on the stack of open elements before testing
+        // scope. An element that is in the formatting list but missing from
+        // the stack has to be removed from the list; a scope test that ran
+        // first would presumably reject such an element and skip the removal.
+        HTMLElementStack::ElementRecord* formattingElementRecord = m_openElements.find(formattingElement);
+        if (!formattingElementRecord) {
+            parseError(token);
+            m_activeFormattingElements.remove(formattingElement);
+            return;
+        }
+        if (!m_openElements.inScope(formattingElement)) {
+            parseError(token);
+            notImplemented(); // Check the stack of open elements for a more specific parse error.
+            return;
+        }
+        if (formattingElement != currentElement())
+            parseError(token);
+        // 2.
+        HTMLElementStack::ElementRecord* furthestBlock = furthestBlockForFormattingElement(formattingElement);
+        // 3.
+        if (!furthestBlock) {
+            m_openElements.popUntil(formattingElement);
+            m_openElements.pop();
+            m_activeFormattingElements.remove(formattingElement);
+            return;
+        }
+        // 4.
+        ASSERT(furthestBlock->isAbove(formattingElementRecord));
+        Element* commonAncestor = formattingElementRecord->next()->element();
+        // 5.
+        notImplemented(); // bookmark?
+        // 6.
+        HTMLElementStack::ElementRecord* node = furthestBlock;
+        HTMLElementStack::ElementRecord* nextNode = node->next();
+        HTMLElementStack::ElementRecord* lastNode = furthestBlock;
+        while (1) {
+            // 6.1
+            node = nextNode;
+            ASSERT(node);
+            nextNode = node->next(); // Save node->next() for the next iteration in case node is deleted in 6.2.
+            // 6.2
+            if (!m_activeFormattingElements.contains(node->element())) {
+                m_openElements.remove(node->element());
+                node = 0;
+                continue;
+            }
+            // 6.3
+            if (node == formattingElementRecord)
+                break;
+            // 6.4
+            if (lastNode == furthestBlock)
+                notImplemented(); // move bookmark.
+            // 6.5
+            // FIXME: We're supposed to save the original token in the entry.
+            AtomicHTMLToken fakeToken(HTMLToken::StartTag, node->element()->localName());
+            // Is createElement correct? (instead of insertElement)
+            // Does this code ever leave newElement unattached?
+            RefPtr<Element> newElement = createElement(fakeToken);
+            HTMLFormattingElementList::Entry* nodeEntry = m_activeFormattingElements.find(node->element());
+            nodeEntry->replaceElement(newElement.get());
+            node->replaceElement(newElement.release());
+            // 6.6
+            // Use appendChild instead of parserAddChild to handle possible reparenting.
+            // Initialized so the ASSERT never reads an indeterminate value.
+            ExceptionCode ec = 0;
+            node->element()->appendChild(lastNode->element(), ec);
+            ASSERT(!ec);
+            // 6.7
+            lastNode = node;
+        }
+        // 7
+        const AtomicString& commonAncestorTag = commonAncestor->localName();
+        if (commonAncestorTag == tableTag
+            || commonAncestorTag == tbodyTag
+            || commonAncestorTag == tfootTag
+            || commonAncestorTag == theadTag
+            || commonAncestorTag == trTag)
+            findFosterParentFor(lastNode->element());
+        else {
+            // Initialized so the ASSERT never reads an indeterminate value.
+            ExceptionCode ec = 0;
+            commonAncestor->appendChild(lastNode->element(), ec);
+            ASSERT(!ec);
+        }
+        // 8
+        // FIXME: We're supposed to save the original token in the entry.
+        AtomicHTMLToken fakeToken(HTMLToken::StartTag, formattingElement->localName());
+        RefPtr<Element> newElement = createElement(fakeToken);
+        // 9
+        reparentChildren(furthestBlock->element(), newElement.get());
+        // 10
+        furthestBlock->element()->parserAddChild(newElement);
+        // 11
+        m_activeFormattingElements.remove(formattingElement);
+        notImplemented(); // insert new element at bookmark
+        // 12
+        m_openElements.remove(formattingElement);
+        m_openElements.insertAbove(newElement, furthestBlock);
+    }
+}
+
 void HTMLTreeBuilder::processEndTag(AtomicHTMLToken& token)
 {
     switch (insertionMode()) {
@@ -787,7 +1068,7 @@ void HTMLTreeBuilder::processEndTag(AtomicHTMLToken& token)
             m_openElements.pop();
             return;
         }
-        if (token.name() == h1Tag || token.name() == h2Tag || token.name() == h3Tag || token.name() == h4Tag || token.name() == h5Tag || token.name() == h6Tag) {
+        if (isNumberedHeaderTag(token.name())) {
             if (!m_openElements.inScope(token.name())) {
                 parseError(token);
                 return;
@@ -803,9 +1084,8 @@ void HTMLTreeBuilder::processEndTag(AtomicHTMLToken& token)
             notImplemented(); // Take a deep breath.
             return;
         }
-        if (token.name() == aTag || token.name() == bTag || token.name() == bigTag || token.name() == codeTag || token.name() == emTag || token.name() == fontTag || token.name() == iTag || token.name() == nobrTag || token.name() == sTag || token.name() == smallTag || token.name() == strikeTag || token.name() == strongTag || token.name() == ttTag || token.name() == uTag) {
-            notImplemented();
-            // FIXME: There's a complicated algorithm that goes here.
+        if (isFormattingTag(token.name())) {
+            callTheAdoptionAgency(token);
             return;
         }
         if (token.name() == appletTag || token.name() == marqueeTag || token.name() == objectTag) {
index f513f2a..bb76b9c 100644 (file)
@@ -125,6 +125,11 @@ private:
     bool processBodyEndTagForInBody(AtomicHTMLToken&);
     void processFakePEndTagIfPInScope();
 
+    HTMLElementStack::ElementRecord* furthestBlockForFormattingElement(Element*);
+    void findFosterParentFor(Element*);
+    void reparentChildren(Element* oldParent, Element* newParent);
+    void callTheAdoptionAgency(AtomicHTMLToken&);
+
     template<typename ChildType>
     PassRefPtr<ChildType> attach(Node* parent, PassRefPtr<ChildType> prpChild)
     {
index fee07dd..f2edd93 100644 (file)
@@ -152,6 +152,7 @@ inline Frame::Frame(Page* page, HTMLFrameOwnerElement* ownerElement, FrameLoader
     , m_isDisconnected(false)
     , m_excludeFromTextSearch(false)
 {
+    ASSERT(page);
     AtomicString::init();
     HTMLNames::init();
     QualifiedName::init();