2 * Copyright (C) 2010 Google, Inc. All Rights Reserved.
3 * Copyright (C) 2011 Apple Inc. All rights reserved.
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
14 * THIS SOFTWARE IS PROVIDED BY GOOGLE INC. ``AS IS'' AND ANY
15 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GOOGLE INC. OR
18 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 #include "HTMLTreeBuilder.h"
31 #include "DOMWindow.h"
32 #include "DocumentFragment.h"
33 #include "DocumentType.h"
35 #include "HTMLDocument.h"
36 #include "HTMLDocumentParser.h"
37 #include "HTMLElementFactory.h"
38 #include "HTMLFormElement.h"
39 #include "HTMLHtmlElement.h"
40 #include "HTMLNames.h"
41 #include "HTMLParserIdioms.h"
42 #include "HTMLScriptElement.h"
43 #include "HTMLToken.h"
44 #include "HTMLTokenizer.h"
45 #include "LocalizedStrings.h"
46 #include "MathMLNames.h"
47 #include "NotImplemented.h"
50 #include "XLinkNames.h"
51 #include "XMLNSNames.h"
53 #include <wtf/unicode/CharacterNames.h>
57 using namespace HTMLNames;
// Presumably a sentinel for "no line number recorded yet".
// NOTE(review): no reader of this constant is visible in this part of the file - confirm it is still used.
59 static const int uninitializedLineNumberValue = -1;
61 static TextPosition uninitializedPositionValue1()
63 return TextPosition(OrdinalNumber::fromOneBasedInt(-1), OrdinalNumber::first());
68 inline bool isHTMLSpaceOrReplacementCharacter(UChar character)
70 return isHTMLSpace(character) || character == replacementCharacter;
73 inline bool isAllWhitespace(const String& string)
75 return string.isAllSpecialCharacters<isHTMLSpace>();
78 inline bool isAllWhitespaceOrReplacementCharacters(const String& string)
80 return string.isAllSpecialCharacters<isHTMLSpaceOrReplacementCharacter>();
// Reports whether |tagName| names a numbered heading element.
// NOTE(review): only the h1Tag comparison is visible here; the remaining
// alternatives of this expression should be confirmed against the full file.
83 bool isNumberedHeaderTag(const AtomicString& tagName)
85 return tagName == h1Tag
// Reports whether |tagName| is one of the table caption/column tags.
// NOTE(review): the function name implies a colTag alternative as well, but only
// the captionTag and colgroupTag comparisons are visible here - confirm.
93 bool isCaptionColOrColgroupTag(const AtomicString& tagName)
95 return tagName == captionTag
97 || tagName == colgroupTag;
100 bool isTableCellContextTag(const AtomicString& tagName)
102 return tagName == thTag || tagName == tdTag;
105 bool isTableBodyContextTag(const AtomicString& tagName)
107 return tagName == tbodyTag
108 || tagName == tfootTag
109 || tagName == theadTag;
// Classifies |node| against the "special" element category of the HTML parsing
// specification linked below.
// |node| is dereferenced unconditionally, so callers must pass a non-null node.
112 // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#special
113 bool isSpecialNode(Node* node)
// Foreign-content elements are tested first: the MathML mi/mo/mn/ms/mtext and
// annotation-xml tags and the SVG foreignObject/desc/title tags.
115 if (node->hasTagName(MathMLNames::miTag)
116 || node->hasTagName(MathMLNames::moTag)
117 || node->hasTagName(MathMLNames::mnTag)
118 || node->hasTagName(MathMLNames::msTag)
119 || node->hasTagName(MathMLNames::mtextTag)
120 || node->hasTagName(MathMLNames::annotation_xmlTag)
121 || node->hasTagName(SVGNames::foreignObjectTag)
122 || node->hasTagName(SVGNames::descTag)
123 || node->hasTagName(SVGNames::titleTag))
// Document fragments are checked before the namespace test.
125 if (node->nodeType() == Node::DOCUMENT_FRAGMENT_NODE)
// Nodes outside the HTML namespace get their own early result; everything after
// this point compares the local name against HTML tag names.
127 if (!isInHTMLNamespace(node))
129 const AtomicString& tagName = node->localName();
130 return tagName == addressTag
131 || tagName == appletTag
132 || tagName == areaTag
133 || tagName == articleTag
134 || tagName == asideTag
135 || tagName == baseTag
136 || tagName == basefontTag
137 || tagName == bgsoundTag
138 || tagName == blockquoteTag
139 || tagName == bodyTag
141 || tagName == buttonTag
142 || tagName == captionTag
143 || tagName == centerTag
145 || tagName == colgroupTag
146 || tagName == commandTag
148 || tagName == detailsTag
153 || tagName == embedTag
154 || tagName == fieldsetTag
155 || tagName == figcaptionTag
156 || tagName == figureTag
157 || tagName == footerTag
158 || tagName == formTag
159 || tagName == frameTag
160 || tagName == framesetTag
161 || isNumberedHeaderTag(tagName)
162 || tagName == headTag
163 || tagName == headerTag
164 || tagName == hgroupTag
166 || tagName == htmlTag
167 || tagName == iframeTag
169 || tagName == inputTag
170 || tagName == isindexTag
172 || tagName == linkTag
173 || tagName == listingTag
174 || tagName == marqueeTag
175 || tagName == menuTag
176 || tagName == metaTag
178 || tagName == noembedTag
179 || tagName == noframesTag
180 || tagName == noscriptTag
181 || tagName == objectTag
184 || tagName == paramTag
185 || tagName == plaintextTag
187 || tagName == scriptTag
188 || tagName == sectionTag
189 || tagName == selectTag
190 || tagName == styleTag
191 || tagName == summaryTag
192 || tagName == tableTag
193 || isTableBodyContextTag(tagName)
195 || tagName == textareaTag
197 || tagName == titleTag
201 || tagName == xmpTag;
// Reports whether |tagName| is a formatting tag other than <a> and <nobr>;
// those two are layered on top by isNonAnchorFormattingTag() and isFormattingTag().
// NOTE(review): only some alternatives of the expression are visible here
// (b, code, font, small, strike, strong) - confirm the full list.
204 bool isNonAnchorNonNobrFormattingTag(const AtomicString& tagName)
206 return tagName == bTag
208 || tagName == codeTag
210 || tagName == fontTag
213 || tagName == smallTag
214 || tagName == strikeTag
215 || tagName == strongTag
220 bool isNonAnchorFormattingTag(const AtomicString& tagName)
222 return tagName == nobrTag
223 || isNonAnchorNonNobrFormattingTag(tagName);
226 // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#formatting
227 bool isFormattingTag(const AtomicString& tagName)
229 return tagName == aTag || isNonAnchorFormattingTag(tagName);
// Looks for a <form> element starting at |element| itself and moving up through
// its parents; a match is returned as an HTMLFormElement*.
// NOTE(review): the loop header and the not-found return values are not visible
// here - confirm what is returned when no form is found.
232 HTMLFormElement* closestFormAncestor(Element* element)
// |element| itself counts as a candidate.
235 if (element->hasTagName(formTag))
236 return static_cast<HTMLFormElement*>(element);
237 ContainerNode* parent = element->parentNode();
// The walk only continues through element parents.
238 if (!parent || !parent->isElementNode())
240 element = static_cast<Element*>(parent);
// Read cursor over a run of UChar data. [m_current, m_end) is the part that has
// not been consumed yet; the skip*/take* helpers advance m_current.
// The characters are not copied: the cursor holds raw pointers into the token's
// or string's storage.
// NOTE(review): this means the source must outlive the buffer - confirm at call sites.
247 class HTMLTreeBuilder::ExternalCharacterTokenBuffer {
248 WTF_MAKE_NONCOPYABLE(ExternalCharacterTokenBuffer);
// Covers the character data of |token|.
250 explicit ExternalCharacterTokenBuffer(AtomicHTMLToken& token)
251 : m_current(token.characters().data())
252 , m_end(m_current + token.characters().size())
// Covers the characters of |string|.
257 explicit ExternalCharacterTokenBuffer(const String& string)
258 : m_current(string.characters())
259 , m_end(m_current + string.length())
264 ~ExternalCharacterTokenBuffer()
// True once every character has been consumed.
269 bool isEmpty() const { return m_current == m_end; }
271 void skipAtMostOneLeadingNewline()
274 if (*m_current == '\n')
// Advances past leading characters that satisfy isHTMLSpace().
278 void skipLeadingWhitespace()
280 skipLeading<isHTMLSpace>();
// Like skipLeadingWhitespace(), but hands back the skipped characters.
283 String takeLeadingWhitespace()
285 return takeLeading<isHTMLSpace>();
// Advances past leading characters that satisfy isNotHTMLSpace().
288 void skipLeadingNonWhitespace()
290 skipLeading<isNotHTMLSpace>();
// Returns the characters between the cursor position on entry and the cursor
// position at the return statement.
293 String takeRemaining()
296 const UChar* start = m_current;
298 return String(start, m_current - start);
// Appends the unconsumed range [m_current, m_end) to |recipient|.
301 void giveRemainingTo(StringBuilder& recipient)
303 recipient.append(m_current, m_end - m_current);
// Walks the rest of the buffer, collecting characters into |whitespace|.
// NOTE(review): the filter applied before each append is not visible here;
// by the method name it presumably keeps HTML whitespace only - confirm.
307 String takeRemainingWhitespace()
310 Vector<UChar> whitespace;
312 UChar cc = *m_current++;
314 whitespace.append(cc);
315 } while (m_current < m_end);
316 // Returning the null string when there aren't any whitespace
317 // characters is slightly cleaner semantically because we don't want
318 // to insert a text node (as opposed to inserting an empty text node).
319 if (whitespace.isEmpty())
321 return String::adopt(whitespace);
// Advances the cursor while |characterPredicate| accepts the current character,
// checking for the end of the buffer after each step.
325 template<bool characterPredicate(UChar)>
329 while (characterPredicate(*m_current)) {
330 if (++m_current == m_end)
// Skips as above and returns the skipped characters as a String.
335 template<bool characterPredicate(UChar)>
339 const UChar* start = m_current;
340 skipLeading<characterPredicate>();
341 if (start == m_current)
343 return String(start, m_current - start);
// Next unconsumed character.
346 const UChar* m_current;
// Constructor for parsing into |document|.
// The construction site (m_tree) is bound to |document| with the given depth
// limit, both insertion modes start at InitialMode, and the script-position
// members start at their "unset" values.
351 HTMLTreeBuilder::HTMLTreeBuilder(HTMLDocumentParser* parser, HTMLDocument* document, bool reportErrors, bool usePreHTML5ParserQuirks, unsigned maximumDOMTreeDepth)
353 , m_document(document)
354 , m_tree(document, maximumDOMTreeDepth)
355 , m_reportErrors(reportErrors)
357 , m_insertionMode(InitialMode)
358 , m_originalInsertionMode(InitialMode)
359 , m_shouldSkipLeadingNewline(false)
361 , m_scriptToProcessStartPosition(uninitializedPositionValue1())
362 , m_lastScriptElementStartPosition(TextPosition::belowRangePosition())
363 , m_usePreHTML5ParserQuirks(usePreHTML5ParserQuirks)
367 // FIXME: Member variables should be grouped into self-initializing structs to
368 // minimize code duplication between these constructors.
// Constructor for parsing into |fragment| in the context of |contextElement|.
// The document is taken from the fragment, error reporting is switched off, and
// the fragment context records the fragment, context element and scripting
// permission.
369 HTMLTreeBuilder::HTMLTreeBuilder(HTMLDocumentParser* parser, DocumentFragment* fragment, Element* contextElement, FragmentScriptingPermission scriptingPermission, bool usePreHTML5ParserQuirks, unsigned maximumDOMTreeDepth)
371 , m_fragmentContext(fragment, contextElement, scriptingPermission)
372 , m_document(fragment->document())
373 , m_tree(fragment, scriptingPermission, maximumDOMTreeDepth)
374 , m_reportErrors(false) // FIXME: Why not report errors in fragments?
376 , m_insertionMode(InitialMode)
377 , m_originalInsertionMode(InitialMode)
378 , m_shouldSkipLeadingNewline(false)
380 , m_scriptToProcessStartPosition(uninitializedPositionValue1())
381 , m_lastScriptElementStartPosition(TextPosition::belowRangePosition())
382 , m_usePreHTML5ParserQuirks(usePreHTML5ParserQuirks)
384 // FIXME: This assertion will become invalid if <http://webkit.org/b/60316> is fixed.
385 ASSERT(contextElement);
// The runtime check is kept in addition to the assertion, so a null context
// element simply skips the fragment-case setup.
386 if (contextElement) {
387 // Steps 4.2-4.6 of the HTML5 Fragment Case parsing algorithm:
388 // http://www.whatwg.org/specs/web-apps/current-work/multipage/the-end.html#fragment-case
389 // For efficiency, we skip step 4.2 ("Let root be a new html element with no attributes")
390 // and instead use the DocumentFragment as a root node.
391 m_tree.openElements()->pushRootNode(fragment);
392 resetInsertionModeAppropriately();
// The form pointer is seeded from the nearest <form> at or above the context element.
393 m_tree.setForm(closestFormAncestor(contextElement));
397 HTMLTreeBuilder::~HTMLTreeBuilder()
// Detach hook, called before the tree builder is destroyed (see the comments below).
// NOTE(review): the statement that performs the detach is not visible here.
401 void HTMLTreeBuilder::detach()
403 // This call makes little sense in fragment mode, but for consistency
404 // DocumentParser expects detach() to always be called before it's destroyed.
406 // HTMLConstructionSite might be on the callstack when detach() is called
407 // otherwise we'd just call m_tree.clear() here instead.
// Default context, used when no fragment is being parsed: no context element,
// scripting allowed.
411 HTMLTreeBuilder::FragmentParsingContext::FragmentParsingContext()
413 , m_contextElement(0)
414 , m_scriptingPermission(FragmentScriptingAllowed)
418 HTMLTreeBuilder::FragmentParsingContext::FragmentParsingContext(DocumentFragment* fragment, Element* contextElement, FragmentScriptingPermission scriptingPermission)
419 : m_fragment(fragment)
420 , m_contextElement(contextElement)
421 , m_scriptingPermission(scriptingPermission)
423 ASSERT(!fragment->hasChildNodes());
426 HTMLTreeBuilder::FragmentParsingContext::~FragmentParsingContext()
// Hands the pending script element over to the caller and reports where it
// started through |scriptStartPosition|. The stored start position is reset to
// the uninitialized value and m_scriptToProcess is released.
430 PassRefPtr<Element> HTMLTreeBuilder::takeScriptToProcess(TextPosition& scriptStartPosition)
432 // Unpause ourselves, callers may pause us again when processing the script.
433 // The HTML5 spec is written as though scripts are executed inside the tree
434 // builder. We pause the parser to exit the tree builder, and then resume
435 // before running scripts.
437 scriptStartPosition = m_scriptToProcessStartPosition;
438 m_scriptToProcessStartPosition = uninitializedPositionValue1();
439 return m_scriptToProcess.release();
// Entry point for a raw tokenizer token: wraps |rawToken| in an AtomicHTMLToken
// and forwards it to constructTreeFromAtomicToken().
442 void HTMLTreeBuilder::constructTreeFromToken(HTMLToken& rawToken)
444 AtomicHTMLToken token(rawToken);
446 // We clear the rawToken in case constructTreeFromAtomicToken
447 // synchronously re-enters the parser. We don't clear the token immediately
448 // for Character tokens because the AtomicHTMLToken avoids copying the
449 // characters by keeping a pointer to the underlying buffer in the
450 // HTMLToken. Fortunately, Character tokens can't cause us to re-enter
453 // FIXME: Stop clearing the rawToken once we start running the parser off
454 // the main thread or once we stop allowing synchronous JavaScript
455 // execution from parseAttribute.
456 if (rawToken.type() != HTMLTokenTypes::Character)
459 constructTreeFromAtomicToken(token);
// A token that is still initialized at this point must be the Character token
// that was deliberately left alone above.
461 if (!rawToken.isUninitialized()) {
462 ASSERT(rawToken.type() == HTMLTokenTypes::Character);
// Processes one atomic token, then updates tokenizer flags to match the new
// tree state and runs the construction site's queued tasks.
467 void HTMLTreeBuilder::constructTreeFromAtomicToken(AtomicHTMLToken& token)
469 if (shouldProcessTokenInForeignContent(token))
470 processTokenInForeignContent(token);
// "Foreign content" here means: the tree is not empty and the current node is
// neither in the HTML namespace nor an HTML / MathML-text integration point.
474 bool inForeignContent = !m_tree.isEmpty()
475 && !isInHTMLNamespace(m_tree.currentNode())
476 && !HTMLElementStack::isHTMLIntegrationPoint(m_tree.currentNode())
477 && !HTMLElementStack::isMathMLTextIntegrationPoint(m_tree.currentNode());
// Null-character replacement is forced in TextMode and in foreign content;
// CDATA sections are allowed only in foreign content.
479 m_parser->tokenizer()->setForceNullCharacterReplacement(m_insertionMode == TextMode || inForeignContent);
480 m_parser->tokenizer()->setShouldAllowCDATA(inForeignContent);
482 m_tree.executeQueuedTasks();
483 // We might be detached now.
486 void HTMLTreeBuilder::processToken(AtomicHTMLToken& token)
488 switch (token.type()) {
489 case HTMLTokenTypes::Uninitialized:
490 ASSERT_NOT_REACHED();
492 case HTMLTokenTypes::DOCTYPE:
493 m_shouldSkipLeadingNewline = false;
494 processDoctypeToken(token);
496 case HTMLTokenTypes::StartTag:
497 m_shouldSkipLeadingNewline = false;
498 processStartTag(token);
500 case HTMLTokenTypes::EndTag:
501 m_shouldSkipLeadingNewline = false;
502 processEndTag(token);
504 case HTMLTokenTypes::Comment:
505 m_shouldSkipLeadingNewline = false;
506 processComment(token);
508 case HTMLTokenTypes::Character:
509 processCharacter(token);
511 case HTMLTokenTypes::EndOfFile:
512 m_shouldSkipLeadingNewline = false;
513 processEndOfFile(token);
// Handles a DOCTYPE token.
// NOTE(review): what happens in insertion modes other than the two tested below
// is not visible here.
518 void HTMLTreeBuilder::processDoctypeToken(AtomicHTMLToken& token)
520 ASSERT(token.type() == HTMLTokenTypes::DOCTYPE);
// In the initial mode the doctype is inserted and parsing moves to BeforeHTMLMode.
521 if (m_insertionMode == InitialMode) {
522 m_tree.insertDoctype(token);
523 setInsertionMode(BeforeHTMLMode);
// In "in table text" mode, run that mode's default handling first, then
// reprocess the same token.
526 if (m_insertionMode == InTableTextMode) {
527 defaultForInTableText();
528 processDoctypeToken(token);
534 void HTMLTreeBuilder::processFakeStartTag(const QualifiedName& tagName, PassOwnPtr<NamedNodeMap> attributes)
536 // FIXME: We'll need a fancier conversion than just "localName" for SVG/MathML tags.
537 AtomicHTMLToken fakeToken(HTMLTokenTypes::StartTag, tagName.localName(), attributes);
538 processStartTag(fakeToken);
541 void HTMLTreeBuilder::processFakeEndTag(const QualifiedName& tagName)
543 // FIXME: We'll need a fancier conversion than just "localName" for SVG/MathML tags.
544 AtomicHTMLToken fakeToken(HTMLTokenTypes::EndTag, tagName.localName());
545 processEndTag(fakeToken);
548 void HTMLTreeBuilder::processFakeCharacters(const String& characters)
550 ASSERT(!characters.isEmpty());
551 ExternalCharacterTokenBuffer buffer(characters);
552 processCharacterBuffer(buffer);
// Closes an open <p> with a synthetic end tag; when no <p> is in button scope
// the first check takes its early-out instead.
// NOTE(review): the statement that consumes |endP| is not visible here.
555 void HTMLTreeBuilder::processFakePEndTagIfPInButtonScope()
557 if (!m_tree.openElements()->inButtonScope(pTag.localName()))
559 AtomicHTMLToken endP(HTMLTokenTypes::EndTag, pTag.localName());
// Builds the attribute map for the <input> synthesized from an <isindex> token.
// It starts from the token's own attributes (taken from the token), drops
// name/action/prompt, and adds a name attribute whose value is the isindex
// tag's local name.
563 PassOwnPtr<NamedNodeMap> HTMLTreeBuilder::attributesForIsindexInput(AtomicHTMLToken& token)
// takeAttributes() transfers ownership, so the token's attributes must not be relied on afterwards.
565 OwnPtr<NamedNodeMap> attributes = token.takeAttributes();
// Fallback map.
// NOTE(review): presumably used when the token carried no attributes - the
// guarding condition is not visible here.
567 attributes = NamedNodeMap::create();
569 attributes->removeAttribute(nameAttr);
570 attributes->removeAttribute(actionAttr);
571 attributes->removeAttribute(promptAttr);
574 RefPtr<Attribute> mappedAttribute = Attribute::create(nameAttr, isindexTag.localName());
575 attributes->insertAttribute(mappedAttribute.release(), false);
576 return attributes.release();
// Expands an <isindex> start tag into synthetic tokens, in this order:
// <form>, <hr>, <label>, prompt text, <input>, </label>, <hr>, </form>.
579 void HTMLTreeBuilder::processIsindexStartTagForInBody(AtomicHTMLToken& token)
581 ASSERT(token.type() == HTMLTokenTypes::StartTag);
582 ASSERT(token.name() == isindexTag);
586 notImplemented(); // Acknowledge self-closing flag
587 processFakeStartTag(formTag);
// The action attribute of <isindex> is copied onto the synthesized form.
588 RefPtr<Attribute> actionAttribute = token.getAttributeItem(actionAttr);
589 if (actionAttribute) {
590 ASSERT(m_tree.currentElement()->hasTagName(formTag));
591 m_tree.currentElement()->setAttribute(actionAttr, actionAttribute->value());
593 processFakeStartTag(hrTag);
594 processFakeStartTag(labelTag);
// Label text: the token's prompt attribute or the localized default string.
// NOTE(review): the condition choosing between the two is not visible here.
595 RefPtr<Attribute> promptAttribute = token.getAttributeItem(promptAttr);
597 processFakeCharacters(promptAttribute->value());
599 processFakeCharacters(searchableIndexIntroduction());
// This must come after the getAttributeItem() calls above, because
// attributesForIsindexInput() takes the attributes out of the token.
600 processFakeStartTag(inputTag, attributesForIsindexInput(token));
601 notImplemented(); // This second set of characters may be needed by non-english locales.
602 processFakeEndTag(labelTag);
603 processFakeStartTag(hrTag);
604 processFakeEndTag(formTag);
609 bool isLi(const ContainerNode* element)
611 return element->hasTagName(liTag);
614 bool isDdOrDt(const ContainerNode* element)
616 return element->hasTagName(ddTag)
617 || element->hasTagName(dtTag);
// Shared handling for start tags that close an already-open element of the same
// kind; it is instantiated with isLi and isDdOrDt.
// It walks the open-element stack from the top. A node accepted by |shouldClose|
// gets a synthetic end tag. A special node other than address/div/p is also
// tested for along the way. Afterwards an open <p> in button scope is closed and
// the new element is inserted.
622 template <bool shouldClose(const ContainerNode*)>
623 void HTMLTreeBuilder::processCloseWhenNestedTag(AtomicHTMLToken& token)
625 m_framesetOk = false;
626 HTMLElementStack::ElementRecord* nodeRecord = m_tree.openElements()->topRecord();
// A RefPtr keeps the node alive across processFakeEndTag(), which may pop it from the stack.
628 RefPtr<ContainerNode> node = nodeRecord->node();
629 if (shouldClose(node.get())) {
630 ASSERT(node->isElementNode());
631 processFakeEndTag(toElement(node.get())->tagQName());
634 if (isSpecialNode(node.get()) && !node->hasTagName(addressTag) && !node->hasTagName(divTag) && !node->hasTagName(pTag))
// Otherwise continue with the next record down the stack.
636 nodeRecord = nodeRecord->next();
638 processFakePEndTagIfPInButtonScope();
639 m_tree.insertHTMLElement(token);
// Lookup table from a name as written in a token (an AtomicString) to the
// QualifiedName it should be replaced with.
644 typedef HashMap<AtomicString, QualifiedName> PrefixedNameToQualifiedNameMap;
646 void mapLoweredLocalNameToName(PrefixedNameToQualifiedNameMap* map, QualifiedName** names, size_t length)
648 for (size_t i = 0; i < length; ++i) {
649 const QualifiedName& name = *names[i];
650 const AtomicString& localName = name.localName();
651 AtomicString loweredLocalName = localName.lower();
652 if (loweredLocalName != localName)
653 map->add(loweredLocalName, name);
// Restores the mixed-case spelling of an SVG tag name on |token|.
// A function-local map from lower-cased SVG tag names to their QualifiedNames is
// built from SVGNames::getSVGTags() and never freed. The token is renamed to the
// cased local name when a mapping exists.
657 void adjustSVGTagNameCase(AtomicHTMLToken& token)
659 static PrefixedNameToQualifiedNameMap* caseMap = 0;
661 caseMap = new PrefixedNameToQualifiedNameMap;
663 QualifiedName** svgTags = SVGNames::getSVGTags(&length);
664 mapLoweredLocalNameToName(caseMap, svgTags, length);
// A null local name means the map had no entry for this tag name.
667 const QualifiedName& casedName = caseMap->get(token.name());
668 if (casedName.localName().isNull())
670 token.setName(casedName.localName());
// Restores the mixed-case spelling of attribute names on |token|.
// |getAttrs| supplies the attribute table; each instantiation has its own
// function-local map from lower-cased names to QualifiedNames, which is never freed.
673 template<QualifiedName** getAttrs(size_t* length)>
674 void adjustAttributes(AtomicHTMLToken& token)
676 static PrefixedNameToQualifiedNameMap* caseMap = 0;
678 caseMap = new PrefixedNameToQualifiedNameMap;
680 QualifiedName** attrs = getAttrs(&length);
681 mapLoweredLocalNameToName(caseMap, attrs, length);
684 NamedNodeMap* attributes = token.attributes();
// Attributes with a mapping are renamed in place; the rest are left untouched.
688 for (unsigned x = 0; x < attributes->length(); ++x) {
689 Attribute* attribute = attributes->attributeItem(x);
690 const QualifiedName& casedName = caseMap->get(attribute->localName());
691 if (!casedName.localName().isNull())
692 attribute->parserSetName(casedName);
696 void adjustSVGAttributes(AtomicHTMLToken& token)
698 adjustAttributes<SVGNames::getSVGAttrs>(token);
701 void adjustMathMLAttributes(AtomicHTMLToken& token)
703 adjustAttributes<MathMLNames::getMathMLAttrs>(token);
706 void addNamesWithPrefix(PrefixedNameToQualifiedNameMap* map, const AtomicString& prefix, QualifiedName** names, size_t length)
708 for (size_t i = 0; i < length; ++i) {
709 QualifiedName* name = names[i];
710 const AtomicString& localName = name->localName();
711 AtomicString prefixColonLocalName = prefix + ':' + localName;
712 QualifiedName nameWithPrefix(prefix, localName, name->namespaceURI());
713 map->add(prefixColonLocalName, nameWithPrefix);
// Rewrites prefixed attribute names on |token| to their namespaced QualifiedNames.
// The function-local map (never freed) holds the "xlink:"-prefixed XLink
// attributes, the "xml:"-prefixed XML attributes, "xmlns" and "xmlns:xlink".
717 void adjustForeignAttributes(AtomicHTMLToken& token)
719 static PrefixedNameToQualifiedNameMap* map = 0;
721 map = new PrefixedNameToQualifiedNameMap;
723 QualifiedName** attrs = XLinkNames::getXLinkAttrs(&length);
724 addNamesWithPrefix(map, "xlink", attrs, length);
726 attrs = XMLNames::getXMLAttrs(&length);
727 addNamesWithPrefix(map, "xml", attrs, length);
729 map->add("xmlns", XMLNSNames::xmlnsAttr);
730 map->add("xmlns:xlink", QualifiedName("xmlns", "xlink", XMLNSNames::xmlnsNamespaceURI));
733 NamedNodeMap* attributes = token.attributes();
// Attributes with a mapping are renamed in place; the rest are left untouched.
737 for (unsigned x = 0; x < attributes->length(); ++x) {
738 Attribute* attribute = attributes->attributeItem(x);
739 const QualifiedName& name = map->get(attribute->localName());
740 if (!name.localName().isNull())
741 attribute->parserSetName(name);
// Handles a start tag token under the "in body" rules. The function is a
// sequence of checks on token.name(), one per tag or tag group; the final two
// statements are the generic handling for any tag that reaches them.
747 void HTMLTreeBuilder::processStartTagForInBody(AtomicHTMLToken& token)
749 ASSERT(token.type() == HTMLTokenTypes::StartTag);
// <html>
750 if (token.name() == htmlTag) {
751 m_tree.insertHTMLHtmlStartTagInBody(token);
// Tags delegated to the "in head" handler, which is expected to accept them (asserted).
754 if (token.name() == baseTag
755 || token.name() == basefontTag
756 || token.name() == bgsoundTag
757 || token.name() == commandTag
758 || token.name() == linkTag
759 || token.name() == metaTag
760 || token.name() == noframesTag
761 || token.name() == scriptTag
762 || token.name() == styleTag
763 || token.name() == titleTag) {
764 bool didProcess = processStartTagForInHead(token);
765 ASSERT_UNUSED(didProcess, didProcess);
// <body>: a stack without a body element in second position is only expected
// when parsing a fragment (asserted).
768 if (token.name() == bodyTag) {
769 if (!m_tree.openElements()->secondElementIsHTMLBodyElement() || m_tree.openElements()->hasOnlyOneElement()) {
770 ASSERT(isParsingFragment());
773 m_framesetOk = false;
774 m_tree.insertHTMLBodyStartTagInBody(token);
// <frameset>: removes the existing body element from its parent, pops the stack
// back to the html element, inserts the frameset and switches to InFramesetMode.
777 if (token.name() == framesetTag) {
779 if (!m_tree.openElements()->secondElementIsHTMLBodyElement() || m_tree.openElements()->hasOnlyOneElement()) {
780 ASSERT(isParsingFragment());
785 ExceptionCode ec = 0;
786 m_tree.openElements()->bodyElement()->remove(ec);
788 m_tree.openElements()->popUntil(m_tree.openElements()->bodyElement());
789 m_tree.openElements()->popHTMLBodyElement();
790 ASSERT(m_tree.openElements()->top() == m_tree.openElements()->htmlElement());
791 m_tree.insertHTMLElement(token);
792 setInsertionMode(InFramesetMode);
// Block-level containers: close an open <p> in button scope, then insert.
795 if (token.name() == addressTag
796 || token.name() == articleTag
797 || token.name() == asideTag
798 || token.name() == blockquoteTag
799 || token.name() == centerTag
800 || token.name() == detailsTag
801 || token.name() == dirTag
802 || token.name() == divTag
803 || token.name() == dlTag
804 || token.name() == fieldsetTag
805 || token.name() == figcaptionTag
806 || token.name() == figureTag
807 || token.name() == footerTag
808 || token.name() == headerTag
809 || token.name() == hgroupTag
810 || token.name() == menuTag
811 || token.name() == navTag
812 || token.name() == olTag
813 || token.name() == pTag
814 || token.name() == sectionTag
815 || token.name() == summaryTag
816 || token.name() == ulTag) {
817 processFakePEndTagIfPInButtonScope();
818 m_tree.insertHTMLElement(token);
// Numbered headings: a heading that is the current node is popped first, so
// headings do not nest.
821 if (isNumberedHeaderTag(token.name())) {
822 processFakePEndTagIfPInButtonScope();
823 if (isNumberedHeaderTag(m_tree.currentNode()->localName())) {
825 m_tree.openElements()->pop();
827 m_tree.insertHTMLElement(token);
// <pre>/<listing>: m_shouldSkipLeadingNewline is set after insertion.
830 if (token.name() == preTag || token.name() == listingTag) {
831 processFakePEndTagIfPInButtonScope();
832 m_tree.insertHTMLElement(token);
833 m_shouldSkipLeadingNewline = true;
834 m_framesetOk = false;
// <form>
837 if (token.name() == formTag) {
842 processFakePEndTagIfPInButtonScope();
843 m_tree.insertHTMLFormElement(token);
// <li>, <dd>, <dt>: share processCloseWhenNestedTag with different predicates.
846 if (token.name() == liTag) {
847 processCloseWhenNestedTag<isLi>(token);
850 if (token.name() == ddTag || token.name() == dtTag) {
851 processCloseWhenNestedTag<isDdOrDt>(token);
// <plaintext>: the tokenizer is switched to PLAINTEXTState.
854 if (token.name() == plaintextTag) {
855 processFakePEndTagIfPInButtonScope();
856 m_tree.insertHTMLElement(token);
857 m_parser->tokenizer()->setState(HTMLTokenizerState::PLAINTEXTState);
// <button>: a button already in scope is closed with a synthetic end tag and the
// token is then reprocessed.
860 if (token.name() == buttonTag) {
861 if (m_tree.openElements()->inScope(buttonTag)) {
863 processFakeEndTag(buttonTag);
864 processStartTag(token); // FIXME: Could we just fall through here?
867 m_tree.reconstructTheActiveFormattingElements();
868 m_tree.insertHTMLElement(token);
869 m_framesetOk = false;
// <a>: looks up the closest <a> in the active formatting elements, closes it
// with a synthetic end tag and removes it from both the active formatting list
// and the open-element stack.
// NOTE(review): the guard on |activeATag| around those statements is not visible here.
872 if (token.name() == aTag) {
873 Element* activeATag = m_tree.activeFormattingElements()->closestElementInScopeWithName(aTag.localName());
876 processFakeEndTag(aTag);
877 m_tree.activeFormattingElements()->remove(activeATag);
878 if (m_tree.openElements()->contains(activeATag))
879 m_tree.openElements()->remove(activeATag);
881 m_tree.reconstructTheActiveFormattingElements();
882 m_tree.insertFormattingElement(token);
// Other formatting tags.
885 if (isNonAnchorNonNobrFormattingTag(token.name())) {
886 m_tree.reconstructTheActiveFormattingElements();
887 m_tree.insertFormattingElement(token);
// <nobr>: a <nobr> already in scope is closed first.
890 if (token.name() == nobrTag) {
891 m_tree.reconstructTheActiveFormattingElements();
892 if (m_tree.openElements()->inScope(nobrTag)) {
894 processFakeEndTag(nobrTag);
895 m_tree.reconstructTheActiveFormattingElements();
897 m_tree.insertFormattingElement(token);
// <applet>, <marquee>, <object>: inserted, followed by a marker in the active
// formatting elements.
900 if (token.name() == appletTag
901 || token.name() == marqueeTag
902 || token.name() == objectTag) {
903 m_tree.reconstructTheActiveFormattingElements();
904 m_tree.insertHTMLElement(token);
905 m_tree.activeFormattingElements()->appendMarker();
906 m_framesetOk = false;
// <table>: outside quirks mode an open <p> in button scope is closed first.
909 if (token.name() == tableTag) {
910 if (!m_document->inQuirksMode() && m_tree.openElements()->inButtonScope(pTag))
911 processFakeEndTag(pTag);
912 m_tree.insertHTMLElement(token);
913 m_framesetOk = false;
914 setInsertionMode(InTableMode);
// <image> is renamed to <img> and handled by the next group.
917 if (token.name() == imageTag) {
919 // Apparently we're not supposed to ask.
920 token.setName(imgTag.localName());
921 // Note the fall through to the imgTag handling below!
// Self-closing elements.
923 if (token.name() == areaTag
924 || token.name() == brTag
925 || token.name() == embedTag
926 || token.name() == imgTag
927 || token.name() == keygenTag
928 || token.name() == wbrTag) {
929 m_tree.reconstructTheActiveFormattingElements();
930 m_tree.insertSelfClosingHTMLElement(token);
931 m_framesetOk = false;
// <input>: m_framesetOk is left unchanged only for type=hidden (compared case-insensitively).
// The type attribute is read before the insertion call.
934 if (token.name() == inputTag) {
935 RefPtr<Attribute> typeAttribute = token.getAttributeItem(typeAttr);
936 m_tree.reconstructTheActiveFormattingElements();
937 m_tree.insertSelfClosingHTMLElement(token);
938 if (!typeAttribute || !equalIgnoringCase(typeAttribute->value(), "hidden"))
939 m_framesetOk = false;
// <param>, <source>, <track>: inserted as self-closing, nothing else changes.
942 if (token.name() == paramTag
943 || token.name() == sourceTag
944 || token.name() == trackTag) {
945 m_tree.insertSelfClosingHTMLElement(token);
// <hr>
948 if (token.name() == hrTag) {
949 processFakePEndTagIfPInButtonScope();
950 m_tree.insertSelfClosingHTMLElement(token);
951 m_framesetOk = false;
// <isindex>
954 if (token.name() == isindexTag) {
955 processIsindexStartTagForInBody(token);
// <textarea>: switches the tokenizer to RCDATAState and the builder to TextMode,
// saving the current insertion mode in m_originalInsertionMode.
958 if (token.name() == textareaTag) {
959 m_tree.insertHTMLElement(token);
960 m_shouldSkipLeadingNewline = true;
961 m_parser->tokenizer()->setState(HTMLTokenizerState::RCDATAState);
962 m_originalInsertionMode = m_insertionMode;
963 m_framesetOk = false;
964 setInsertionMode(TextMode);
// Raw-text elements: <xmp>, <iframe>, <noembed> when plugins are enabled and
// <noscript> when scripting is enabled.
967 if (token.name() == xmpTag) {
968 processFakePEndTagIfPInButtonScope();
969 m_tree.reconstructTheActiveFormattingElements();
970 m_framesetOk = false;
971 processGenericRawTextStartTag(token);
974 if (token.name() == iframeTag) {
975 m_framesetOk = false;
976 processGenericRawTextStartTag(token);
979 if (token.name() == noembedTag && pluginsEnabled(m_document->frame())) {
980 processGenericRawTextStartTag(token);
983 if (token.name() == noscriptTag && scriptEnabled(m_document->frame())) {
984 processGenericRawTextStartTag(token);
// <select>: the next mode depends on whether we are currently inside table content.
987 if (token.name() == selectTag) {
988 m_tree.reconstructTheActiveFormattingElements();
989 m_tree.insertHTMLElement(token);
990 m_framesetOk = false;
991 if (m_insertionMode == InTableMode
992 || m_insertionMode == InCaptionMode
993 || m_insertionMode == InColumnGroupMode
994 || m_insertionMode == InTableBodyMode
995 || m_insertionMode == InRowMode
996 || m_insertionMode == InCellMode)
997 setInsertionMode(InSelectInTableMode);
999 setInsertionMode(InSelectMode);
// <optgroup>/<option>: an <option> that is the current node is closed first.
1002 if (token.name() == optgroupTag || token.name() == optionTag) {
1003 if (m_tree.currentNode()->hasTagName(optionTag)) {
1004 AtomicHTMLToken endOption(HTMLTokenTypes::EndTag, optionTag.localName());
1005 processEndTag(endOption);
1007 m_tree.reconstructTheActiveFormattingElements();
1008 m_tree.insertHTMLElement(token);
// <rp>/<rt>: with a <ruby> in scope, implied end tags are generated first.
1011 if (token.name() == rpTag || token.name() == rtTag) {
1012 if (m_tree.openElements()->inScope(rubyTag.localName())) {
1013 m_tree.generateImpliedEndTags();
1014 if (!m_tree.currentNode()->hasTagName(rubyTag))
1017 m_tree.insertHTMLElement(token);
// <math> and <svg>: attribute names are adjusted, then the element is inserted
// as a foreign element in its own namespace.
1020 if (token.name() == MathMLNames::mathTag.localName()) {
1021 m_tree.reconstructTheActiveFormattingElements();
1022 adjustMathMLAttributes(token);
1023 adjustForeignAttributes(token);
1024 m_tree.insertForeignElement(token, MathMLNames::mathmlNamespaceURI);
1027 if (token.name() == SVGNames::svgTag.localName()) {
1028 m_tree.reconstructTheActiveFormattingElements();
1029 adjustSVGAttributes(token);
1030 adjustForeignAttributes(token);
1031 m_tree.insertForeignElement(token, SVGNames::svgNamespaceURI);
// Table-structure tags, <frame> and <head> form their own group.
// NOTE(review): the body of this branch is not visible here.
1034 if (isCaptionColOrColgroupTag(token.name())
1035 || token.name() == frameTag
1036 || token.name() == headTag
1037 || isTableBodyContextTag(token.name())
1038 || isTableCellContextTag(token.name())
1039 || token.name() == trTag) {
// Generic handling for any other start tag.
1043 m_tree.reconstructTheActiveFormattingElements();
1044 m_tree.insertHTMLElement(token);
// Handles the end of a <colgroup> in "in column group" mode: pops the current
// node and returns to InTableMode.
// The first check covers the case where the current node is the root of the
// open-element stack, which is only expected when parsing a fragment (asserted).
// NOTE(review): the boolean return values are not visible here - confirm which
// path reports true.
1047 bool HTMLTreeBuilder::processColgroupEndTagForInColumnGroup()
1049 if (m_tree.currentNode() == m_tree.openElements()->rootNode()) {
1050 ASSERT(isParsingFragment());
1051 // FIXME: parse error
1054 m_tree.openElements()->pop();
1055 setInsertionMode(InTableMode);
1059 // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#close-the-cell
// Closes the open table cell with a synthetic </td> or </th>, whichever is in
// table scope. Must be called in InCellMode (asserted). The assertions expect
// exactly one of td/th to be in table scope, and InRowMode after a </th> has been processed.
1060 void HTMLTreeBuilder::closeTheCell()
1062 ASSERT(insertionMode() == InCellMode);
1063 if (m_tree.openElements()->inTableScope(tdTag)) {
1064 ASSERT(!m_tree.openElements()->inTableScope(thTag));
1065 processFakeEndTag(tdTag);
1068 ASSERT(m_tree.openElements()->inTableScope(thTag));
1069 processFakeEndTag(thTag);
1070 ASSERT(insertionMode() == InRowMode);
// Handles a start tag token under the "in table" rules. Each check below covers
// one tag or tag group; tags not matched by any of them are processed with the
// "in body" rules while a RedirectToFosterParentGuard is active.
1073 void HTMLTreeBuilder::processStartTagForInTable(AtomicHTMLToken& token)
1075 ASSERT(token.type() == HTMLTokenTypes::StartTag);
// <caption>: pop back to the table scope marker, add a formatting marker, insert
// the element and switch to InCaptionMode.
1076 if (token.name() == captionTag) {
1077 m_tree.openElements()->popUntilTableScopeMarker();
1078 m_tree.activeFormattingElements()->appendMarker();
1079 m_tree.insertHTMLElement(token);
1080 setInsertionMode(InCaptionMode);
// <colgroup>: pop, insert and switch to InColumnGroupMode.
1083 if (token.name() == colgroupTag) {
1084 m_tree.openElements()->popUntilTableScopeMarker();
1085 m_tree.insertHTMLElement(token);
1086 setInsertionMode(InColumnGroupMode);
// <col>: synthesize the enclosing <colgroup>, then reprocess the token in the new mode.
1089 if (token.name() == colTag) {
1090 processFakeStartTag(colgroupTag);
// The synthetic <colgroup> above must have switched us to InColumnGroupMode.
// Compare the current mode explicitly: asserting the bare enumerator would only
// test that its value is non-zero.
1091 ASSERT(insertionMode() == InColumnGroupMode);
1092 processStartTag(token);
// <tbody>/<tfoot>/<thead>: pop, insert and switch to InTableBodyMode.
1095 if (isTableBodyContextTag(token.name())) {
1096 m_tree.openElements()->popUntilTableScopeMarker();
1097 m_tree.insertHTMLElement(token);
1098 setInsertionMode(InTableBodyMode);
// <td>/<th>/<tr>: synthesize the enclosing <tbody>, then reprocess the token.
1101 if (isTableCellContextTag(token.name())
1102 || token.name() == trTag) {
1103 processFakeStartTag(tbodyTag);
1104 ASSERT(insertionMode() == InTableBodyMode);
1105 processStartTag(token);
// Nested <table>: close the current table first. If that fails we must be
// parsing a fragment (asserted); otherwise the token is reprocessed.
1108 if (token.name() == tableTag) {
1110 if (!processTableEndTagForInTable()) {
1111 ASSERT(isParsingFragment());
1114 processStartTag(token);
// <style>/<script> are handled by the "in head" rules.
1117 if (token.name() == styleTag || token.name() == scriptTag) {
1118 processStartTagForInHead(token);
// <input type=hidden> (compared case-insensitively) is inserted directly as a
// self-closing element; any other <input> continues to the generic handling.
1121 if (token.name() == inputTag) {
1122 Attribute* typeAttribute = token.getAttributeItem(typeAttr);
1123 if (typeAttribute && equalIgnoringCase(typeAttribute->value(), "hidden")) {
1125 m_tree.insertSelfClosingHTMLElement(token);
1128 // Fall through to "anything else" case.
// <form>: the form element is inserted and immediately popped again, so it
// never stays on the open-element stack.
1130 if (token.name() == formTag) {
1134 m_tree.insertHTMLFormElement(token, true);
1135 m_tree.openElements()->pop();
// Anything else: process with the "in body" rules while the guard is alive.
1139 HTMLConstructionSite::RedirectToFosterParentGuard redirecter(m_tree);
1140 processStartTagForInBody(token);
// Dispatches a start tag token on the current insertion mode. Every section
// of the switch begins with an ASSERT naming the mode(s) it serves. Several
// sections change the insertion mode (or close an element) and then call
// processStartTag() again so the same token is handled under the new state.
1143 void HTMLTreeBuilder::processStartTag(AtomicHTMLToken& token)
1145 ASSERT(token.type() == HTMLTokenTypes::StartTag);
1146 switch (insertionMode()) {
// InitialMode: apply the default action for the initial mode.
1148 ASSERT(insertionMode() == InitialMode);
1149 defaultForInitial();
1151 case BeforeHTMLMode:
1152 ASSERT(insertionMode() == BeforeHTMLMode);
// <html> creates the root element and moves on to BeforeHeadMode.
1153 if (token.name() == htmlTag) {
1154 m_tree.insertHTMLHtmlStartTagBeforeHTML(token);
1155 setInsertionMode(BeforeHeadMode);
1158 defaultForBeforeHTML();
1160 case BeforeHeadMode:
1161 ASSERT(insertionMode() == BeforeHeadMode);
1162 if (token.name() == htmlTag) {
1163 m_tree.insertHTMLHtmlStartTagInBody(token);
// <head> inserts the head element and switches to InHeadMode.
1166 if (token.name() == headTag) {
1167 m_tree.insertHTMLHeadElement(token);
1168 setInsertionMode(InHeadMode);
1171 defaultForBeforeHead();
// InHeadMode: delegate to the in-head handler, which reports whether it
// consumed the token.
1174 ASSERT(insertionMode() == InHeadMode);
1175 if (processStartTagForInHead(token))
// AfterHeadMode.
1180 ASSERT(insertionMode() == AfterHeadMode);
1181 if (token.name() == htmlTag) {
1182 m_tree.insertHTMLHtmlStartTagInBody(token);
// <body>: clears m_framesetOk, inserts the body and switches to InBodyMode.
1185 if (token.name() == bodyTag) {
1186 m_framesetOk = false;
1187 m_tree.insertHTMLBodyElement(token);
1188 setInsertionMode(InBodyMode);
1191 if (token.name() == framesetTag) {
1192 m_tree.insertHTMLElement(token);
1193 setInsertionMode(InFramesetMode);
// Head-content tags after </head>: temporarily push the head element back
// onto the open-element stack, run the in-head rules, then remove it again.
1196 if (token.name() == baseTag
1197 || token.name() == basefontTag
1198 || token.name() == bgsoundTag
1199 || token.name() == linkTag
1200 || token.name() == metaTag
1201 || token.name() == noframesTag
1202 || token.name() == scriptTag
1203 || token.name() == styleTag
1204 || token.name() == titleTag) {
1206 ASSERT(m_tree.head());
1207 m_tree.openElements()->pushHTMLHeadElement(m_tree.head());
1208 processStartTagForInHead(token);
1209 m_tree.openElements()->removeHTMLHeadElement(m_tree.head());
1212 if (token.name() == headTag) {
1216 defaultForAfterHead();
// InBodyMode.
1219 ASSERT(insertionMode() == InBodyMode);
1220 processStartTagForInBody(token);
// InTableMode.
1223 ASSERT(insertionMode() == InTableMode);
1224 processStartTagForInTable(token);
// InCaptionMode: table-structure tags close the caption first (which can only
// fail while parsing a fragment) and are then re-dispatched; everything else
// uses the in-body rules.
1227 ASSERT(insertionMode() == InCaptionMode);
1228 if (isCaptionColOrColgroupTag(token.name())
1229 || isTableBodyContextTag(token.name())
1230 || isTableCellContextTag(token.name())
1231 || token.name() == trTag) {
1233 if (!processCaptionEndTagForInCaption()) {
1234 ASSERT(isParsingFragment());
1237 processStartTag(token);
1240 processStartTagForInBody(token);
1242 case InColumnGroupMode:
1243 ASSERT(insertionMode() == InColumnGroupMode);
1244 if (token.name() == htmlTag) {
1245 m_tree.insertHTMLHtmlStartTagInBody(token);
1248 if (token.name() == colTag) {
1249 m_tree.insertSelfClosingHTMLElement(token);
// Any other tag: close the colgroup, then re-dispatch the token.
1252 if (!processColgroupEndTagForInColumnGroup()) {
1253 ASSERT(isParsingFragment());
1256 processStartTag(token);
1258 case InTableBodyMode:
1259 ASSERT(insertionMode() == InTableBodyMode);
1260 if (token.name() == trTag) {
1261 m_tree.openElements()->popUntilTableBodyScopeMarker(); // How is there ever anything to pop?
1262 m_tree.insertHTMLElement(token);
1263 setInsertionMode(InRowMode);
// A cell tag directly inside a table body: synthesize the missing <tr>.
1266 if (isTableCellContextTag(token.name())) {
1268 processFakeStartTag(trTag);
1269 ASSERT(insertionMode() == InRowMode);
1270 processStartTag(token);
// Caption/col/colgroup or another table-body tag: close the current table
// body (via a fake end tag for the current element) and re-dispatch. If no
// tbody/thead/tfoot is in table scope we must be parsing a fragment.
1273 if (isCaptionColOrColgroupTag(token.name()) || isTableBodyContextTag(token.name())) {
1274 // FIXME: This is slow.
1275 if (!m_tree.openElements()->inTableScope(tbodyTag.localName()) && !m_tree.openElements()->inTableScope(theadTag.localName()) && !m_tree.openElements()->inTableScope(tfootTag.localName())) {
1276 ASSERT(isParsingFragment());
1280 m_tree.openElements()->popUntilTableBodyScopeMarker();
1281 ASSERT(isTableBodyContextTag(m_tree.currentElement()->localName()));
1282 processFakeEndTag(m_tree.currentElement()->tagQName());
1283 processStartTag(token);
1286 processStartTagForInTable(token);
// InRowMode.
1289 ASSERT(insertionMode() == InRowMode);
// A cell tag opens a cell: switch to InCellMode and add a formatting marker.
1290 if (isTableCellContextTag(token.name())) {
1291 m_tree.openElements()->popUntilTableRowScopeMarker();
1292 m_tree.insertHTMLElement(token);
1293 setInsertionMode(InCellMode);
1294 m_tree.activeFormattingElements()->appendMarker();
// Row-level or higher table tags: close the row, then re-dispatch.
1297 if (token.name() == trTag
1298 || isCaptionColOrColgroupTag(token.name())
1299 || isTableBodyContextTag(token.name())) {
1300 if (!processTrEndTagForInRow()) {
1301 ASSERT(isParsingFragment());
1304 ASSERT(insertionMode() == InTableBodyMode);
1305 processStartTag(token);
1308 processStartTagForInTable(token);
// InCellMode: table-structure tags require a td or th in table scope
// (otherwise we must be parsing a fragment) and are re-dispatched; all other
// tags use the in-body rules.
1311 ASSERT(insertionMode() == InCellMode);
1312 if (isCaptionColOrColgroupTag(token.name())
1313 || isTableCellContextTag(token.name())
1314 || token.name() == trTag
1315 || isTableBodyContextTag(token.name())) {
1316 // FIXME: This could be more efficient.
1317 if (!m_tree.openElements()->inTableScope(tdTag) && !m_tree.openElements()->inTableScope(thTag)) {
1318 ASSERT(isParsingFragment());
1323 processStartTag(token);
1326 processStartTagForInBody(token);
// This section serves both AfterBodyMode and AfterAfterBodyMode (see the
// ASSERT): anything but <html> switches back to InBodyMode and is re-dispatched.
1329 case AfterAfterBodyMode:
1330 ASSERT(insertionMode() == AfterBodyMode || insertionMode() == AfterAfterBodyMode);
1331 if (token.name() == htmlTag) {
1332 m_tree.insertHTMLHtmlStartTagInBody(token);
1335 setInsertionMode(InBodyMode);
1336 processStartTag(token);
1338 case InHeadNoscriptMode:
1339 ASSERT(insertionMode() == InHeadNoscriptMode);
1340 if (token.name() == htmlTag) {
1341 m_tree.insertHTMLHtmlStartTagInBody(token);
// These tags are always expected to be consumed by the in-head handler.
1344 if (token.name() == basefontTag
1345 || token.name() == bgsoundTag
1346 || token.name() == linkTag
1347 || token.name() == metaTag
1348 || token.name() == noframesTag
1349 || token.name() == styleTag) {
1350 bool didProcess = processStartTagForInHead(token);
1351 ASSERT_UNUSED(didProcess, didProcess);
1354 if (token.name() == htmlTag || token.name() == noscriptTag) {
1358 defaultForInHeadNoscript();
1359 processToken(token);
1361 case InFramesetMode:
1362 ASSERT(insertionMode() == InFramesetMode);
1363 if (token.name() == htmlTag) {
1364 m_tree.insertHTMLHtmlStartTagInBody(token);
1367 if (token.name() == framesetTag) {
1368 m_tree.insertHTMLElement(token);
1371 if (token.name() == frameTag) {
1372 m_tree.insertSelfClosingHTMLElement(token);
1375 if (token.name() == noframesTag) {
1376 processStartTagForInHead(token);
1381 case AfterFramesetMode:
1382 case AfterAfterFramesetMode:
1383 ASSERT(insertionMode() == AfterFramesetMode || insertionMode() == AfterAfterFramesetMode);
1384 if (token.name() == htmlTag) {
1385 m_tree.insertHTMLHtmlStartTagInBody(token);
1388 if (token.name() == noframesTag) {
1389 processStartTagForInHead(token);
1394 case InSelectInTableMode:
1395 ASSERT(insertionMode() == InSelectInTableMode);
// Table-structure tags inside a select-in-table: close the <select> with a
// synthesized end tag, then re-dispatch the token.
1396 if (token.name() == captionTag
1397 || token.name() == tableTag
1398 || isTableBodyContextTag(token.name())
1399 || token.name() == trTag
1400 || isTableCellContextTag(token.name())) {
1402 AtomicHTMLToken endSelect(HTMLTokenTypes::EndTag, selectTag.localName());
1403 processEndTag(endSelect);
1404 processStartTag(token);
// The following section serves InSelectMode and InSelectInTableMode (see the
// ASSERT); presumably the select-in-table case above falls through to it.
1409 ASSERT(insertionMode() == InSelectMode || insertionMode() == InSelectInTableMode);
1410 if (token.name() == htmlTag) {
1411 m_tree.insertHTMLHtmlStartTagInBody(token);
// <option>: an open <option> is closed first.
1414 if (token.name() == optionTag) {
1415 if (m_tree.currentNode()->hasTagName(optionTag)) {
1416 AtomicHTMLToken endOption(HTMLTokenTypes::EndTag, optionTag.localName());
1417 processEndTag(endOption);
1419 m_tree.insertHTMLElement(token);
// <optgroup>: an open <option> and then an open <optgroup> are closed first.
1422 if (token.name() == optgroupTag) {
1423 if (m_tree.currentNode()->hasTagName(optionTag)) {
1424 AtomicHTMLToken endOption(HTMLTokenTypes::EndTag, optionTag.localName());
1425 processEndTag(endOption);
1427 if (m_tree.currentNode()->hasTagName(optgroupTag)) {
1428 AtomicHTMLToken endOptgroup(HTMLTokenTypes::EndTag, optgroupTag.localName());
1429 processEndTag(endOptgroup);
1431 m_tree.insertHTMLElement(token);
// A nested <select> start tag is treated as an end tag for the open select.
1434 if (token.name() == selectTag) {
1436 AtomicHTMLToken endSelect(HTMLTokenTypes::EndTag, selectTag.localName());
1437 processEndTag(endSelect);
// <input>, <keygen>, <textarea>: close the select (it must be in select scope
// unless parsing a fragment) and re-dispatch the token.
1440 if (token.name() == inputTag
1441 || token.name() == keygenTag
1442 || token.name() == textareaTag) {
1444 if (!m_tree.openElements()->inSelectScope(selectTag)) {
1445 ASSERT(isParsingFragment());
1448 AtomicHTMLToken endSelect(HTMLTokenTypes::EndTag, selectTag.localName());
1449 processEndTag(endSelect);
1450 processStartTag(token);
1453 if (token.name() == scriptTag) {
1454 bool didProcess = processStartTagForInHead(token);
1455 ASSERT_UNUSED(didProcess, didProcess);
// InTableTextMode: run the default in-table-text action, then re-dispatch.
1459 case InTableTextMode:
1460 defaultForInTableText();
1461 processStartTag(token);
1464 ASSERT_NOT_REACHED();
// Handles a </body> end tag under the in-body rules. The token must be an end
// tag named "body". The handler first checks that a body element is in scope;
// when it is, the insertion mode becomes AfterBodyMode. The bool result lets
// callers decide whether to continue (the </html> branch of
// processEndTagForInBody re-dispatches only when this returns true).
1469 bool HTMLTreeBuilder::processBodyEndTagForInBody(AtomicHTMLToken& token)
1471 ASSERT(token.type() == HTMLTokenTypes::EndTag);
1472 ASSERT(token.name() == bodyTag);
1473 if (!m_tree.openElements()->inScope(bodyTag.localName())) {
1477 notImplemented(); // Emit a more specific parse error based on stack contents.
1478 setInsertionMode(AfterBodyMode);
// Generic in-body end tag handling for tags without a dedicated branch.
// Starting at the top of the stack of open elements, it examines one record
// at a time (advancing with record->next()): a node whose local name matches
// the token gets implied end tags generated and is popped; a "special" node
// is also tested for (isSpecialNode).
1482 void HTMLTreeBuilder::processAnyOtherEndTagForInBody(AtomicHTMLToken& token)
1484 ASSERT(token.type() == HTMLTokenTypes::EndTag);
1485 HTMLElementStack::ElementRecord* record = m_tree.openElements()->topRecord();
// Hold a reference to the node so it stays alive across the stack mutations
// below.
1487 RefPtr<ContainerNode> node = record->node();
1488 if (node->hasLocalName(token.name())) {
1489 m_tree.generateImpliedEndTags();
1490 // FIXME: The ElementRecord pointed to by record might be deleted by
1491 // the preceding call. Perhaps we should hold a RefPtr so that it
1492 // stays alive for the duration of record's scope.
1494 if (!m_tree.currentNode()->hasLocalName(token.name())) {
1496 // FIXME: This is either a bug in the spec, or a bug in our
1497 // implementation. Filed a bug with HTML5:
1498 // http://www.w3.org/Bugs/Public/show_bug.cgi?id=10080
1499 // We might have already popped the node for the token in
1500 // generateImpliedEndTags, just abort.
1501 if (!m_tree.openElements()->contains(toElement(node.get())))
1504 m_tree.openElements()->popUntilPopped(toElement(node.get()));
1507 if (isSpecialNode(node.get())) {
1511 record = record->next();
1515 // FIXME: This probably belongs on HTMLElementStack.
1516 HTMLElementStack::ElementRecord* HTMLTreeBuilder::furthestBlockForFormattingElement(Element* formattingElement)
1518 HTMLElementStack::ElementRecord* furthestBlock = 0;
1519 HTMLElementStack::ElementRecord* record = m_tree.openElements()->topRecord();
1520 for (; record; record = record->next()) {
1521 if (record->element() == formattingElement)
1522 return furthestBlock;
1523 if (isSpecialNode(record->element()))
1524 furthestBlock = record;
1526 ASSERT_NOT_REACHED();
1530 // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#parsing-main-inbody
// Runs the adoption agency algorithm for an end tag whose name matches a
// formatting element. Both the outer and the inner loop are bounded by the
// constants below. The step numbers in the comments (6.2, 6.4, 6.5, 8--12)
// refer to the steps of the algorithm in the specification linked here.
1531 void HTMLTreeBuilder::callTheAdoptionAgency(AtomicHTMLToken& token)
1533 // The adoption agency algorithm is N^2. We limit the number of iterations
1534 // to stop from hanging the whole browser. This limit is specified in the
1535 // adoption agency algorithm:
1536 // http://www.whatwg.org/specs/web-apps/current-work/multipage/tree-construction.html#parsing-main-inbody
1537 static const int outerIterationLimit = 8;
1538 static const int innerIterationLimit = 3;
1540 for (int i = 0; i < outerIterationLimit; ++i) {
// Locate the formatting element for this tag name in the list of active
// formatting elements. The guard rejects a missing element and one that is
// on the open-element stack but not in scope.
1542 Element* formattingElement = m_tree.activeFormattingElements()->closestElementInScopeWithName(token.name());
1543 if (!formattingElement || ((m_tree.openElements()->contains(formattingElement)) && !m_tree.openElements()->inScope(formattingElement))) {
1545 notImplemented(); // Check the stack of open elements for a more specific parse error.
// A formatting element that is no longer on the open-element stack is simply
// dropped from the active formatting list.
1548 HTMLElementStack::ElementRecord* formattingElementRecord = m_tree.openElements()->find(formattingElement);
1549 if (!formattingElementRecord) {
1551 m_tree.activeFormattingElements()->remove(formattingElement);
1554 if (formattingElement != m_tree.currentElement())
1557 HTMLElementStack::ElementRecord* furthestBlock = furthestBlockForFormattingElement(formattingElement);
// No furthest block: pop through the formatting element and forget it.
1559 if (!furthestBlock) {
1560 m_tree.openElements()->popUntilPopped(formattingElement);
1561 m_tree.activeFormattingElements()->remove(formattingElement);
1565 ASSERT(furthestBlock->isAbove(formattingElementRecord));
// The common ancestor is the node in the stack record following the
// formatting element's record.
1566 RefPtr<ContainerNode> commonAncestor = formattingElementRecord->next()->node();
1568 HTMLFormattingElementList::Bookmark bookmark = m_tree.activeFormattingElements()->bookmarkFor(formattingElement);
1570 HTMLElementStack::ElementRecord* node = furthestBlock;
1571 HTMLElementStack::ElementRecord* nextNode = node->next();
1572 HTMLElementStack::ElementRecord* lastNode = furthestBlock;
// NOTE(review): this inner index shadows the outer loop's |i|. The visible
// body does not use the outer index, so it looks harmless, but a distinct
// name would be clearer.
1573 for (int i = 0; i < innerIterationLimit; ++i) {
1577 nextNode = node->next(); // Save node->next() for the next iteration in case node is deleted in 6.2.
// Records whose element is not an active formatting element are removed from
// the stack of open elements.
1579 if (!m_tree.activeFormattingElements()->contains(node->element())) {
1580 m_tree.openElements()->remove(node->element());
1585 if (node == formattingElementRecord)
// Replace the element in both the formatting list entry and the stack record
// with a newly created element built from the record.
1588 RefPtr<Element> newElement = m_tree.createHTMLElementFromElementRecord(node);
1589 HTMLFormattingElementList::Entry* nodeEntry = m_tree.activeFormattingElements()->find(node->element());
1590 nodeEntry->replaceElement(newElement.get());
1591 node->replaceElement(newElement.release());
1592 // 6.4 -- Intentionally out of order to handle the case where node
1593 // was replaced in 6.5.
1594 // http://www.w3.org/Bugs/Public/show_bug.cgi?id=10096
1595 if (lastNode == furthestBlock)
1596 bookmark.moveToAfter(nodeEntry);
// Reparent lastNode's element under node's element, attaching it lazily when
// its new parent is attached and it is not.
1598 if (ContainerNode* parent = lastNode->element()->parentNode())
1599 parent->parserRemoveChild(lastNode->element());
1600 node->element()->parserAddChild(lastNode->element());
1601 if (lastNode->element()->parentElement()->attached() && !lastNode->element()->attached())
1602 lastNode->element()->lazyAttach();
// Detach lastNode's element from its current parent, then re-insert it:
// foster-parented when the common ancestor is a table, tr or table-body tag.
1607 const AtomicString& commonAncestorTag = commonAncestor->localName();
1608 if (ContainerNode* parent = lastNode->element()->parentNode())
1609 parent->parserRemoveChild(lastNode->element());
1610 // FIXME: If this moves to HTMLConstructionSite, this check should use
1611 // causesFosterParenting(tagName) instead.
1612 if (commonAncestorTag == tableTag
1613 || commonAncestorTag == trTag
1614 || isTableBodyContextTag(commonAncestorTag))
1615 m_tree.fosterParent(lastNode->element());
1617 commonAncestor->parserAddChild(lastNode->element());
1618 ASSERT(lastNode->node()->isElementNode());
1619 ASSERT(lastNode->element()->parentNode());
1620 if (lastNode->element()->parentNode()->attached() && !lastNode->element()->attached())
1621 lastNode->element()->lazyAttach();
// Create a replacement for the formatting element, move all of the furthest
// block's children into it, and append it to the furthest block.
1624 RefPtr<Element> newElement = m_tree.createHTMLElementFromElementRecord(formattingElementRecord);
1626 newElement->takeAllChildrenFrom(furthestBlock->element());
1628 Element* furthestBlockElement = furthestBlock->element();
1629 // FIXME: All this creation / parserAddChild / attach business should
1630 // be in HTMLConstructionSite. My guess is that steps 8--12
1631 // should all be in some HTMLConstructionSite function.
1632 furthestBlockElement->parserAddChild(newElement);
1633 if (furthestBlockElement->attached() && !newElement->attached()) {
1634 // Notice that newElement might already be attached if, for example, one of the reparented
1635 // children is a style element, which attaches itself automatically.
1636 newElement->attach();
// Swap the new element in for the old one: in the active formatting list at
// the bookmark, and on the open-element stack via remove + insertAbove.
1639 m_tree.activeFormattingElements()->swapTo(formattingElement, newElement.get(), bookmark);
1641 m_tree.openElements()->remove(formattingElement);
1642 m_tree.openElements()->insertAbove(newElement, furthestBlock);
// Chooses the insertion mode from the elements currently on the stack of open
// elements. It starts at the top record and moves on with nodeRecord->next();
// the first node whose tag matches one of the checks below determines the
// mode. When the node is the stack's root node (fragment parsing only), the
// fragment's context element is examined instead.
1646 void HTMLTreeBuilder::resetInsertionModeAppropriately()
1648 // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#reset-the-insertion-mode-appropriately
1650 HTMLElementStack::ElementRecord* nodeRecord = m_tree.openElements()->topRecord();
1652 ContainerNode* node = nodeRecord->node();
1653 if (node == m_tree.openElements()->rootNode()) {
1654 ASSERT(isParsingFragment());
1656 node = m_fragmentContext.contextElement();
// Tag-to-mode mapping. Branches carrying ASSERT(isParsingFragment()) are
// expected to be reachable only through the fragment context element.
1658 if (node->hasTagName(selectTag)) {
1659 ASSERT(isParsingFragment());
1660 return setInsertionMode(InSelectMode);
1662 if (node->hasTagName(tdTag) || node->hasTagName(thTag))
1663 return setInsertionMode(InCellMode);
1664 if (node->hasTagName(trTag))
1665 return setInsertionMode(InRowMode);
1666 if (node->hasTagName(tbodyTag) || node->hasTagName(theadTag) || node->hasTagName(tfootTag))
1667 return setInsertionMode(InTableBodyMode);
1668 if (node->hasTagName(captionTag))
1669 return setInsertionMode(InCaptionMode);
1670 if (node->hasTagName(colgroupTag)) {
1671 ASSERT(isParsingFragment());
1672 return setInsertionMode(InColumnGroupMode);
1674 if (node->hasTagName(tableTag))
1675 return setInsertionMode(InTableMode);
// Note that a head context element maps to InBodyMode, not InHeadMode.
1676 if (node->hasTagName(headTag)) {
1677 ASSERT(isParsingFragment());
1678 return setInsertionMode(InBodyMode);
1680 if (node->hasTagName(bodyTag))
1681 return setInsertionMode(InBodyMode);
1682 if (node->hasTagName(framesetTag)) {
1683 ASSERT(isParsingFragment());
1684 return setInsertionMode(InFramesetMode);
1686 if (node->hasTagName(htmlTag)) {
1687 ASSERT(isParsingFragment());
1688 return setInsertionMode(BeforeHeadMode);
// Fallback for fragment parsing when none of the tags above matched.
1691 ASSERT(isParsingFragment());
1692 return setInsertionMode(InBodyMode);
1694 nodeRecord = nodeRecord->next();
// Handles an end tag under the "in table body" rules. Table-body end tags
// close the current table body, </table> closes the body and is then
// re-dispatched, a fixed set of tags gets its own branch, and everything else
// is handed to the in-table end tag rules.
1698 void HTMLTreeBuilder::processEndTagForInTableBody(AtomicHTMLToken& token)
1700 ASSERT(token.type() == HTMLTokenTypes::EndTag);
// </tbody>, </thead>, </tfoot>: the named element must be in table scope;
// then pop to the table body scope marker, pop the body element itself and
// switch to InTableMode.
1701 if (isTableBodyContextTag(token.name())) {
1702 if (!m_tree.openElements()->inTableScope(token.name())) {
1706 m_tree.openElements()->popUntilTableBodyScopeMarker();
1707 m_tree.openElements()->pop();
1708 setInsertionMode(InTableMode);
// </table>: close the open table body with a fake end tag for the current
// element, then re-dispatch. With no table body in table scope we must be
// parsing a fragment.
1711 if (token.name() == tableTag) {
1712 // FIXME: This is slow.
1713 if (!m_tree.openElements()->inTableScope(tbodyTag.localName()) && !m_tree.openElements()->inTableScope(theadTag.localName()) && !m_tree.openElements()->inTableScope(tfootTag.localName())) {
1714 ASSERT(isParsingFragment());
1718 m_tree.openElements()->popUntilTableBodyScopeMarker();
1719 ASSERT(isTableBodyContextTag(m_tree.currentElement()->localName()));
1720 processFakeEndTag(m_tree.currentElement()->tagQName());
1721 processEndTag(token);
// End tags singled out in this mode (presumably treated as errors and
// ignored -- confirm against the full source).
1724 if (token.name() == bodyTag
1725 || isCaptionColOrColgroupTag(token.name())
1726 || token.name() == htmlTag
1727 || isTableCellContextTag(token.name())
1728 || token.name() == trTag) {
1732 processEndTagForInTable(token);
// Handles an end tag under the "in row" rules: </tr> closes the row, </table>
// and table-body end tags close the row and are re-dispatched, a fixed set of
// tags gets its own branch, and everything else uses the in-table end tag
// rules.
1735 void HTMLTreeBuilder::processEndTagForInRow(AtomicHTMLToken& token)
1737 ASSERT(token.type() == HTMLTokenTypes::EndTag);
1738 if (token.name() == trTag) {
1739 processTrEndTagForInRow();
// </table>: closing the row can only fail while parsing a fragment; on
// success the mode is InTableBodyMode and the token is re-dispatched.
1742 if (token.name() == tableTag) {
1743 if (!processTrEndTagForInRow()) {
1744 ASSERT(isParsingFragment());
1747 ASSERT(insertionMode() == InTableBodyMode);
1748 processEndTag(token);
// </tbody>, </thead>, </tfoot>: the named element must be in table scope;
// then close the row with a fake </tr> and re-dispatch.
1751 if (isTableBodyContextTag(token.name())) {
1752 if (!m_tree.openElements()->inTableScope(token.name())) {
1756 processFakeEndTag(trTag);
1757 ASSERT(insertionMode() == InTableBodyMode);
1758 processEndTag(token);
// End tags singled out in this mode (presumably treated as errors and
// ignored -- confirm against the full source).
1761 if (token.name() == bodyTag
1762 || isCaptionColOrColgroupTag(token.name())
1763 || token.name() == htmlTag
1764 || isTableCellContextTag(token.name())) {
1768 processEndTagForInTable(token);
// Handles an end tag under the "in cell" rules: cell end tags close the cell,
// end tags of enclosing table structure are re-dispatched, a fixed set of tags
// gets its own branch, and everything else uses the in-body end tag rules.
1771 void HTMLTreeBuilder::processEndTagForInCell(AtomicHTMLToken& token)
1773 ASSERT(token.type() == HTMLTokenTypes::EndTag);
// </td>, </th>: the named cell must be in table scope; generate implied end
// tags, pop through the cell, clear the active formatting elements back to
// the last marker and return to InRowMode.
1774 if (isTableCellContextTag(token.name())) {
1775 if (!m_tree.openElements()->inTableScope(token.name())) {
1779 m_tree.generateImpliedEndTags();
1780 if (!m_tree.currentNode()->hasLocalName(token.name()))
1782 m_tree.openElements()->popUntilPopped(token.name());
1783 m_tree.activeFormattingElements()->clearToLastMarker();
1784 setInsertionMode(InRowMode);
// End tags singled out in this mode (presumably treated as errors and
// ignored -- confirm against the full source).
1787 if (token.name() == bodyTag
1788 || isCaptionColOrColgroupTag(token.name())
1789 || token.name() == htmlTag) {
// </table>, </tr>, </tbody>, </thead>, </tfoot>: the named element must be in
// table scope. The ASSERT records that for </table> and </tr> this can only
// fail while parsing a fragment. The token is then re-dispatched.
1793 if (token.name() == tableTag
1794 || token.name() == trTag
1795 || isTableBodyContextTag(token.name())) {
1796 if (!m_tree.openElements()->inTableScope(token.name())) {
1797 ASSERT(isTableBodyContextTag(token.name()) || isParsingFragment());
1802 processEndTag(token);
1805 processEndTagForInBody(token);
// Handles an end tag under the "in body" rules. The function is a chain of
// `if` blocks, one per tag name or tag family; a token that matches none of
// them is passed to processAnyOtherEndTagForInBody().
1808 void HTMLTreeBuilder::processEndTagForInBody(AtomicHTMLToken& token)
1810 ASSERT(token.type() == HTMLTokenTypes::EndTag);
1811 if (token.name() == bodyTag) {
1812 processBodyEndTagForInBody(token);
// </html>: behave as if </body> had been seen; when that succeeds, the
// </html> token is re-dispatched under the new insertion mode.
1815 if (token.name() == htmlTag) {
1816 AtomicHTMLToken endBody(HTMLTokenTypes::EndTag, bodyTag.localName());
1817 if (processBodyEndTagForInBody(endBody))
1818 processEndTag(token);
// Block-level container end tags: the element must be in scope; generate
// implied end tags and pop through the named element.
1821 if (token.name() == addressTag
1822 || token.name() == articleTag
1823 || token.name() == asideTag
1824 || token.name() == blockquoteTag
1825 || token.name() == buttonTag
1826 || token.name() == centerTag
1827 || token.name() == detailsTag
1828 || token.name() == dirTag
1829 || token.name() == divTag
1830 || token.name() == dlTag
1831 || token.name() == fieldsetTag
1832 || token.name() == figcaptionTag
1833 || token.name() == figureTag
1834 || token.name() == footerTag
1835 || token.name() == headerTag
1836 || token.name() == hgroupTag
1837 || token.name() == listingTag
1838 || token.name() == menuTag
1839 || token.name() == navTag
1840 || token.name() == olTag
1841 || token.name() == preTag
1842 || token.name() == sectionTag
1843 || token.name() == summaryTag
1844 || token.name() == ulTag) {
1845 if (!m_tree.openElements()->inScope(token.name())) {
1849 m_tree.generateImpliedEndTags();
1850 if (!m_tree.currentNode()->hasLocalName(token.name()))
1852 m_tree.openElements()->popUntilPopped(token.name());
// </form>: take the form element from m_tree (takeForm) and, if it exists and
// is in scope, remove that specific element from the stack (not a pop-until).
1855 if (token.name() == formTag) {
1856 RefPtr<Element> node = m_tree.takeForm();
1857 if (!node || !m_tree.openElements()->inScope(node.get())) {
1861 m_tree.generateImpliedEndTags();
1862 if (m_tree.currentElement() != node.get())
1864 m_tree.openElements()->remove(node.get());
// </p>: with no <p> in button scope, a <p> start tag is synthesized first and
// the end tag re-dispatched; otherwise implied end tags (except for p) are
// generated and the stack is popped through the <p>.
1866 if (token.name() == pTag) {
1867 if (!m_tree.openElements()->inButtonScope(token.name())) {
1869 processFakeStartTag(pTag);
1870 ASSERT(m_tree.openElements()->inScope(token.name()));
1871 processEndTag(token);
1874 m_tree.generateImpliedEndTagsWithExclusion(token.name());
1875 if (!m_tree.currentNode()->hasLocalName(token.name()))
1877 m_tree.openElements()->popUntilPopped(token.name());
// </li>: uses list item scope.
1880 if (token.name() == liTag) {
1881 if (!m_tree.openElements()->inListItemScope(token.name())) {
1885 m_tree.generateImpliedEndTagsWithExclusion(token.name());
1886 if (!m_tree.currentNode()->hasLocalName(token.name()))
1888 m_tree.openElements()->popUntilPopped(token.name());
// </dd>, </dt>: uses the regular scope.
1891 if (token.name() == ddTag
1892 || token.name() == dtTag) {
1893 if (!m_tree.openElements()->inScope(token.name())) {
1897 m_tree.generateImpliedEndTagsWithExclusion(token.name());
1898 if (!m_tree.currentNode()->hasLocalName(token.name()))
1900 m_tree.openElements()->popUntilPopped(token.name());
// Numbered header end tags: any numbered header element in scope satisfies
// the check, and the stack is popped until a numbered header element has been
// popped (not necessarily the one named by the token).
1903 if (isNumberedHeaderTag(token.name())) {
1904 if (!m_tree.openElements()->hasNumberedHeaderElementInScope()) {
1908 m_tree.generateImpliedEndTags();
1909 if (!m_tree.currentNode()->hasLocalName(token.name()))
1911 m_tree.openElements()->popUntilNumberedHeaderElementPopped();
// Formatting end tags go through the adoption agency algorithm.
1914 if (isFormattingTag(token.name())) {
1915 callTheAdoptionAgency(token);
// </applet>, </marquee>, </object>: like the block containers, and also
// clears the active formatting elements back to the last marker.
1918 if (token.name() == appletTag
1919 || token.name() == marqueeTag
1920 || token.name() == objectTag) {
1921 if (!m_tree.openElements()->inScope(token.name())) {
1925 m_tree.generateImpliedEndTags();
1926 if (!m_tree.currentNode()->hasLocalName(token.name()))
1928 m_tree.openElements()->popUntilPopped(token.name());
1929 m_tree.activeFormattingElements()->clearToLastMarker();
// </br> is treated as a <br> start tag.
1932 if (token.name() == brTag) {
1934 processFakeStartTag(brTag);
1937 processAnyOtherEndTagForInBody(token);
1940 bool HTMLTreeBuilder::processCaptionEndTagForInCaption()
1942 if (!m_tree.openElements()->inTableScope(captionTag.localName())) {
1943 ASSERT(isParsingFragment());
1944 // FIXME: parse error
1947 m_tree.generateImpliedEndTags();
1948 // FIXME: parse error if (!m_tree.currentElement()->hasTagName(captionTag))
1949 m_tree.openElements()->popUntilPopped(captionTag.localName());
1950 m_tree.activeFormattingElements()->clearToLastMarker();
1951 setInsertionMode(InTableMode);
1955 bool HTMLTreeBuilder::processTrEndTagForInRow()
1957 if (!m_tree.openElements()->inTableScope(trTag.localName())) {
1958 ASSERT(isParsingFragment());
1959 // FIXME: parse error
1962 m_tree.openElements()->popUntilTableRowScopeMarker();
1963 ASSERT(m_tree.currentElement()->hasTagName(trTag));
1964 m_tree.openElements()->pop();
1965 setInsertionMode(InTableBodyMode);
1969 bool HTMLTreeBuilder::processTableEndTagForInTable()
1971 if (!m_tree.openElements()->inTableScope(tableTag)) {
1972 ASSERT(isParsingFragment());
1973 // FIXME: parse error.
1976 m_tree.openElements()->popUntilPopped(tableTag.localName());
1977 resetInsertionModeAppropriately();
// Handles an end tag under the "in table" rules: </table> closes the table, a
// fixed set of tags gets its own branch, and everything else is processed
// with the in-body end tag rules while a RedirectToFosterParentGuard on
// m_tree is alive.
1981 void HTMLTreeBuilder::processEndTagForInTable(AtomicHTMLToken& token)
1983 ASSERT(token.type() == HTMLTokenTypes::EndTag);
// The result of closing the table is deliberately not inspected here.
1984 if (token.name() == tableTag) {
1985 processTableEndTagForInTable();
// End tags singled out in this mode (presumably treated as errors and
// ignored -- confirm against the full source).
1988 if (token.name() == bodyTag
1989 || isCaptionColOrColgroupTag(token.name())
1990 || token.name() == htmlTag
1991 || isTableBodyContextTag(token.name())
1992 || isTableCellContextTag(token.name())
1993 || token.name() == trTag) {
1997 // Is this redirection necessary here?
1998 HTMLConstructionSite::RedirectToFosterParentGuard redirecter(m_tree);
1999 processEndTagForInBody(token);
// Dispatches an end tag token on the current insertion mode. Every section of
// the switch begins with an ASSERT naming the mode(s) it serves. Several
// sections change state and then call processEndTag() again so the same token
// is handled under the new insertion mode.
2002 void HTMLTreeBuilder::processEndTag(AtomicHTMLToken& token)
2004 ASSERT(token.type() == HTMLTokenTypes::EndTag);
2005 switch (insertionMode()) {
// InitialMode: apply the default action for the initial mode.
2007 ASSERT(insertionMode() == InitialMode);
2008 defaultForInitial();
2010 case BeforeHTMLMode:
2011 ASSERT(insertionMode() == BeforeHTMLMode);
// Only </head>, </body>, </html> and </br> get past this guard.
2012 if (token.name() != headTag && token.name() != bodyTag && token.name() != htmlTag && token.name() != brTag) {
2016 defaultForBeforeHTML();
2018 case BeforeHeadMode:
2019 ASSERT(insertionMode() == BeforeHeadMode);
2020 if (token.name() != headTag && token.name() != bodyTag && token.name() != htmlTag && token.name() != brTag) {
2024 defaultForBeforeHead();
// InHeadMode: </head> pops the head element and moves to AfterHeadMode.
2027 ASSERT(insertionMode() == InHeadMode);
2028 if (token.name() == headTag) {
2029 m_tree.openElements()->popHTMLHeadElement();
2030 setInsertionMode(AfterHeadMode);
2033 if (token.name() != bodyTag && token.name() != htmlTag && token.name() != brTag) {
// AfterHeadMode.
2040 ASSERT(insertionMode() == AfterHeadMode);
2041 if (token.name() != bodyTag && token.name() != htmlTag && token.name() != brTag) {
2045 defaultForAfterHead();
// InBodyMode.
2048 ASSERT(insertionMode() == InBodyMode);
2049 processEndTagForInBody(token);
// InTableMode.
2052 ASSERT(insertionMode() == InTableMode);
2053 processEndTagForInTable(token);
// InCaptionMode: </caption> closes the caption; </table> closes the caption
// (which can only fail while parsing a fragment) and is re-dispatched; other
// table-structure end tags get their own branch; the rest use in-body rules.
2056 ASSERT(insertionMode() == InCaptionMode);
2057 if (token.name() == captionTag) {
2058 processCaptionEndTagForInCaption();
2061 if (token.name() == tableTag) {
2063 if (!processCaptionEndTagForInCaption()) {
2064 ASSERT(isParsingFragment());
2067 processEndTag(token);
2070 if (token.name() == bodyTag
2071 || token.name() == colTag
2072 || token.name() == colgroupTag
2073 || token.name() == htmlTag
2074 || isTableBodyContextTag(token.name())
2075 || isTableCellContextTag(token.name())
2076 || token.name() == trTag) {
2080 processEndTagForInBody(token);
2082 case InColumnGroupMode:
2083 ASSERT(insertionMode() == InColumnGroupMode);
2084 if (token.name() == colgroupTag) {
2085 processColgroupEndTagForInColumnGroup();
2088 if (token.name() == colTag) {
// Any other end tag: close the colgroup, then re-dispatch the token.
2092 if (!processColgroupEndTagForInColumnGroup()) {
2093 ASSERT(isParsingFragment());
2096 processEndTag(token);
// InRowMode.
2099 ASSERT(insertionMode() == InRowMode);
2100 processEndTagForInRow(token);
// InCellMode.
2103 ASSERT(insertionMode() == InCellMode);
2104 processEndTagForInCell(token);
2106 case InTableBodyMode:
2107 ASSERT(insertionMode() == InTableBodyMode);
2108 processEndTagForInTableBody(token);
// AfterBodyMode: </html> moves to AfterAfterBodyMode, with a separate check
// for fragment parsing.
2111 ASSERT(insertionMode() == AfterBodyMode);
2112 if (token.name() == htmlTag) {
2113 if (isParsingFragment()) {
2117 setInsertionMode(AfterAfterBodyMode);
// Serves AfterBodyMode and AfterAfterBodyMode (see the ASSERT): switch back
// to InBodyMode and re-dispatch the token.
2121 case AfterAfterBodyMode:
2122 ASSERT(insertionMode() == AfterBodyMode || insertionMode() == AfterAfterBodyMode);
2124 setInsertionMode(InBodyMode);
2125 processEndTag(token);
2127 case InHeadNoscriptMode:
2128 ASSERT(insertionMode() == InHeadNoscriptMode);
// </noscript> pops the noscript element and returns to InHeadMode.
2129 if (token.name() == noscriptTag) {
2130 ASSERT(m_tree.currentElement()->hasTagName(noscriptTag));
2131 m_tree.openElements()->pop();
2132 ASSERT(m_tree.currentElement()->hasTagName(headTag));
2133 setInsertionMode(InHeadMode);
2136 if (token.name() != brTag) {
2140 defaultForInHeadNoscript();
2141 processToken(token);
// The section below pops the current element and restores
// m_originalInsertionMode (presumably the text-mode case -- confirm). For
// </script> it first records the element in m_scriptToProcess, with its start
// position, so the caller can run it.
2144 if (token.name() == scriptTag) {
2145 // Pause ourselves so that parsing stops until the script can be processed by the caller.
2147 ASSERT(m_tree.currentElement()->hasTagName(scriptTag));
2148 m_scriptToProcess = m_tree.currentElement();
2149 m_scriptToProcessStartPosition = m_lastScriptElementStartPosition;
2150 m_tree.openElements()->pop();
// When the fragment context disallows scripting, the script element's
// children are removed.
2151 if (isParsingFragment() && m_fragmentContext.scriptingPermission() == FragmentScriptingNotAllowed)
2152 m_scriptToProcess->removeAllChildren();
2153 setInsertionMode(m_originalInsertionMode);
2155 // This token will not have been created by the tokenizer if a
2156 // self-closing script tag was encountered and pre-HTML5 parser
2157 // quirks are enabled. We must set the tokenizer's state to
2158 // DataState explicitly if the tokenizer didn't have a chance to.
2159 ASSERT(m_parser->tokenizer()->state() == HTMLTokenizerState::DataState || m_usePreHTML5ParserQuirks);
2160 m_parser->tokenizer()->setState(HTMLTokenizerState::DataState);
2163 m_tree.openElements()->pop();
2164 setInsertionMode(m_originalInsertionMode);
2166 case InFramesetMode:
2167 ASSERT(insertionMode() == InFramesetMode);
// </frameset>: the root node gets its own branch; otherwise pop, and outside
// fragment parsing move to AfterFramesetMode once the current element is no
// longer a frameset.
2168 if (token.name() == framesetTag) {
2169 if (m_tree.currentNode() == m_tree.openElements()->rootNode()) {
2173 m_tree.openElements()->pop();
2174 if (!isParsingFragment() && !m_tree.currentElement()->hasTagName(framesetTag))
2175 setInsertionMode(AfterFramesetMode);
2179 case AfterFramesetMode:
2180 ASSERT(insertionMode() == AfterFramesetMode);
2181 if (token.name() == htmlTag) {
2182 setInsertionMode(AfterAfterFramesetMode);
2186 case AfterAfterFramesetMode:
2187 ASSERT(insertionMode() == AfterFramesetMode || insertionMode() == AfterAfterFramesetMode);
2190 case InSelectInTableMode:
2191 ASSERT(insertionMode() == InSelectInTableMode);
// Table-structure end tags inside a select-in-table: when the named element
// is in table scope, close the <select> with a synthesized end tag and
// re-dispatch the token.
2192 if (token.name() == captionTag
2193 || token.name() == tableTag
2194 || isTableBodyContextTag(token.name())
2195 || token.name() == trTag
2196 || isTableCellContextTag(token.name())) {
2198 if (m_tree.openElements()->inTableScope(token.name())) {
2199 AtomicHTMLToken endSelect(HTMLTokenTypes::EndTag, selectTag.localName());
2200 processEndTag(endSelect);
2201 processEndTag(token);
// The following section serves InSelectMode and InSelectInTableMode (see the
// ASSERT); presumably the select-in-table case above falls through to it.
2207 ASSERT(insertionMode() == InSelectMode || insertionMode() == InSelectInTableMode);
// </optgroup>: an <option> directly inside an <optgroup> is closed first;
// then the optgroup is popped if it is the current node.
2208 if (token.name() == optgroupTag) {
2209 if (m_tree.currentNode()->hasTagName(optionTag) && m_tree.oneBelowTop()->hasTagName(optgroupTag))
2210 processFakeEndTag(optionTag);
2211 if (m_tree.currentNode()->hasTagName(optgroupTag)) {
2212 m_tree.openElements()->pop();
2218 if (token.name() == optionTag) {
2219 if (m_tree.currentNode()->hasTagName(optionTag)) {
2220 m_tree.openElements()->pop();
// </select>: must be in select scope unless parsing a fragment; pop through
// the select and recompute the insertion mode.
2226 if (token.name() == selectTag) {
2227 if (!m_tree.openElements()->inSelectScope(token.name())) {
2228 ASSERT(isParsingFragment());
2232 m_tree.openElements()->popUntilPopped(selectTag.localName());
2233 resetInsertionModeAppropriately();
// InTableTextMode: run the default in-table-text action, then re-dispatch.
2237 case InTableTextMode:
2238 defaultForInTableText();
2239 processEndTag(token);
2244 void HTMLTreeBuilder::processComment(AtomicHTMLToken& token)
2246 ASSERT(token.type() == HTMLTokenTypes::Comment);
2247 if (m_insertionMode == InitialMode
2248 || m_insertionMode == BeforeHTMLMode
2249 || m_insertionMode == AfterAfterBodyMode
2250 || m_insertionMode == AfterAfterFramesetMode) {
2251 m_tree.insertCommentOnDocument(token);
2254 if (m_insertionMode == AfterBodyMode) {
2255 m_tree.insertCommentOnHTMLHtmlElement(token);
2258 if (m_insertionMode == InTableTextMode) {
2259 defaultForInTableText();
2260 processComment(token);
2263 m_tree.insertComment(token);
2266 void HTMLTreeBuilder::processCharacter(AtomicHTMLToken& token)
2268 ASSERT(token.type() == HTMLTokenTypes::Character);
2269 ExternalCharacterTokenBuffer buffer(token);
2270 processCharacterBuffer(buffer);
// Processes the characters held in |buffer| according to the current
// insertion mode. Several modes consume leading whitespace, run the mode's
// default action (which changes the insertion mode), and then reprocess the
// rest of the buffer via "goto ReprocessBuffer".
// NOTE(review): some case labels and break/return lines are not shown in this
// listing; the per-mode comments below rely on the ASSERTs to identify modes.
2273 void HTMLTreeBuilder::processCharacterBuffer(ExternalCharacterTokenBuffer& buffer)
2276 // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#parsing-main-inbody
2277 // Note that this logic is different than the generic \r\n collapsing
2278 // handled in the input stream preprocessor. This logic is here as an
2279 // "authoring convenience" so folks can write:
2286 // without getting an extra newline at the start of their <pre> element.
2287 if (m_shouldSkipLeadingNewline) {
// The flag is cleared before use, so at most one newline is skipped per request.
2288 m_shouldSkipLeadingNewline = false;
2289 buffer.skipAtMostOneLeadingNewline();
2290 if (buffer.isEmpty())
2294 switch (insertionMode()) {
// InitialMode: leading whitespace is skipped; anything left triggers the
// default action for the mode.
2296 ASSERT(insertionMode() == InitialMode);
2297 buffer.skipLeadingWhitespace();
2298 if (buffer.isEmpty())
2300 defaultForInitial();
// BeforeHTMLMode: same pattern, with defaultForBeforeHTML().
2303 case BeforeHTMLMode: {
2304 ASSERT(insertionMode() == BeforeHTMLMode);
2305 buffer.skipLeadingWhitespace();
2306 if (buffer.isEmpty())
2308 defaultForBeforeHTML();
// BeforeHeadMode: same pattern, with defaultForBeforeHead().
2311 case BeforeHeadMode: {
2312 ASSERT(insertionMode() == BeforeHeadMode);
2313 buffer.skipLeadingWhitespace();
2314 if (buffer.isEmpty())
2316 defaultForBeforeHead();
// InHeadMode: unlike the modes above, leading whitespace is kept and inserted
// as a text node instead of being skipped.
2320 ASSERT(insertionMode() == InHeadMode);
2321 String leadingWhitespace = buffer.takeLeadingWhitespace();
2322 if (!leadingWhitespace.isEmpty())
2323 m_tree.insertTextNode(leadingWhitespace, AllWhitespace);
2324 if (buffer.isEmpty())
// AfterHeadMode: insert leading whitespace, then run defaultForAfterHead()
// for whatever remains.
2329 case AfterHeadMode: {
2330 ASSERT(insertionMode() == AfterHeadMode);
2331 String leadingWhitespace = buffer.takeLeadingWhitespace();
2332 if (!leadingWhitespace.isEmpty())
2333 m_tree.insertTextNode(leadingWhitespace, AllWhitespace);
2334 if (buffer.isEmpty())
2336 defaultForAfterHead();
// InBodyMode / InCaptionMode / InCellMode: handled by the shared in-body helper.
2342 ASSERT(insertionMode() == InBodyMode || insertionMode() == InCaptionMode || insertionMode() == InCellMode);
2343 processCharacterBufferForInBody(buffer);
// InTableMode / InTableBodyMode / InRowMode: if the current node is a table,
// tbody, tfoot, thead or tr element, remember the current mode and switch to
// InTableTextMode so the characters are collected as pending table characters.
2347 case InTableBodyMode:
2349 ASSERT(insertionMode() == InTableMode || insertionMode() == InTableBodyMode || insertionMode() == InRowMode);
2350 ASSERT(m_pendingTableCharacters.isEmpty());
2351 if (m_tree.currentNode()->isElementNode()
2352 && (m_tree.currentElement()->hasTagName(HTMLNames::tableTag)
2353 || m_tree.currentElement()->hasTagName(HTMLNames::tbodyTag)
2354 || m_tree.currentElement()->hasTagName(HTMLNames::tfootTag)
2355 || m_tree.currentElement()->hasTagName(HTMLNames::theadTag)
2356 || m_tree.currentElement()->hasTagName(HTMLNames::trTag))) {
2357 m_originalInsertionMode = m_insertionMode;
2358 setInsertionMode(InTableTextMode);
2359 // Note that we fall through to the InTableTextMode case below.
// Presumably the else branch: process the characters with the in-body rules
// while the RedirectToFosterParentGuard is alive.
2361 HTMLConstructionSite::RedirectToFosterParentGuard redirecter(m_tree);
2362 processCharacterBufferForInBody(buffer);
// InTableTextMode: nothing is inserted yet; the characters are appended to
// m_pendingTableCharacters (flushed later by defaultForInTableText()).
2367 case InTableTextMode: {
2368 buffer.giveRemainingTo(m_pendingTableCharacters);
// InColumnGroupMode: insert leading whitespace, then try to close the
// colgroup and reprocess the remainder.
2371 case InColumnGroupMode: {
2372 ASSERT(insertionMode() == InColumnGroupMode);
2373 String leadingWhitespace = buffer.takeLeadingWhitespace();
2374 if (!leadingWhitespace.isEmpty())
2375 m_tree.insertTextNode(leadingWhitespace, AllWhitespace);
2376 if (buffer.isEmpty())
2378 if (!processColgroupEndTagForInColumnGroup()) {
2379 ASSERT(isParsingFragment());
2380 // The spec tells us to drop these characters on the floor.
2381 buffer.skipLeadingNonWhitespace();
2382 if (buffer.isEmpty())
2385 goto ReprocessBuffer;
// AfterBodyMode / AfterAfterBodyMode: switch back to InBodyMode and reprocess.
2388 case AfterAfterBodyMode: {
2389 ASSERT(insertionMode() == AfterBodyMode || insertionMode() == AfterAfterBodyMode);
2390 // FIXME: parse error
2391 setInsertionMode(InBodyMode);
2392 goto ReprocessBuffer;
// TextMode: everything left in the buffer becomes a text node.
2396 ASSERT(insertionMode() == TextMode);
2397 m_tree.insertTextNode(buffer.takeRemaining());
// InHeadNoscriptMode: insert leading whitespace, then leave the mode through
// its default action and reprocess the remainder.
2400 case InHeadNoscriptMode: {
2401 ASSERT(insertionMode() == InHeadNoscriptMode);
2402 String leadingWhitespace = buffer.takeLeadingWhitespace();
2403 if (!leadingWhitespace.isEmpty())
2404 m_tree.insertTextNode(leadingWhitespace, AllWhitespace);
2405 if (buffer.isEmpty())
2407 defaultForInHeadNoscript();
2408 goto ReprocessBuffer;
// InFramesetMode / AfterFramesetMode: only the whitespace returned by
// takeRemainingWhitespace() is inserted.
2411 case InFramesetMode:
2412 case AfterFramesetMode: {
2413 ASSERT(insertionMode() == InFramesetMode || insertionMode() == AfterFramesetMode || insertionMode() == AfterAfterFramesetMode);
2414 String leadingWhitespace = buffer.takeRemainingWhitespace();
2415 if (!leadingWhitespace.isEmpty())
2416 m_tree.insertTextNode(leadingWhitespace, AllWhitespace);
2417 // FIXME: We should generate a parse error if we skipped over any
2418 // non-whitespace characters.
// InSelectMode / InSelectInTableMode: everything left becomes a text node.
2421 case InSelectInTableMode:
2422 case InSelectMode: {
2423 ASSERT(insertionMode() == InSelectMode || insertionMode() == InSelectInTableMode);
2424 m_tree.insertTextNode(buffer.takeRemaining());
// AfterAfterFramesetMode: like the frameset modes above, but the active
// formatting elements are reconstructed before the whitespace is inserted.
2427 case AfterAfterFramesetMode: {
2428 String leadingWhitespace = buffer.takeRemainingWhitespace();
2429 if (!leadingWhitespace.isEmpty()) {
2430 m_tree.reconstructTheActiveFormattingElements();
2431 m_tree.insertTextNode(leadingWhitespace, AllWhitespace);
2433 // FIXME: We should generate a parse error if we skipped over any
2434 // non-whitespace characters.
// Shared character handling for the in-body rules: reconstructs the active
// formatting elements, inserts everything left in |buffer| as a text node,
// and clears m_framesetOk when the inserted text contains anything other than
// whitespace or replacement characters.
2440 void HTMLTreeBuilder::processCharacterBufferForInBody(ExternalCharacterTokenBuffer& buffer)
2442 m_tree.reconstructTheActiveFormattingElements();
2443 String characters = buffer.takeRemaining();
2444 m_tree.insertTextNode(characters);
// m_framesetOk is only ever cleared here, never set.
2445 if (m_framesetOk && !isAllWhitespaceOrReplacementCharacters(characters))
2446 m_framesetOk = false;
// Handles the EndOfFile token. Early insertion modes run their default action
// (which advances the mode); some modes then call processEndOfFile() again
// for the new mode. The function ends by popping all open elements.
// NOTE(review): several case labels and break/return/fall-through lines are
// not shown in this listing; modes are identified from the ASSERTs.
2449 void HTMLTreeBuilder::processEndOfFile(AtomicHTMLToken& token)
2451 ASSERT(token.type() == HTMLTokenTypes::EndOfFile);
2452 switch (insertionMode()) {
// InitialMode.
2454 ASSERT(insertionMode() == InitialMode);
2455 defaultForInitial();
2457 case BeforeHTMLMode:
2458 ASSERT(insertionMode() == BeforeHTMLMode);
2459 defaultForBeforeHTML();
2461 case BeforeHeadMode:
2462 ASSERT(insertionMode() == BeforeHeadMode);
2463 defaultForBeforeHead();
// InHeadMode (the statement following this ASSERT is not shown here).
2466 ASSERT(insertionMode() == InHeadMode);
// AfterHeadMode.
2470 ASSERT(insertionMode() == AfterHeadMode);
2471 defaultForAfterHead();
// InBodyMode / InCellMode / InCaptionMode / InRowMode.
2477 ASSERT(insertionMode() == InBodyMode || insertionMode() == InCellMode || insertionMode() == InCaptionMode || insertionMode() == InRowMode);
2478 notImplemented(); // Emit parse error based on what elements are still open.
2481 case AfterAfterBodyMode:
2482 ASSERT(insertionMode() == AfterBodyMode || insertionMode() == AfterAfterBodyMode);
// InHeadNoscriptMode: leave the mode through its default action, then handle
// EOF again in the resulting mode.
2484 case InHeadNoscriptMode:
2485 ASSERT(insertionMode() == InHeadNoscriptMode);
2486 defaultForInHeadNoscript();
2487 processEndOfFile(token);
2489 case AfterFramesetMode:
2490 case AfterAfterFramesetMode:
2491 ASSERT(insertionMode() == AfterFramesetMode || insertionMode() == AfterAfterFramesetMode);
2493 case InFramesetMode:
2495 case InTableBodyMode:
2496 case InSelectInTableMode:
2498 ASSERT(insertionMode() == InSelectMode || insertionMode() == InSelectInTableMode || insertionMode() == InTableMode || insertionMode() == InFramesetMode || insertionMode() == InTableBodyMode);
// NOTE(review): the statement guarded by this check is not shown here -
// presumably a parse error report; confirm.
2499 if (m_tree.currentNode() != m_tree.openElements()->rootNode())
// InColumnGroupMode: both early returns are asserted to occur only while
// parsing a fragment; otherwise the colgroup is closed and EOF is reprocessed.
2502 case InColumnGroupMode:
2503 if (m_tree.currentNode() == m_tree.openElements()->rootNode()) {
2504 ASSERT(isParsingFragment());
2505 return; // FIXME: Should we break here instead of returning?
2507 if (!processColgroupEndTagForInColumnGroup()) {
2508 ASSERT(isParsingFragment());
2509 return; // FIXME: Should we break here instead of returning?
2511 processEndOfFile(token);
// InTableTextMode: flush pending table characters (restores the original
// insertion mode), then handle EOF again in that mode.
2513 case InTableTextMode:
2514 defaultForInTableText();
2515 processEndOfFile(token);
// Presumably the TextMode case (label not shown): pop the current node,
// return to the saved insertion mode and handle EOF again there.
2519 if (m_tree.currentNode()->hasTagName(scriptTag))
2520 notImplemented(); // mark the script element as "already started".
2521 m_tree.openElements()->pop();
2522 ASSERT(m_originalInsertionMode != TextMode);
2523 setInsertionMode(m_originalInsertionMode);
2524 processEndOfFile(token);
// Final step: every element still on the open-elements stack is popped.
2527 ASSERT(m_tree.currentNode());
2528 m_tree.openElements()->popAll();
// Default action for InitialMode: unless a fragment is being parsed, puts the
// document into quirks mode, then advances to BeforeHTMLMode.
2531 void HTMLTreeBuilder::defaultForInitial()
// m_document's compatibility mode is only touched when there is no fragment.
2534 if (!m_fragmentContext.fragment())
2535 m_document->setCompatibilityMode(Document::QuirksMode);
2536 // FIXME: parse error
2537 setInsertionMode(BeforeHTMLMode);
// Default action for BeforeHTMLMode: synthesizes an <html> start tag token,
// inserts it via insertHTMLHtmlStartTagBeforeHTML(), and advances to
// BeforeHeadMode.
2540 void HTMLTreeBuilder::defaultForBeforeHTML()
2542 AtomicHTMLToken startHTML(HTMLTokenTypes::StartTag, htmlTag.localName());
2543 m_tree.insertHTMLHtmlStartTagBeforeHTML(startHTML);
2544 setInsertionMode(BeforeHeadMode);
// Default action for BeforeHeadMode: synthesizes a <head> start tag token and
// runs it through the normal start-tag processing.
2547 void HTMLTreeBuilder::defaultForBeforeHead()
2549 AtomicHTMLToken startHead(HTMLTokenTypes::StartTag, headTag.localName());
2550 processStartTag(startHead);
// Default action for InHeadMode: synthesizes a </head> end tag token and runs
// it through the normal end-tag processing.
2553 void HTMLTreeBuilder::defaultForInHead()
2555 AtomicHTMLToken endHead(HTMLTokenTypes::EndTag, headTag.localName());
2556 processEndTag(endHead);
// Default action for InHeadNoscriptMode: synthesizes a </noscript> end tag
// token and runs it through the normal end-tag processing.
2559 void HTMLTreeBuilder::defaultForInHeadNoscript()
2561 AtomicHTMLToken endNoscript(HTMLTokenTypes::EndTag, noscriptTag.localName());
2562 processEndTag(endNoscript);
// Default action for AfterHeadMode: synthesizes a <body> start tag token and
// runs it through the normal start-tag processing.
2565 void HTMLTreeBuilder::defaultForAfterHead()
2567 AtomicHTMLToken startBody(HTMLTokenTypes::StartTag, bodyTag.localName());
2568 processStartTag(startBody);
// Set after processStartTag() so the flag ends up true for this synthesized
// <body> regardless of what the start-tag processing did to it.
2569 m_framesetOk = true;
// Flushes the characters collected in m_pendingTableCharacters and restores
// the insertion mode saved in m_originalInsertionMode. Text that is not all
// whitespace is inserted while a RedirectToFosterParentGuard is alive and
// clears m_framesetOk; all-whitespace text is inserted normally.
2572 void HTMLTreeBuilder::defaultForInTableText()
// Take a copy of the pending text and empty the builder before inserting.
2574 String characters = m_pendingTableCharacters.toString();
2575 m_pendingTableCharacters.clear();
2576 if (!isAllWhitespace(characters)) {
2577 // FIXME: parse error
2578 HTMLConstructionSite::RedirectToFosterParentGuard redirecter(m_tree);
2579 m_tree.reconstructTheActiveFormattingElements();
2580 m_tree.insertTextNode(characters, NotAllWhitespace);
2581 m_framesetOk = false;
2582 setInsertionMode(m_originalInsertionMode);
// All-whitespace path (presumably reached only when the branch above returned
// early - the return is not shown here).
2585 m_tree.insertTextNode(characters);
2586 setInsertionMode(m_originalInsertionMode);
// Handles a start tag with the "in head" rules, dispatching on the tag name.
// NOTE(review): the return statements are not shown in this listing;
// presumably the function returns true when one of the branches below handled
// the token and false otherwise - confirm against the full file.
2589 bool HTMLTreeBuilder::processStartTagForInHead(AtomicHTMLToken& token)
2591 ASSERT(token.type() == HTMLTokenTypes::StartTag);
// <html>: delegated to insertHTMLHtmlStartTagInBody().
2592 if (token.name() == htmlTag) {
2593 m_tree.insertHTMLHtmlStartTagInBody(token);
// Tags inserted as self-closing elements.
2596 if (token.name() == baseTag
2597 || token.name() == basefontTag
2598 || token.name() == bgsoundTag
2599 || token.name() == commandTag
2600 || token.name() == linkTag
2601 || token.name() == metaTag) {
2602 m_tree.insertSelfClosingHTMLElement(token);
2603 // Note: The custom processing for the <meta> tag is done in HTMLMetaElement::process().
// <title>: content is tokenized as RCDATA.
2606 if (token.name() == titleTag) {
2607 processGenericRCDATAStartTag(token);
// <noscript>: raw text when scripting is enabled for the document's frame;
// otherwise the element is inserted and parsing continues in
// InHeadNoscriptMode.
2610 if (token.name() == noscriptTag) {
2611 if (scriptEnabled(m_document->frame())) {
2612 processGenericRawTextStartTag(token);
2615 m_tree.insertHTMLElement(token);
2616 setInsertionMode(InHeadNoscriptMode);
// <noframes> and <style>: content is tokenized as raw text.
2619 if (token.name() == noframesTag || token.name() == styleTag) {
2620 processGenericRawTextStartTag(token);
// <script>: with the pre-HTML5 quirks flag set, a self-closing <script/> is
// closed immediately with a fake end tag.
2623 if (token.name() == scriptTag) {
2624 processScriptStartTag(token);
2625 if (m_usePreHTML5ParserQuirks && token.selfClosing())
2626 processFakeEndTag(scriptTag);
// <head> (the body of this branch is not shown here).
2629 if (token.name() == headTag) {
// Inserts the element for |token|, switches the tokenizer to RCDATAState, and
// enters TextMode after saving the current insertion mode in
// m_originalInsertionMode.
2636 void HTMLTreeBuilder::processGenericRCDATAStartTag(AtomicHTMLToken& token)
2638 ASSERT(token.type() == HTMLTokenTypes::StartTag);
2639 m_tree.insertHTMLElement(token);
2640 m_parser->tokenizer()->setState(HTMLTokenizerState::RCDATAState);
// Save the mode before overwriting it so it can be restored later.
2641 m_originalInsertionMode = m_insertionMode;
2642 setInsertionMode(TextMode);
// Inserts the element for |token|, switches the tokenizer to RAWTEXTState,
// and enters TextMode after saving the current insertion mode in
// m_originalInsertionMode.
2645 void HTMLTreeBuilder::processGenericRawTextStartTag(AtomicHTMLToken& token)
2647 ASSERT(token.type() == HTMLTokenTypes::StartTag);
2648 m_tree.insertHTMLElement(token);
2649 m_parser->tokenizer()->setState(HTMLTokenizerState::RAWTEXTState);
// Save the mode before overwriting it so it can be restored later.
2650 m_originalInsertionMode = m_insertionMode;
2651 setInsertionMode(TextMode);
// Inserts a script element for |token|, switches the tokenizer to
// ScriptDataState, records the parser's current text position in
// m_lastScriptElementStartPosition, and enters TextMode after saving the
// current insertion mode in m_originalInsertionMode.
2654 void HTMLTreeBuilder::processScriptStartTag(AtomicHTMLToken& token)
2656 ASSERT(token.type() == HTMLTokenTypes::StartTag);
2657 m_tree.insertScriptElement(token);
2658 m_parser->tokenizer()->setState(HTMLTokenizerState::ScriptDataState);
2659 m_originalInsertionMode = m_insertionMode;
2661 TextPosition position = m_parser->textPosition();
// Sanity check: the parser's line must agree with the tokenizer's line number.
2663 ASSERT(position.m_line == m_parser->tokenizer()->lineNumber());
2665 m_lastScriptElementStartPosition = position;
2667 setInsertionMode(TextMode);
2670 // http://www.whatwg.org/specs/web-apps/current-work/multipage/tree-construction.html#tree-construction
// Decides whether |token| is handled by the foreign-content rules, based on
// the current node and the token type.
// NOTE(review): the return statement of each check is not shown in this
// listing, so the result of each branch must be confirmed in the full file.
2671 bool HTMLTreeBuilder::shouldProcessTokenInForeignContent(AtomicHTMLToken& token)
// Check 1: nothing has been inserted into the tree yet.
2673 if (m_tree.isEmpty())
2675 ContainerNode* node = m_tree.currentNode();
// Check 2: the current node is in the HTML namespace.
2676 if (isInHTMLNamespace(node))
// Check 3: MathML text integration point, for a start tag other than mglyph
// or malignmark, or for a Character token.
2678 if (HTMLElementStack::isMathMLTextIntegrationPoint(node)) {
2679 if (token.type() == HTMLTokenTypes::StartTag
2680 && token.name() != MathMLNames::mglyphTag
2681 && token.name() != MathMLNames::malignmarkTag)
2683 if (token.type() == HTMLTokenTypes::Character)
// Check 4: an <svg> start tag while the current node is MathML annotation-xml.
2686 if (node->hasTagName(MathMLNames::annotation_xmlTag)
2687 && token.type() == HTMLTokenTypes::StartTag
2688 && token.name() == SVGNames::svgTag)
// Check 5: HTML integration point, for a StartTag or Character token.
2690 if (HTMLElementStack::isHTMLIntegrationPoint(node)) {
2691 if (token.type() == HTMLTokenTypes::StartTag)
2693 if (token.type() == HTMLTokenTypes::Character)
// Check 6: EndOfFile token.
2696 if (token.type() == HTMLTokenTypes::EndOfFile)
// Processes |token| with the foreign-content (MathML/SVG) rules, dispatching
// on the token type.
// NOTE(review): break/return lines and the loop header of the end-tag walk are
// not shown in this listing - confirm control flow against the full file.
2701 void HTMLTreeBuilder::processTokenInForeignContent(AtomicHTMLToken& token)
2703 switch (token.type()) {
2704 case HTMLTokenTypes::Uninitialized:
2705 ASSERT_NOT_REACHED();
2707 case HTMLTokenTypes::DOCTYPE:
2710 case HTMLTokenTypes::StartTag: {
// These HTML start tags (and <font> carrying a color, face or size attribute)
// break out of foreign content: elements are popped up to the foreign-content
// scope marker and the token is reprocessed as a regular start tag.
2711 if (token.name() == bTag
2712 || token.name() == bigTag
2713 || token.name() == blockquoteTag
2714 || token.name() == bodyTag
2715 || token.name() == brTag
2716 || token.name() == centerTag
2717 || token.name() == codeTag
2718 || token.name() == ddTag
2719 || token.name() == divTag
2720 || token.name() == dlTag
2721 || token.name() == dtTag
2722 || token.name() == emTag
2723 || token.name() == embedTag
2724 || isNumberedHeaderTag(token.name())
2725 || token.name() == headTag
2726 || token.name() == hrTag
2727 || token.name() == iTag
2728 || token.name() == imgTag
2729 || token.name() == liTag
2730 || token.name() == listingTag
2731 || token.name() == menuTag
2732 || token.name() == metaTag
2733 || token.name() == nobrTag
2734 || token.name() == olTag
2735 || token.name() == pTag
2736 || token.name() == preTag
2737 || token.name() == rubyTag
2738 || token.name() == sTag
2739 || token.name() == smallTag
2740 || token.name() == spanTag
2741 || token.name() == strongTag
2742 || token.name() == strikeTag
2743 || token.name() == subTag
2744 || token.name() == supTag
2745 || token.name() == tableTag
2746 || token.name() == ttTag
2747 || token.name() == uTag
2748 || token.name() == ulTag
2749 || token.name() == varTag
2750 || (token.name() == fontTag && (token.getAttributeItem(colorAttr) || token.getAttributeItem(faceAttr) || token.getAttributeItem(sizeAttr)))) {
2752 m_tree.openElements()->popUntilForeignContentScopeMarker();
2753 processStartTag(token);
// Any other start tag: adjust the token for the current element's namespace
// and insert it as a foreign element in that same namespace.
2756 const AtomicString& currentNamespace = m_tree.currentElement()->namespaceURI();
2757 if (currentNamespace == MathMLNames::mathmlNamespaceURI)
2758 adjustMathMLAttributes(token);
2759 if (currentNamespace == SVGNames::svgNamespaceURI) {
2760 adjustSVGTagNameCase(token);
2761 adjustSVGAttributes(token);
2763 adjustForeignAttributes(token);
2764 m_tree.insertForeignElement(token, currentNamespace);
2767 case HTMLTokenTypes::EndTag: {
// The tag name's case is adjusted only when the current node is an SVG element.
2768 if (m_tree.currentNode()->namespaceURI() == SVGNames::svgNamespaceURI)
2769 adjustSVGTagNameCase(token);
// </script> closing an SVG script element: remember the element in
// m_scriptToProcess before popping it.
2771 if (token.name() == SVGNames::scriptTag && m_tree.currentNode()->hasTagName(SVGNames::scriptTag)) {
2773 m_scriptToProcess = m_tree.currentElement();
2774 m_tree.openElements()->pop();
// Walk the open-elements stack from the top record via next(), looking for
// an element whose local name matches the end tag; a match is popped with
// popUntilPopped(). The walk checks for reaching an HTML-namespace element.
2777 if (!isInHTMLNamespace(m_tree.currentNode())) {
2778 // FIXME: This code just wants an Element* iterator, instead of an ElementRecord*
2779 HTMLElementStack::ElementRecord* nodeRecord = m_tree.openElements()->topRecord();
2780 if (!nodeRecord->node()->hasLocalName(token.name()))
2783 if (nodeRecord->node()->hasLocalName(token.name())) {
2784 m_tree.openElements()->popUntilPopped(nodeRecord->element());
2787 nodeRecord = nodeRecord->next();
2789 if (isInHTMLNamespace(nodeRecord->node()))
2793 // Otherwise, process the token according to the rules given in the section corresponding to the current insertion mode in HTML content.
2794 processEndTag(token);
2797 case HTMLTokenTypes::Comment:
2798 m_tree.insertComment(token);
2800 case HTMLTokenTypes::Character: {
// Characters are inserted as text; text that is not all whitespace or
// replacement characters clears m_framesetOk.
2801 String characters = String(token.characters().data(), token.characters().size());
2802 m_tree.insertTextNode(characters);
2803 if (m_framesetOk && !isAllWhitespaceOrReplacementCharacters(characters))
2804 m_framesetOk = false;
// EndOfFile is asserted never to reach this function.
2807 case HTMLTokenTypes::EndOfFile:
2808 ASSERT_NOT_REACHED();
// Called when tree building is done; notifies the document through
// finishedParsing().
// NOTE(review): the statement guarded by the fragment check is not shown here -
// presumably an early return so fragment parsing skips the notification.
2813 void HTMLTreeBuilder::finished()
2815 if (isParsingFragment())
2819 // Warning, this may detach the parser. Do not do anything else after this.
2820 m_document->finishedParsing();
// Parse-error hook. The token parameter is unnamed, so it is unused; no body
// statements are visible for this function in this listing.
2823 void HTMLTreeBuilder::parseError(AtomicHTMLToken&)
// Returns whether scripts can execute in |frame|, as reported by the frame's
// script controller for the NotAboutToExecuteScript reason.
// NOTE(review): lines between the signature and the return are not shown here -
// presumably a null check on |frame|; confirm.
2827 bool HTMLTreeBuilder::scriptEnabled(Frame* frame)
2831 return frame->script()->canExecuteScripts(NotAboutToExecuteScript);
// Returns whether plugins are allowed in |frame|, as reported by the frame's
// subframe loader for the NotAboutToInstantiatePlugin reason.
// NOTE(review): lines between the signature and the return are not shown here -
// presumably a null check on |frame|; confirm.
2834 bool HTMLTreeBuilder::pluginsEnabled(Frame* frame)
2838 return frame->loader()->subframeLoader()->allowPlugins(NotAboutToInstantiatePlugin);