2 * Copyright (C) 2010 Google, Inc. All Rights Reserved.
3 * Copyright (C) 2011 Apple Inc. All rights reserved.
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
14 * THIS SOFTWARE IS PROVIDED BY GOOGLE INC. ``AS IS'' AND ANY
15 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GOOGLE INC. OR
18 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 #include "HTMLTreeBuilder.h"
31 #include "DOMWindow.h"
32 #include "DocumentFragment.h"
33 #include "DocumentType.h"
35 #include "HTMLDocument.h"
36 #include "HTMLDocumentParser.h"
37 #include "HTMLElementFactory.h"
38 #include "HTMLFormElement.h"
39 #include "HTMLHtmlElement.h"
40 #include "HTMLNames.h"
41 #include "HTMLParserIdioms.h"
42 #include "HTMLScriptElement.h"
43 #include "HTMLStackItem.h"
44 #include "HTMLToken.h"
45 #include "HTMLTokenizer.h"
46 #include "LocalizedStrings.h"
47 #include "MathMLNames.h"
48 #include "NotImplemented.h"
51 #include "XLinkNames.h"
52 #include "XMLNSNames.h"
54 #include <wtf/unicode/CharacterNames.h>
58 using namespace HTMLNames;
60 static TextPosition uninitializedPositionValue1()
62 return TextPosition(OrdinalNumber::fromOneBasedInt(-1), OrdinalNumber::first());
67 inline bool isHTMLSpaceOrReplacementCharacter(UChar character)
69 return isHTMLSpace(character) || character == replacementCharacter;
72 inline bool isAllWhitespace(const String& string)
74 return string.isAllSpecialCharacters<isHTMLSpace>();
77 inline bool isAllWhitespaceOrReplacementCharacters(const String& string)
79 return string.isAllSpecialCharacters<isHTMLSpaceOrReplacementCharacter>();
// Tests |tagName| against the numbered heading tags, starting with h1Tag.
// NOTE(review): only the h1Tag comparison is visible here; the remaining
// operands of the expression (presumably h2Tag..h6Tag) must be confirmed
// against the complete file.
82 bool isNumberedHeaderTag(const AtomicString& tagName)
84 return tagName == h1Tag
// True when |tagName| is captionTag or colgroupTag.
// NOTE(review): the function name implies a colTag comparison as well; that
// operand is not visible here -- confirm against the complete file.
92 bool isCaptionColOrColgroupTag(const AtomicString& tagName)
94 return tagName == captionTag
96 || tagName == colgroupTag;
99 bool isTableCellContextTag(const AtomicString& tagName)
101 return tagName == thTag || tagName == tdTag;
104 bool isTableBodyContextTag(const AtomicString& tagName)
106 return tagName == tbodyTag
107 || tagName == tfootTag
108 || tagName == theadTag;
111 // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#special
// Classifies |node| for the "special" category referenced by the URL above.
// The visible checks run in this order:
//   1. a fixed set of MathML and SVG tag names,
//   2. whether the node is a document fragment,
//   3. whether the node is in the HTML namespace,
//   4. the node's local name against a list of HTML tags.
// NOTE(review): the statements executed when checks 1-3 match are not visible
// here, and some operands of the final expression may be missing as well --
// confirm against the complete file before relying on the exact tag list.
112 bool isSpecialNode(Node* node)
// Foreign (MathML / SVG) elements that are tested by qualified tag name.
114 if (node->hasTagName(MathMLNames::miTag)
115 || node->hasTagName(MathMLNames::moTag)
116 || node->hasTagName(MathMLNames::mnTag)
117 || node->hasTagName(MathMLNames::msTag)
118 || node->hasTagName(MathMLNames::mtextTag)
119 || node->hasTagName(MathMLNames::annotation_xmlTag)
120 || node->hasTagName(SVGNames::foreignObjectTag)
121 || node->hasTagName(SVGNames::descTag)
122 || node->hasTagName(SVGNames::titleTag))
// Document fragments are tested separately from elements.
124 if (node->nodeType() == Node::DOCUMENT_FRAGMENT_NODE)
// The local-name comparisons below are only reached after this namespace test.
126 if (!isInHTMLNamespace(node))
128 const AtomicString& tagName = node->localName();
129 return tagName == addressTag
130 || tagName == appletTag
131 || tagName == areaTag
132 || tagName == articleTag
133 || tagName == asideTag
134 || tagName == baseTag
135 || tagName == basefontTag
136 || tagName == bgsoundTag
137 || tagName == blockquoteTag
138 || tagName == bodyTag
140 || tagName == buttonTag
141 || tagName == captionTag
142 || tagName == centerTag
144 || tagName == colgroupTag
145 || tagName == commandTag
147 || tagName == detailsTag
152 || tagName == embedTag
153 || tagName == fieldsetTag
154 || tagName == figcaptionTag
155 || tagName == figureTag
156 || tagName == footerTag
157 || tagName == formTag
158 || tagName == frameTag
159 || tagName == framesetTag
160 || isNumberedHeaderTag(tagName)
161 || tagName == headTag
162 || tagName == headerTag
163 || tagName == hgroupTag
165 || tagName == htmlTag
166 || tagName == iframeTag
168 || tagName == inputTag
169 || tagName == isindexTag
171 || tagName == linkTag
172 || tagName == listingTag
173 || tagName == marqueeTag
174 || tagName == menuTag
175 || tagName == metaTag
177 || tagName == noembedTag
178 || tagName == noframesTag
179 || tagName == noscriptTag
180 || tagName == objectTag
183 || tagName == paramTag
184 || tagName == plaintextTag
186 || tagName == scriptTag
187 || tagName == sectionTag
188 || tagName == selectTag
189 || tagName == styleTag
190 || tagName == summaryTag
191 || tagName == tableTag
192 || isTableBodyContextTag(tagName)
194 || tagName == textareaTag
196 || tagName == titleTag
200 || tagName == xmpTag;
// Tests |tagName| against the formatting tags other than aTag and nobrTag
// (those two are added by isFormattingTag / isNonAnchorFormattingTag).
// NOTE(review): some operands of this expression are not visible here; the
// full tag list must be confirmed against the complete file.
203 bool isNonAnchorNonNobrFormattingTag(const AtomicString& tagName)
205 return tagName == bTag
207 || tagName == codeTag
209 || tagName == fontTag
212 || tagName == smallTag
213 || tagName == strikeTag
214 || tagName == strongTag
219 bool isNonAnchorFormattingTag(const AtomicString& tagName)
221 return tagName == nobrTag
222 || isNonAnchorNonNobrFormattingTag(tagName);
225 // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#formatting
226 bool isFormattingTag(const AtomicString& tagName)
228 return tagName == aTag || isNonAnchorFormattingTag(tagName);
// Searches from |element| upwards for a form element. When the element under
// inspection has the form tag it is returned as an HTMLFormElement; otherwise
// the search moves to its parent, provided the parent exists and is an
// element node.
// NOTE(review): the loop construct and the value returned when no form is
// found are not visible here -- confirm against the complete file.
231 HTMLFormElement* closestFormAncestor(Element* element)
234 if (element->hasTagName(formTag))
235 return static_cast<HTMLFormElement*>(element);
236 ContainerNode* parent = element->parentNode();
// A missing or non-element parent stops the upward walk.
237 if (!parent || !parent->isElementNode())
239 element = static_cast<Element*>(parent);
// Cursor over a run of UChar data, delimited by m_current (next unread
// character) and m_end (one past the last character). The data comes either
// from a character token or from a String; this class only stores pointers
// into it. Copying is disabled via WTF_MAKE_NONCOPYABLE.
// NOTE(review): several short statements of this class (asserts, early
// returns, access specifiers, the m_end declaration) are not visible here;
// comments below describe only what the visible lines establish.
246 class HTMLTreeBuilder::ExternalCharacterTokenBuffer {
247 WTF_MAKE_NONCOPYABLE(ExternalCharacterTokenBuffer);
// Wraps the characters carried by |token|.
249 explicit ExternalCharacterTokenBuffer(AtomicHTMLToken* token)
250 : m_current(token->characters().data())
// m_end is derived from m_current, so m_current has to be initialized first.
251 , m_end(m_current + token->characters().size())
// Wraps the characters of |string|.
256 explicit ExternalCharacterTokenBuffer(const String& string)
257 : m_current(string.characters())
258 , m_end(m_current + string.length())
263 ~ExternalCharacterTokenBuffer()
// True once the cursor has reached the end of the data.
268 bool isEmpty() const { return m_current == m_end; }
// Looks at the current character and tests it for '\n'; dereferences
// m_current, so presumably requires a non-empty buffer -- confirm.
270 void skipAtMostOneLeadingNewline()
273 if (*m_current == '\n')
// Advances past leading characters for which isHTMLSpace holds.
277 void skipLeadingWhitespace()
279 skipLeading<isHTMLSpace>();
// Like skipLeadingWhitespace(), but returns what takeLeading<isHTMLSpace>() yields.
282 String takeLeadingWhitespace()
284 return takeLeading<isHTMLSpace>();
// Advances past leading characters for which isNotHTMLSpace holds.
287 void skipLeadingNonWhitespace()
289 skipLeading<isNotHTMLSpace>();
// Returns the characters between the saved start and m_current as a String.
292 String takeRemaining()
295 const UChar* start = m_current;
297 return String(start, m_current - start);
// Appends everything from m_current up to m_end to |recipient|.
300 void giveRemainingTo(StringBuilder& recipient)
302 recipient.append(m_current, m_end - m_current);
// Consumes the rest of the buffer character by character, collecting
// characters into |whitespace|. NOTE(review): the condition guarding the
// append is not visible here (presumably an isHTMLSpace test).
306 String takeRemainingWhitespace()
309 Vector<UChar> whitespace;
311 UChar cc = *m_current++;
313 whitespace.append(cc);
314 } while (m_current < m_end);
315 // Returning the null string when there aren't any whitespace
316 // characters is slightly cleaner semantically because we don't want
317 // to insert a text node (as opposed to inserting an empty text node).
318 if (whitespace.isEmpty())
320 return String::adopt(whitespace);
// Helper behind the skipLeading* methods: advances m_current while
// |characterPredicate| accepts the current character, checking for m_end
// after each step.
324 template<bool characterPredicate(UChar)>
328 while (characterPredicate(*m_current)) {
329 if (++m_current == m_end)
// Helper behind takeLeadingWhitespace(): skips as above and returns the
// skipped range; the start == m_current case (nothing skipped) is handled
// separately.
334 template<bool characterPredicate(UChar)>
338 const UChar* start = m_current;
339 skipLeading<characterPredicate>();
340 if (start == m_current)
342 return String(start, m_current - start);
// Next unread character.
345 const UChar* m_current;
// Constructor used when building a tree for a whole HTMLDocument. The third
// parameter (an unnamed bool) is not used. Both the current and the original
// insertion mode start as InitialMode, leading-newline skipping starts
// disabled, and the script start position starts as the sentinel from
// uninitializedPositionValue1().
// NOTE(review): some member initializers are not visible here.
350 HTMLTreeBuilder::HTMLTreeBuilder(HTMLDocumentParser* parser, HTMLDocument* document, bool, bool usePreHTML5ParserQuirks, unsigned maximumDOMTreeDepth)
352 , m_document(document)
353 , m_tree(document, maximumDOMTreeDepth)
354 , m_insertionMode(InitialMode)
355 , m_originalInsertionMode(InitialMode)
356 , m_shouldSkipLeadingNewline(false)
358 , m_scriptToProcessStartPosition(uninitializedPositionValue1())
359 , m_usePreHTML5ParserQuirks(usePreHTML5ParserQuirks)
363 // FIXME: Member variables should be grouped into self-initializing structs to
364 // minimize code duplication between these constructors.
// Constructor used for fragment parsing. It records the fragment, context
// element and scripting permission in m_fragmentContext, takes the document
// from the fragment, and otherwise initializes the same members as the
// document constructor. When a context element is supplied, the fragment is
// pushed as the root of the open-element stack, the insertion mode is reset,
// and the form pointer is set to the closest form ancestor of the context
// element.
// NOTE(review): some member initializers are not visible here.
365 HTMLTreeBuilder::HTMLTreeBuilder(HTMLDocumentParser* parser, DocumentFragment* fragment, Element* contextElement, FragmentScriptingPermission scriptingPermission, bool usePreHTML5ParserQuirks, unsigned maximumDOMTreeDepth)
367 , m_fragmentContext(fragment, contextElement, scriptingPermission)
368 , m_document(fragment->document())
369 , m_tree(fragment, scriptingPermission, maximumDOMTreeDepth)
370 , m_insertionMode(InitialMode)
371 , m_originalInsertionMode(InitialMode)
372 , m_shouldSkipLeadingNewline(false)
374 , m_scriptToProcessStartPosition(uninitializedPositionValue1())
375 , m_usePreHTML5ParserQuirks(usePreHTML5ParserQuirks)
377 // FIXME: This assertion will become invalid if <http://webkit.org/b/60316> is fixed.
378 ASSERT(contextElement);
// The runtime check is kept in addition to the assertion, so builds where
// ASSERT is presumably compiled out still skip this setup for a null context.
379 if (contextElement) {
380 // Steps 4.2-4.6 of the HTML5 Fragment Case parsing algorithm:
381 // http://www.whatwg.org/specs/web-apps/current-work/multipage/the-end.html#fragment-case
382 // For efficiency, we skip step 4.2 ("Let root be a new html element with no attributes")
383 // and instead use the DocumentFragment as a root node.
384 m_tree.openElements()->pushRootNode(HTMLStackItem::create(fragment));
385 resetInsertionModeAppropriately();
386 m_tree.setForm(closestFormAncestor(contextElement));
// Destructor. No statements are visible for it here; per the comment in
// detach(), DocumentParser expects detach() to be called before destruction.
390 HTMLTreeBuilder::~HTMLTreeBuilder()
// Detaches the tree builder ahead of destruction.
// NOTE(review): the statement that performs the detach is not visible here;
// the comments below explain why m_tree.clear() is not called directly.
394 void HTMLTreeBuilder::detach()
396 // This call makes little sense in fragment mode, but for consistency
397 // DocumentParser expects detach() to always be called before it's destroyed.
399 // HTMLConstructionSite might be on the callstack when detach() is called
400 // otherwise we'd just call m_tree.clear() here instead.
// Default context, used when the builder is not parsing a fragment: no
// context element, and scripting content allowed.
// NOTE(review): the initializer for m_fragment is not visible here.
404 HTMLTreeBuilder::FragmentParsingContext::FragmentParsingContext()
406 , m_contextElement(0)
407 , m_scriptingPermission(AllowScriptingContent)
411 HTMLTreeBuilder::FragmentParsingContext::FragmentParsingContext(DocumentFragment* fragment, Element* contextElement, FragmentScriptingPermission scriptingPermission)
412 : m_fragment(fragment)
413 , m_contextElement(contextElement)
414 , m_scriptingPermission(scriptingPermission)
416 ASSERT(!fragment->hasChildNodes());
// Destructor of the fragment parsing context; no statements are visible for it here.
419 HTMLTreeBuilder::FragmentParsingContext::~FragmentParsingContext()
423 PassRefPtr<Element> HTMLTreeBuilder::takeScriptToProcess(TextPosition& scriptStartPosition)
425 ASSERT(m_scriptToProcess);
426 // Unpause ourselves, callers may pause us again when processing the script.
427 // The HTML5 spec is written as though scripts are executed inside the tree
428 // builder. We pause the parser to exit the tree builder, and then resume
429 // before running scripts.
430 scriptStartPosition = m_scriptToProcessStartPosition;
431 m_scriptToProcessStartPosition = uninitializedPositionValue1();
432 return m_scriptToProcess.release();
// Entry point for a raw tokenizer token: wraps |rawToken| in an
// AtomicHTMLToken and forwards it to constructTreeFromAtomicToken().
// Non-Character tokens are handled specially before the forward, and the
// raw token is inspected again afterwards if it is still initialized.
// NOTE(review): the statements that actually clear |rawToken| are not
// visible here; the comments below describe the intent.
435 void HTMLTreeBuilder::constructTreeFromToken(HTMLToken& rawToken)
437 RefPtr<AtomicHTMLToken> token = AtomicHTMLToken::create(rawToken);
439 // We clear the rawToken in case constructTreeFromAtomicToken
440 // synchronously re-enters the parser. We don't clear the token immediately
441 // for Character tokens because the AtomicHTMLToken avoids copying the
442 // characters by keeping a pointer to the underlying buffer in the
443 // HTMLToken. Fortunately, Character tokens can't cause us to re-enter
// the parser.
446 // FIXME: Stop clearing the rawToken once we start running the parser off
447 // the main thread or once we stop allowing synchronous JavaScript
448 // execution from parseAttribute.
449 if (rawToken.type() != HTMLTokenTypes::Character)
452 constructTreeFromAtomicToken(token.get());
// Only a Character token is expected to still be initialized at this point.
454 if (!rawToken.isUninitialized()) {
455 ASSERT(rawToken.type() == HTMLTokenTypes::Character);
// Processes one atomic token and then refreshes tokenizer state.
// Tokens that shouldProcessTokenInForeignContent() accepts are routed to
// processTokenInForeignContent(). Afterwards the tokenizer is told whether
// to force NUL replacement (TextMode or foreign content) and whether CDATA
// is allowed (foreign content only), and queued tree tasks are executed.
// NOTE(review): the branch taken for tokens that are not foreign content is
// not visible here.
460 void HTMLTreeBuilder::constructTreeFromAtomicToken(AtomicHTMLToken* token)
462 if (shouldProcessTokenInForeignContent(token))
463 processTokenInForeignContent(token);
// "Foreign content" here means: the tree is non-empty and the current node is
// neither in the HTML namespace nor an HTML / MathML-text integration point.
467 bool inForeignContent = !m_tree.isEmpty()
468 && !isInHTMLNamespace(m_tree.currentNode())
469 && !HTMLElementStack::isHTMLIntegrationPoint(m_tree.currentNode())
470 && !HTMLElementStack::isMathMLTextIntegrationPoint(m_tree.currentNode());
472 m_parser->tokenizer()->setForceNullCharacterReplacement(m_insertionMode == TextMode || inForeignContent);
473 m_parser->tokenizer()->setShouldAllowCDATA(inForeignContent);
475 m_tree.executeQueuedTasks();
476 // We might be detached now.
// Dispatches |token| to the handler for its type. Every handled type except
// Character first clears m_shouldSkipLeadingNewline; Character tokens go
// straight to processCharacter(). An Uninitialized token is a programming
// error and only asserts.
479 void HTMLTreeBuilder::processToken(AtomicHTMLToken* token)
481 switch (token->type()) {
482 case HTMLTokenTypes::Uninitialized:
483 ASSERT_NOT_REACHED();
485 case HTMLTokenTypes::DOCTYPE:
486 m_shouldSkipLeadingNewline = false;
487 processDoctypeToken(token);
489 case HTMLTokenTypes::StartTag:
490 m_shouldSkipLeadingNewline = false;
491 processStartTag(token);
493 case HTMLTokenTypes::EndTag:
494 m_shouldSkipLeadingNewline = false;
495 processEndTag(token);
497 case HTMLTokenTypes::Comment:
498 m_shouldSkipLeadingNewline = false;
499 processComment(token);
// The flag is deliberately left untouched for character data.
501 case HTMLTokenTypes::Character:
502 processCharacter(token);
504 case HTMLTokenTypes::EndOfFile:
505 m_shouldSkipLeadingNewline = false;
506 processEndOfFile(token);
// Handles a DOCTYPE token. In InitialMode the doctype is inserted and the
// builder moves to BeforeHTMLMode. In InTableTextMode the pending table text
// is first handled via defaultForInTableText() and the token is then
// processed again.
// NOTE(review): handling for the remaining insertion modes is not visible here.
511 void HTMLTreeBuilder::processDoctypeToken(AtomicHTMLToken* token)
513 ASSERT(token->type() == HTMLTokenTypes::DOCTYPE);
514 if (m_insertionMode == InitialMode) {
515 m_tree.insertDoctype(token);
516 setInsertionMode(BeforeHTMLMode);
519 if (m_insertionMode == InTableTextMode) {
520 defaultForInTableText();
521 processDoctypeToken(token);
527 void HTMLTreeBuilder::processFakeStartTag(const QualifiedName& tagName, const Vector<Attribute>& attributes)
529 // FIXME: We'll need a fancier conversion than just "localName" for SVG/MathML tags.
530 RefPtr<AtomicHTMLToken> fakeToken = AtomicHTMLToken::create(HTMLTokenTypes::StartTag, tagName.localName(), attributes);
531 processStartTag(fakeToken.get());
534 void HTMLTreeBuilder::processFakeEndTag(const QualifiedName& tagName)
536 // FIXME: We'll need a fancier conversion than just "localName" for SVG/MathML tags.
537 RefPtr<AtomicHTMLToken> fakeToken = AtomicHTMLToken::create(HTMLTokenTypes::EndTag, tagName.localName());
538 processEndTag(fakeToken.get());
541 void HTMLTreeBuilder::processFakeCharacters(const String& characters)
543 ASSERT(!characters.isEmpty());
544 ExternalCharacterTokenBuffer buffer(characters);
545 processCharacterBuffer(buffer);
// Checks whether a p element is in button scope, then synthesizes and
// processes an EndTag token for p.
// NOTE(review): the statement taken when no p element is in button scope is
// not visible here (presumably an early return, given the function name).
548 void HTMLTreeBuilder::processFakePEndTagIfPInButtonScope()
550 if (!m_tree.openElements()->inButtonScope(pTag.localName()))
552 RefPtr<AtomicHTMLToken> endP = AtomicHTMLToken::create(HTMLTokenTypes::EndTag, pTag.localName());
553 processEndTag(endP.get());
// Builds the attribute list for the input element generated from an isindex
// token. It copies the token's attributes, drops any name, action and prompt
// attributes, and appends a name attribute whose value is the isindex local
// name.
556 Vector<Attribute> HTMLTreeBuilder::attributesForIsindexInput(AtomicHTMLToken* token)
558 Vector<Attribute> attributes = token->attributes();
// Iterate backwards so that removing an entry does not disturb the indices
// still to be visited.
559 for (int i = attributes.size() - 1; i >= 0; --i) {
560 const QualifiedName& name = attributes.at(i).name();
561 if (name.matches(nameAttr) || name.matches(actionAttr) || name.matches(promptAttr))
562 attributes.remove(i);
565 attributes.append(Attribute(nameAttr, isindexTag.localName()));
// Expands an isindex start tag into a sequence of synthetic tokens:
// a form start tag, hr, label start tag, prompt text, an input built from
// attributesForIsindexInput(), label end tag, hr, and a form end tag.
// The token's action attribute is copied onto the form. The label text is
// either the token's prompt attribute or searchableIndexIntroduction().
// NOTE(review): the guards around the action / prompt handling and any early
// exit at the top of the function are not visible here.
569 void HTMLTreeBuilder::processIsindexStartTagForInBody(AtomicHTMLToken* token)
571 ASSERT(token->type() == HTMLTokenTypes::StartTag);
572 ASSERT(token->name() == isindexTag);
576 notImplemented(); // Acknowledge self-closing flag
577 processFakeStartTag(formTag);
578 Attribute* actionAttribute = token->getAttributeItem(actionAttr);
580 m_tree.form()->setAttribute(actionAttr, actionAttribute->value());
581 processFakeStartTag(hrTag);
582 processFakeStartTag(labelTag);
583 Attribute* promptAttribute = token->getAttributeItem(promptAttr);
585 processFakeCharacters(promptAttribute->value());
587 processFakeCharacters(searchableIndexIntroduction());
588 processFakeStartTag(inputTag, attributesForIsindexInput(token));
589 notImplemented(); // This second set of characters may be needed by non-english locales.
590 processFakeEndTag(labelTag);
591 processFakeStartTag(hrTag);
592 processFakeEndTag(formTag);
597 bool isLi(const ContainerNode* element)
599 return element->hasTagName(liTag);
602 bool isDdOrDt(const ContainerNode* element)
604 return element->hasTagName(ddTag)
605 || element->hasTagName(dtTag);
// Shared handling for start tags that close an open element of the same kind
// (instantiated with isLi and isDdOrDt). It clears m_framesetOk, then walks
// the open-element stack from the top record via next(). A node accepted by
// |shouldClose| gets a synthetic end tag. A special node other than address,
// div or p is tested as a stopping condition. Finally a p element in button
// scope is closed and the new element is inserted.
// NOTE(review): the loop construct and the statements that leave the loop are
// not visible here.
610 template <bool shouldClose(const ContainerNode*)>
611 void HTMLTreeBuilder::processCloseWhenNestedTag(AtomicHTMLToken* token)
613 m_framesetOk = false;
614 HTMLElementStack::ElementRecord* nodeRecord = m_tree.openElements()->topRecord();
// Hold a reference to the node while a synthetic end tag is processed for it.
616 RefPtr<ContainerNode> node = nodeRecord->node();
617 if (shouldClose(node.get())) {
618 ASSERT(node->isElementNode());
619 processFakeEndTag(toElement(node.get())->tagQName());
622 if (isSpecialNode(node.get()) && !node->hasTagName(addressTag) && !node->hasTagName(divTag) && !node->hasTagName(pTag))
624 nodeRecord = nodeRecord->next();
626 processFakePEndTagIfPInButtonScope();
627 m_tree.insertHTMLElement(token);
632 typedef HashMap<AtomicString, QualifiedName> PrefixedNameToQualifiedNameMap;
634 void mapLoweredLocalNameToName(PrefixedNameToQualifiedNameMap* map, QualifiedName** names, size_t length)
636 for (size_t i = 0; i < length; ++i) {
637 const QualifiedName& name = *names[i];
638 const AtomicString& localName = name.localName();
639 AtomicString loweredLocalName = localName.lower();
640 if (loweredLocalName != localName)
641 map->add(loweredLocalName, name);
// Looks up the token's name in a lower-case -> cased-name map built from
// SVGNames::getSVGTags() and sets the token's name to the cased local name.
// The map lives in a function-local static pointer.
// NOTE(review): the guard that builds the map once and the statement taken
// when the lookup finds nothing are not visible here.
645 void adjustSVGTagNameCase(AtomicHTMLToken* token)
647 static PrefixedNameToQualifiedNameMap* caseMap = 0;
649 caseMap = new PrefixedNameToQualifiedNameMap;
651 QualifiedName** svgTags = SVGNames::getSVGTags(&length);
652 mapLoweredLocalNameToName(caseMap, svgTags, length);
655 const QualifiedName& casedName = caseMap->get(token->name());
// A null local name is tested here, presumably meaning "no entry found".
656 if (casedName.localName().isNull())
658 token->setName(casedName.localName());
// Renames token attributes to their canonical mixed-case names. |getAttrs|
// supplies the list of known attribute names, from which a lower-case ->
// cased-name map is built; each instantiation has its own function-local
// static map. Every attribute whose local name has an entry in the map is
// renamed with parserSetName().
// NOTE(review): the guard that builds the map once is not visible here.
661 template<QualifiedName** getAttrs(size_t* length)>
662 void adjustAttributes(AtomicHTMLToken* token)
664 static PrefixedNameToQualifiedNameMap* caseMap = 0;
666 caseMap = new PrefixedNameToQualifiedNameMap;
668 QualifiedName** attrs = getAttrs(&length);
669 mapLoweredLocalNameToName(caseMap, attrs, length);
672 for (unsigned i = 0; i < token->attributes().size(); ++i) {
673 Attribute& tokenAttribute = token->attributes().at(i);
674 const QualifiedName& casedName = caseMap->get(tokenAttribute.localName());
// Only attributes with a map entry are renamed; others are left as they are.
675 if (!casedName.localName().isNull())
676 tokenAttribute.parserSetName(casedName);
680 void adjustSVGAttributes(AtomicHTMLToken* token)
682 adjustAttributes<SVGNames::getSVGAttrs>(token);
685 void adjustMathMLAttributes(AtomicHTMLToken* token)
687 adjustAttributes<MathMLNames::getMathMLAttrs>(token);
690 void addNamesWithPrefix(PrefixedNameToQualifiedNameMap* map, const AtomicString& prefix, QualifiedName** names, size_t length)
692 for (size_t i = 0; i < length; ++i) {
693 QualifiedName* name = names[i];
694 const AtomicString& localName = name->localName();
695 AtomicString prefixColonLocalName = prefix + ':' + localName;
696 QualifiedName nameWithPrefix(prefix, localName, name->namespaceURI());
697 map->add(prefixColonLocalName, nameWithPrefix);
// Rewrites prefixed attribute names on |token| into namespaced
// QualifiedNames. The lookup map holds "xlink:*" entries from
// XLinkNames::getXLinkAttrs, "xml:*" entries from XMLNames::getXMLAttrs, and
// explicit entries for "xmlns" and "xmlns:xlink". Every attribute whose local
// name has an entry in the map is renamed with parserSetName().
// NOTE(review): the guard that builds the map once is not visible here.
701 void adjustForeignAttributes(AtomicHTMLToken* token)
703 static PrefixedNameToQualifiedNameMap* map = 0;
705 map = new PrefixedNameToQualifiedNameMap;
707 QualifiedName** attrs = XLinkNames::getXLinkAttrs(&length);
708 addNamesWithPrefix(map, "xlink", attrs, length);
710 attrs = XMLNames::getXMLAttrs(&length);
711 addNamesWithPrefix(map, "xml", attrs, length);
713 map->add("xmlns", XMLNSNames::xmlnsAttr);
714 map->add("xmlns:xlink", QualifiedName("xmlns", "xlink", XMLNSNames::xmlnsNamespaceURI));
717 for (unsigned i = 0; i < token->attributes().size(); ++i) {
718 Attribute& tokenAttribute = token->attributes().at(i);
719 const QualifiedName& name = map->get(tokenAttribute.localName());
// Attributes without a map entry keep their existing name.
720 if (!name.localName().isNull())
721 tokenAttribute.parserSetName(name);
// Handles a start tag token under the "in body" rules. The function is one
// long sequence of tests of token->name() against groups of tag names; each
// group performs its own tree-construction steps (closing an open p element,
// reconstructing active formatting elements, inserting the element, updating
// m_framesetOk, switching insertion mode or tokenizer state). A token that
// matches none of the groups is inserted as an ordinary HTML element by the
// last two statements.
// NOTE(review): short statements such as the exits from each group and any
// parse-error reporting are not visible here; the comments below describe
// only what the visible lines establish.
727 void HTMLTreeBuilder::processStartTagForInBody(AtomicHTMLToken* token)
729 ASSERT(token->type() == HTMLTokenTypes::StartTag);
730 if (token->name() == htmlTag) {
731 processHtmlStartTagForInBody(token);
// Tags that are delegated to the "in head" handler.
734 if (token->name() == baseTag
735 || token->name() == basefontTag
736 || token->name() == bgsoundTag
737 || token->name() == commandTag
738 || token->name() == linkTag
739 || token->name() == metaTag
740 || token->name() == noframesTag
741 || token->name() == scriptTag
742 || token->name() == styleTag
743 || token->name() == titleTag) {
744 bool didProcess = processStartTagForInHead(token);
745 ASSERT_UNUSED(didProcess, didProcess);
748 if (token->name() == bodyTag) {
// A stack without a body as second element (or with only one element) is
// only expected during fragment parsing.
750 if (!m_tree.openElements()->secondElementIsHTMLBodyElement() || m_tree.openElements()->hasOnlyOneElement()) {
751 ASSERT(isParsingFragment());
754 m_framesetOk = false;
755 m_tree.insertHTMLBodyStartTagInBody(token);
758 if (token->name() == framesetTag) {
760 if (!m_tree.openElements()->secondElementIsHTMLBodyElement() || m_tree.openElements()->hasOnlyOneElement()) {
761 ASSERT(isParsingFragment());
// Replace the body with the frameset: remove the body element from its
// parent, pop the stack down to the html element, then insert the frameset.
766 ExceptionCode ec = 0;
767 m_tree.openElements()->bodyElement()->remove(ec);
769 m_tree.openElements()->popUntil(m_tree.openElements()->bodyElement());
770 m_tree.openElements()->popHTMLBodyElement();
771 ASSERT(m_tree.openElements()->top() == m_tree.openElements()->htmlElement());
772 m_tree.insertHTMLElement(token);
773 setInsertionMode(InFramesetMode);
// Block-level containers: close an open p in button scope, then insert.
776 if (token->name() == addressTag
777 || token->name() == articleTag
778 || token->name() == asideTag
779 || token->name() == blockquoteTag
780 || token->name() == centerTag
781 || token->name() == detailsTag
782 || token->name() == dirTag
783 || token->name() == divTag
784 || token->name() == dlTag
785 || token->name() == fieldsetTag
786 || token->name() == figcaptionTag
787 || token->name() == figureTag
788 || token->name() == footerTag
789 || token->name() == headerTag
790 || token->name() == hgroupTag
791 || token->name() == menuTag
792 || token->name() == navTag
793 || token->name() == olTag
794 || token->name() == pTag
795 || token->name() == sectionTag
796 || token->name() == summaryTag
797 || token->name() == ulTag) {
798 processFakePEndTagIfPInButtonScope();
799 m_tree.insertHTMLElement(token);
// Headings: a heading that is the current node is popped before the new one
// is inserted.
802 if (isNumberedHeaderTag(token->name())) {
803 processFakePEndTagIfPInButtonScope();
804 if (isNumberedHeaderTag(m_tree.currentNode()->localName())) {
806 m_tree.openElements()->pop();
808 m_tree.insertHTMLElement(token);
// pre / listing: request that a leading newline in the following character
// data be skipped.
811 if (token->name() == preTag || token->name() == listingTag) {
812 processFakePEndTagIfPInButtonScope();
813 m_tree.insertHTMLElement(token);
814 m_shouldSkipLeadingNewline = true;
815 m_framesetOk = false;
818 if (token->name() == formTag) {
823 processFakePEndTagIfPInButtonScope();
824 m_tree.insertHTMLFormElement(token);
// List items and definition terms share processCloseWhenNestedTag().
827 if (token->name() == liTag) {
828 processCloseWhenNestedTag<isLi>(token);
831 if (token->name() == ddTag || token->name() == dtTag) {
832 processCloseWhenNestedTag<isDdOrDt>(token);
// plaintext switches the tokenizer into PLAINTEXTState.
835 if (token->name() == plaintextTag) {
836 processFakePEndTagIfPInButtonScope();
837 m_tree.insertHTMLElement(token);
838 m_parser->tokenizer()->setState(HTMLTokenizerState::PLAINTEXTState);
// A button inside an open button: close the outer one and reprocess the token.
841 if (token->name() == buttonTag) {
842 if (m_tree.openElements()->inScope(buttonTag)) {
844 processFakeEndTag(buttonTag);
845 processStartTag(token); // FIXME: Could we just fall through here?
848 m_tree.reconstructTheActiveFormattingElements();
849 m_tree.insertHTMLElement(token);
850 m_framesetOk = false;
// Anchors: an already-active a element is closed and removed from both the
// active formatting list and the open-element stack before the new one is
// inserted.
853 if (token->name() == aTag) {
854 Element* activeATag = m_tree.activeFormattingElements()->closestElementInScopeWithName(aTag.localName());
857 processFakeEndTag(aTag);
858 m_tree.activeFormattingElements()->remove(activeATag);
859 if (m_tree.openElements()->contains(activeATag))
860 m_tree.openElements()->remove(activeATag);
862 m_tree.reconstructTheActiveFormattingElements();
863 m_tree.insertFormattingElement(token);
866 if (isNonAnchorNonNobrFormattingTag(token->name())) {
867 m_tree.reconstructTheActiveFormattingElements();
868 m_tree.insertFormattingElement(token);
// nobr: a nobr already in scope is closed first, after which the active
// formatting elements are reconstructed again.
871 if (token->name() == nobrTag) {
872 m_tree.reconstructTheActiveFormattingElements();
873 if (m_tree.openElements()->inScope(nobrTag)) {
875 processFakeEndTag(nobrTag);
876 m_tree.reconstructTheActiveFormattingElements();
878 m_tree.insertFormattingElement(token);
// applet / marquee / object add a marker to the active formatting elements.
881 if (token->name() == appletTag
882 || token->name() == marqueeTag
883 || token->name() == objectTag) {
884 m_tree.reconstructTheActiveFormattingElements();
885 m_tree.insertHTMLElement(token);
886 m_tree.activeFormattingElements()->appendMarker();
887 m_framesetOk = false;
// table: outside quirks mode an open p in button scope is closed first.
890 if (token->name() == tableTag) {
891 if (!m_document->inQuirksMode() && m_tree.openElements()->inButtonScope(pTag))
892 processFakeEndTag(pTag);
893 m_tree.insertHTMLElement(token);
894 m_framesetOk = false;
895 setInsertionMode(InTableMode);
// "image" is rewritten to "img" and then handled by the group below.
898 if (token->name() == imageTag) {
900 // Apparently we're not supposed to ask.
901 token->setName(imgTag.localName());
902 // Note the fall through to the imgTag handling below!
// Void elements that are inserted self-closing.
904 if (token->name() == areaTag
905 || token->name() == brTag
906 || token->name() == embedTag
907 || token->name() == imgTag
908 || token->name() == keygenTag
909 || token->name() == wbrTag) {
910 m_tree.reconstructTheActiveFormattingElements();
911 m_tree.insertSelfClosingHTMLElement(token);
912 m_framesetOk = false;
// input: m_framesetOk is cleared unless the type attribute is "hidden"
// (compared case-insensitively).
915 if (token->name() == inputTag) {
916 Attribute* typeAttribute = token->getAttributeItem(typeAttr);
917 m_tree.reconstructTheActiveFormattingElements();
918 m_tree.insertSelfClosingHTMLElement(token);
919 if (!typeAttribute || !equalIgnoringCase(typeAttribute->value(), "hidden"))
920 m_framesetOk = false;
923 if (token->name() == paramTag
924 || token->name() == sourceTag
925 || token->name() == trackTag) {
926 m_tree.insertSelfClosingHTMLElement(token);
929 if (token->name() == hrTag) {
930 processFakePEndTagIfPInButtonScope();
931 m_tree.insertSelfClosingHTMLElement(token);
932 m_framesetOk = false;
935 if (token->name() == isindexTag) {
936 processIsindexStartTagForInBody(token);
// textarea: switch the tokenizer to RCDATAState and the builder to TextMode,
// saving the current insertion mode in m_originalInsertionMode.
939 if (token->name() == textareaTag) {
940 m_tree.insertHTMLElement(token);
941 m_shouldSkipLeadingNewline = true;
942 m_parser->tokenizer()->setState(HTMLTokenizerState::RCDATAState);
943 m_originalInsertionMode = m_insertionMode;
944 m_framesetOk = false;
945 setInsertionMode(TextMode);
// Raw-text elements are handled by processGenericRawTextStartTag().
948 if (token->name() == xmpTag) {
949 processFakePEndTagIfPInButtonScope();
950 m_tree.reconstructTheActiveFormattingElements();
951 m_framesetOk = false;
952 processGenericRawTextStartTag(token);
955 if (token->name() == iframeTag) {
956 m_framesetOk = false;
957 processGenericRawTextStartTag(token);
// noembed / noscript are raw text only when plugins / scripting are enabled
// for the document's frame.
960 if (token->name() == noembedTag && pluginsEnabled(m_document->frame())) {
961 processGenericRawTextStartTag(token);
964 if (token->name() == noscriptTag && scriptEnabled(m_document->frame())) {
965 processGenericRawTextStartTag(token);
// select: the follow-up insertion mode depends on whether the builder is
// currently in one of the table-related modes.
968 if (token->name() == selectTag) {
969 m_tree.reconstructTheActiveFormattingElements();
970 m_tree.insertHTMLElement(token);
971 m_framesetOk = false;
972 if (m_insertionMode == InTableMode
973 || m_insertionMode == InCaptionMode
974 || m_insertionMode == InColumnGroupMode
975 || m_insertionMode == InTableBodyMode
976 || m_insertionMode == InRowMode
977 || m_insertionMode == InCellMode)
978 setInsertionMode(InSelectInTableMode);
980 setInsertionMode(InSelectMode);
// optgroup / option: an option that is the current node is closed first.
983 if (token->name() == optgroupTag || token->name() == optionTag) {
984 if (m_tree.currentNode()->hasTagName(optionTag)) {
985 RefPtr<AtomicHTMLToken> endOption = AtomicHTMLToken::create(HTMLTokenTypes::EndTag, optionTag.localName());
986 processEndTag(endOption.get());
988 m_tree.reconstructTheActiveFormattingElements();
989 m_tree.insertHTMLElement(token);
// rp / rt: with a ruby element in scope, implied end tags are generated first.
992 if (token->name() == rpTag || token->name() == rtTag) {
993 if (m_tree.openElements()->inScope(rubyTag.localName())) {
994 m_tree.generateImpliedEndTags();
995 if (!m_tree.currentNode()->hasTagName(rubyTag))
998 m_tree.insertHTMLElement(token);
// math / svg: adjust attribute names, then insert as a foreign element in
// the corresponding namespace.
1001 if (token->name() == MathMLNames::mathTag.localName()) {
1002 m_tree.reconstructTheActiveFormattingElements();
1003 adjustMathMLAttributes(token);
1004 adjustForeignAttributes(token);
1005 m_tree.insertForeignElement(token, MathMLNames::mathmlNamespaceURI);
1008 if (token->name() == SVGNames::svgTag.localName()) {
1009 m_tree.reconstructTheActiveFormattingElements();
1010 adjustSVGAttributes(token);
1011 adjustForeignAttributes(token);
1012 m_tree.insertForeignElement(token, SVGNames::svgNamespaceURI);
// Table-structure, frame and head tags get their own branch in this mode.
// NOTE(review): the body of this branch is not visible here.
1015 if (isCaptionColOrColgroupTag(token->name())
1016 || token->name() == frameTag
1017 || token->name() == headTag
1018 || isTableBodyContextTag(token->name())
1019 || isTableCellContextTag(token->name())
1020 || token->name() == trTag) {
// Any other start tag: ordinary element insertion.
1024 m_tree.reconstructTheActiveFormattingElements();
1025 m_tree.insertHTMLElement(token);
// Handles the end of a colgroup under the "in column group" rules. When the
// current node is the root of the open-element stack (asserted to happen only
// during fragment parsing), the tag cannot be processed. Otherwise the
// current element is popped and the builder returns to InTableMode.
// NOTE(review): the return statements are not visible here; presumably the
// result reports whether the end tag was processed -- confirm.
1028 bool HTMLTreeBuilder::processColgroupEndTagForInColumnGroup()
1030 if (m_tree.currentNode() == m_tree.openElements()->rootNode()) {
1031 ASSERT(isParsingFragment());
1032 // FIXME: parse error
1035 m_tree.openElements()->pop();
1036 setInsertionMode(InTableMode);
1040 // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#close-the-cell
// Closes the currently open table cell by processing a synthetic end tag for
// whichever of td / th is in table scope. Must be called in InCellMode; the
// th path asserts that the builder ends up in InRowMode.
// NOTE(review): the statement that ends the td branch is not visible here.
1041 void HTMLTreeBuilder::closeTheCell()
1043 ASSERT(insertionMode() == InCellMode);
1044 if (m_tree.openElements()->inTableScope(tdTag)) {
// td and th are expected to be mutually exclusive in table scope.
1045 ASSERT(!m_tree.openElements()->inTableScope(thTag));
1046 processFakeEndTag(tdTag);
1049 ASSERT(m_tree.openElements()->inTableScope(thTag));
1050 processFakeEndTag(thTag);
1051 ASSERT(insertionMode() == InRowMode);
// Handles a start tag token under the "in table" rules. Each group below
// tests token->name() and performs the matching steps:
//   - caption, colgroup and the row-group tags clear the stack back to the
//     table scope marker, insert the element and switch insertion mode;
//   - col, td, th and tr first synthesize the missing parent start tag
//     (colgroup or tbody) and then reprocess the token in the new mode;
//   - a nested table start tag closes the current table and reprocesses the
//     token;
//   - style and script are delegated to the "in head" handler;
//   - a hidden input is inserted self-closing;
//   - form is inserted and immediately popped;
//   - anything else is processed under the "in body" rules with
//     foster-parenting redirection active.
// NOTE(review): the exits from each group and any parse-error reporting are
// short statements that are not visible here.
1054 void HTMLTreeBuilder::processStartTagForInTable(AtomicHTMLToken* token)
1056 ASSERT(token->type() == HTMLTokenTypes::StartTag);
1057 if (token->name() == captionTag) {
1058 m_tree.openElements()->popUntilTableScopeMarker();
1059 m_tree.activeFormattingElements()->appendMarker();
1060 m_tree.insertHTMLElement(token);
1061 setInsertionMode(InCaptionMode);
1064 if (token->name() == colgroupTag) {
1065 m_tree.openElements()->popUntilTableScopeMarker();
1066 m_tree.insertHTMLElement(token);
1067 setInsertionMode(InColumnGroupMode);
1070 if (token->name() == colTag) {
1071 processFakeStartTag(colgroupTag);
// The synthetic colgroup must have moved the builder into InColumnGroupMode.
// This previously asserted the enumerator constant itself, which checks
// nothing about the current mode; it now mirrors the tbody assertion below.
1072 ASSERT(insertionMode() == InColumnGroupMode);
1073 processStartTag(token);
1076 if (isTableBodyContextTag(token->name())) {
1077 m_tree.openElements()->popUntilTableScopeMarker();
1078 m_tree.insertHTMLElement(token);
1079 setInsertionMode(InTableBodyMode);
1082 if (isTableCellContextTag(token->name())
1083 || token->name() == trTag) {
1084 processFakeStartTag(tbodyTag);
1085 ASSERT(insertionMode() == InTableBodyMode);
1086 processStartTag(token);
1089 if (token->name() == tableTag) {
// Failing to close the current table is only expected during fragment parsing.
1091 if (!processTableEndTagForInTable()) {
1092 ASSERT(isParsingFragment());
1095 processStartTag(token);
1098 if (token->name() == styleTag || token->name() == scriptTag) {
1099 processStartTagForInHead(token);
1102 if (token->name() == inputTag) {
1103 Attribute* typeAttribute = token->getAttributeItem(typeAttr);
1104 if (typeAttribute && equalIgnoringCase(typeAttribute->value(), "hidden")) {
1106 m_tree.insertSelfClosingHTMLElement(token);
1109 // Fall through to "anything else" case.
1111 if (token->name() == formTag) {
1115 m_tree.insertHTMLFormElement(token, true);
1116 m_tree.openElements()->pop();
// Anything else: the guard stays alive for the duration of the in-body
// processing that follows.
1120 HTMLConstructionSite::RedirectToFosterParentGuard redirecter(m_tree);
1121 processStartTagForInBody(token);
// Dispatches a start-tag token according to the current insertion mode.
// Each section asserts the mode it handles and then inserts an element via
// m_tree, changes the insertion mode, or delegates to a mode-specific helper
// (processStartTagForInHead / processStartTagForInBody /
// processStartTagForInTable). Several sections call processStartTag() again
// after changing the mode or after processing a synthesized (fake) tag.
1124 void HTMLTreeBuilder::processStartTag(AtomicHTMLToken* token)
1126 ASSERT(token->type() == HTMLTokenTypes::StartTag);
1127 switch (insertionMode()) {
// Initial mode: run the default action for the initial mode.
1129 ASSERT(insertionMode() == InitialMode);
1130 defaultForInitial();
1132 case BeforeHTMLMode:
1133 ASSERT(insertionMode() == BeforeHTMLMode);
// An html start tag creates the root element and moves to BeforeHeadMode.
1134 if (token->name() == htmlTag) {
1135 m_tree.insertHTMLHtmlStartTagBeforeHTML(token);
1136 setInsertionMode(BeforeHeadMode);
1139 defaultForBeforeHTML();
1141 case BeforeHeadMode:
1142 ASSERT(insertionMode() == BeforeHeadMode);
1143 if (token->name() == htmlTag) {
1144 processHtmlStartTagForInBody(token);
// A head start tag inserts the head element and moves to InHeadMode.
1147 if (token->name() == headTag) {
1148 m_tree.insertHTMLHeadElement(token);
1149 setInsertionMode(InHeadMode);
1152 defaultForBeforeHead();
// In-head mode: delegate to the in-head helper, which reports via its bool
// result whether it handled the token.
1155 ASSERT(insertionMode() == InHeadMode);
1156 if (processStartTagForInHead(token))
// After-head mode.
1161 ASSERT(insertionMode() == AfterHeadMode);
1162 if (token->name() == htmlTag) {
1163 processHtmlStartTagForInBody(token);
// A body start tag clears m_framesetOk, inserts the body element and
// switches to InBodyMode.
1166 if (token->name() == bodyTag) {
1167 m_framesetOk = false;
1168 m_tree.insertHTMLBodyElement(token);
1169 setInsertionMode(InBodyMode);
1172 if (token->name() == framesetTag) {
1173 m_tree.insertHTMLElement(token);
1174 setInsertionMode(InFramesetMode);
// These tags are processed with the in-head helper. The head element is
// pushed back onto the stack of open elements for the duration of that call
// and removed again afterwards.
1177 if (token->name() == baseTag
1178 || token->name() == basefontTag
1179 || token->name() == bgsoundTag
1180 || token->name() == linkTag
1181 || token->name() == metaTag
1182 || token->name() == noframesTag
1183 || token->name() == scriptTag
1184 || token->name() == styleTag
1185 || token->name() == titleTag) {
1187 ASSERT(m_tree.head());
1188 m_tree.openElements()->pushHTMLHeadElement(HTMLStackItem::create(m_tree.head(), token));
1189 processStartTagForInHead(token);
1190 m_tree.openElements()->removeHTMLHeadElement(m_tree.head());
1193 if (token->name() == headTag) {
1197 defaultForAfterHead();
// In-body mode.
1200 ASSERT(insertionMode() == InBodyMode);
1201 processStartTagForInBody(token);
// In-table mode.
1204 ASSERT(insertionMode() == InTableMode);
1205 processStartTagForInTable(token);
// In-caption mode: table-structure start tags first try to close the caption
// (a failure is asserted to happen only when parsing a fragment) and the
// token is then processed again; any other tag uses the in-body helper.
1208 ASSERT(insertionMode() == InCaptionMode);
1209 if (isCaptionColOrColgroupTag(token->name())
1210 || isTableBodyContextTag(token->name())
1211 || isTableCellContextTag(token->name())
1212 || token->name() == trTag) {
1214 if (!processCaptionEndTagForInCaption()) {
1215 ASSERT(isParsingFragment());
1218 processStartTag(token);
1221 processStartTagForInBody(token);
1223 case InColumnGroupMode:
1224 ASSERT(insertionMode() == InColumnGroupMode);
1225 if (token->name() == htmlTag) {
1226 processHtmlStartTagForInBody(token);
// A col start tag is inserted as a self-closing element.
1229 if (token->name() == colTag) {
1230 m_tree.insertSelfClosingHTMLElement(token);
// Anything else: try to close the colgroup (failure asserted to be
// fragment-only) and process the token again.
1233 if (!processColgroupEndTagForInColumnGroup()) {
1234 ASSERT(isParsingFragment());
1237 processStartTag(token);
1239 case InTableBodyMode:
1240 ASSERT(insertionMode() == InTableBodyMode);
1241 if (token->name() == trTag) {
1242 m_tree.openElements()->popUntilTableBodyScopeMarker(); // How is there ever anything to pop?
1243 m_tree.insertHTMLElement(token);
1244 setInsertionMode(InRowMode);
// A cell start tag directly inside a table body: process a fake tr start
// tag first (which is asserted to switch to InRowMode), then process the
// original token again.
1247 if (isTableCellContextTag(token->name())) {
1249 processFakeStartTag(trTag);
1250 ASSERT(insertionMode() == InRowMode);
1251 processStartTag(token);
// caption/col/colgroup or another table-body tag: if no tbody, thead or
// tfoot is in table scope this is asserted to be the fragment case;
// the current table-body element is closed with a fake end tag and the
// token is processed again.
1254 if (isCaptionColOrColgroupTag(token->name()) || isTableBodyContextTag(token->name())) {
1255 // FIXME: This is slow.
1256 if (!m_tree.openElements()->inTableScope(tbodyTag.localName()) && !m_tree.openElements()->inTableScope(theadTag.localName()) && !m_tree.openElements()->inTableScope(tfootTag.localName())) {
1257 ASSERT(isParsingFragment());
1261 m_tree.openElements()->popUntilTableBodyScopeMarker();
1262 ASSERT(isTableBodyContextTag(m_tree.currentElement()->localName()));
1263 processFakeEndTag(m_tree.currentElement()->tagQName());
1264 processStartTag(token);
1267 processStartTagForInTable(token);
// In-row mode.
1270 ASSERT(insertionMode() == InRowMode);
// A cell start tag: pop to the table-row scope marker, insert the cell,
// switch to InCellMode and append a marker to the active formatting elements.
1271 if (isTableCellContextTag(token->name())) {
1272 m_tree.openElements()->popUntilTableRowScopeMarker();
1273 m_tree.insertHTMLElement(token);
1274 setInsertionMode(InCellMode);
1275 m_tree.activeFormattingElements()->appendMarker();
// tr, caption/col/colgroup or a table-body tag: close the current row
// (failure asserted to be fragment-only); the mode is then asserted to be
// InTableBodyMode and the token is processed again.
1278 if (token->name() == trTag
1279 || isCaptionColOrColgroupTag(token->name())
1280 || isTableBodyContextTag(token->name())) {
1281 if (!processTrEndTagForInRow()) {
1282 ASSERT(isParsingFragment());
1285 ASSERT(insertionMode() == InTableBodyMode);
1286 processStartTag(token);
1289 processStartTagForInTable(token);
// In-cell mode: table-structure start tags require a td or th in table scope
// (otherwise asserted to be the fragment case) and are processed again;
// any other tag uses the in-body helper.
1292 ASSERT(insertionMode() == InCellMode);
1293 if (isCaptionColOrColgroupTag(token->name())
1294 || isTableCellContextTag(token->name())
1295 || token->name() == trTag
1296 || isTableBodyContextTag(token->name())) {
1297 // FIXME: This could be more efficient.
1298 if (!m_tree.openElements()->inTableScope(tdTag) && !m_tree.openElements()->inTableScope(thTag)) {
1299 ASSERT(isParsingFragment());
1304 processStartTag(token);
1307 processStartTagForInBody(token);
1310 case AfterAfterBodyMode:
1311 ASSERT(insertionMode() == AfterBodyMode || insertionMode() == AfterAfterBodyMode);
1312 if (token->name() == htmlTag) {
1313 processHtmlStartTagForInBody(token);
// Any other start tag switches back to InBodyMode and is processed again.
1316 setInsertionMode(InBodyMode);
1317 processStartTag(token);
1319 case InHeadNoscriptMode:
1320 ASSERT(insertionMode() == InHeadNoscriptMode);
1321 if (token->name() == htmlTag) {
1322 processHtmlStartTagForInBody(token);
// These tags are forwarded to the in-head helper, which is asserted to
// report that it handled them.
1325 if (token->name() == basefontTag
1326 || token->name() == bgsoundTag
1327 || token->name() == linkTag
1328 || token->name() == metaTag
1329 || token->name() == noframesTag
1330 || token->name() == styleTag) {
1331 bool didProcess = processStartTagForInHead(token);
1332 ASSERT_UNUSED(didProcess, didProcess);
1335 if (token->name() == htmlTag || token->name() == noscriptTag) {
// Default action for this mode, followed by reprocessing through
// processToken().
1339 defaultForInHeadNoscript();
1340 processToken(token);
1342 case InFramesetMode:
1343 ASSERT(insertionMode() == InFramesetMode);
1344 if (token->name() == htmlTag) {
1345 processHtmlStartTagForInBody(token);
1348 if (token->name() == framesetTag) {
1349 m_tree.insertHTMLElement(token);
1352 if (token->name() == frameTag) {
1353 m_tree.insertSelfClosingHTMLElement(token);
1356 if (token->name() == noframesTag) {
1357 processStartTagForInHead(token);
1362 case AfterFramesetMode:
1363 case AfterAfterFramesetMode:
1364 ASSERT(insertionMode() == AfterFramesetMode || insertionMode() == AfterAfterFramesetMode);
1365 if (token->name() == htmlTag) {
1366 processHtmlStartTagForInBody(token);
1369 if (token->name() == noframesTag) {
1370 processStartTagForInHead(token);
1375 case InSelectInTableMode:
1376 ASSERT(insertionMode() == InSelectInTableMode);
// Table-related start tags inside a select that is inside a table: process a
// synthesized select end tag, then process the original token again.
1377 if (token->name() == captionTag
1378 || token->name() == tableTag
1379 || isTableBodyContextTag(token->name())
1380 || token->name() == trTag
1381 || isTableCellContextTag(token->name())) {
1383 RefPtr<AtomicHTMLToken> endSelect = AtomicHTMLToken::create(HTMLTokenTypes::EndTag, selectTag.localName());
1384 processEndTag(endSelect.get());
1385 processStartTag(token);
// Handling shared by InSelectMode and InSelectInTableMode.
1390 ASSERT(insertionMode() == InSelectMode || insertionMode() == InSelectInTableMode);
1391 if (token->name() == htmlTag) {
1392 processHtmlStartTagForInBody(token);
// option: if the current node is an option, process a synthesized option end
// tag first, then insert the new element.
1395 if (token->name() == optionTag) {
1396 if (m_tree.currentNode()->hasTagName(optionTag)) {
1397 RefPtr<AtomicHTMLToken> endOption = AtomicHTMLToken::create(HTMLTokenTypes::EndTag, optionTag.localName());
1398 processEndTag(endOption.get());
1400 m_tree.insertHTMLElement(token);
// optgroup: close an open option and then an open optgroup with synthesized
// end tags before inserting the new element.
1403 if (token->name() == optgroupTag) {
1404 if (m_tree.currentNode()->hasTagName(optionTag)) {
1405 RefPtr<AtomicHTMLToken> endOption = AtomicHTMLToken::create(HTMLTokenTypes::EndTag, optionTag.localName());
1406 processEndTag(endOption.get());
1408 if (m_tree.currentNode()->hasTagName(optgroupTag)) {
1409 RefPtr<AtomicHTMLToken> endOptgroup = AtomicHTMLToken::create(HTMLTokenTypes::EndTag, optgroupTag.localName());
1410 processEndTag(endOptgroup.get());
1412 m_tree.insertHTMLElement(token);
// A select start tag inside a select is handled by processing a synthesized
// select end tag.
1415 if (token->name() == selectTag) {
1417 RefPtr<AtomicHTMLToken> endSelect = AtomicHTMLToken::create(HTMLTokenTypes::EndTag, selectTag.localName());
1418 processEndTag(endSelect.get());
// input, keygen, textarea: a missing select in select scope is asserted to be
// the fragment case; a synthesized select end tag is processed and the token
// is processed again.
1421 if (token->name() == inputTag
1422 || token->name() == keygenTag
1423 || token->name() == textareaTag) {
1425 if (!m_tree.openElements()->inSelectScope(selectTag)) {
1426 ASSERT(isParsingFragment());
1429 RefPtr<AtomicHTMLToken> endSelect = AtomicHTMLToken::create(HTMLTokenTypes::EndTag, selectTag.localName());
1430 processEndTag(endSelect.get());
1431 processStartTag(token);
// script: forwarded to the in-head helper, asserted to have been handled.
1434 if (token->name() == scriptTag) {
1435 bool didProcess = processStartTagForInHead(token);
1436 ASSERT_UNUSED(didProcess, didProcess);
// In-table-text mode: run its default action, then process the token again.
1440 case InTableTextMode:
1441 defaultForInTableText();
1442 processStartTag(token);
1445 ASSERT_NOT_REACHED();
// Handles an html start tag under the in-body rules by forwarding the token
// to m_tree.insertHTMLHtmlStartTagInBody().
1450 void HTMLTreeBuilder::processHtmlStartTagForInBody(AtomicHTMLToken* token)
1453 m_tree.insertHTMLHtmlStartTagInBody(token);
// Handles a body end tag under the in-body rules. The token is asserted to be
// an end tag named body. Checks whether a body element is in scope and
// switches the insertion mode to AfterBodyMode.
// Returns a bool; processEndTagForInBody() uses it to decide whether to go on
// and process an html end tag.
// NOTE(review): presumably true means the mode was switched and false means
// the token was ignored because no body was in scope -- confirm.
1456 bool HTMLTreeBuilder::processBodyEndTagForInBody(AtomicHTMLToken* token)
1458 ASSERT(token->type() == HTMLTokenTypes::EndTag);
1459 ASSERT(token->name() == bodyTag);
1460 if (!m_tree.openElements()->inScope(bodyTag.localName())) {
1464 notImplemented(); // Emit a more specific parse error based on stack contents.
1465 setInsertionMode(AfterBodyMode);
// Generic in-body handling for an end tag that has no dedicated rule.
// Starts at the top record of the stack of open elements and advances with
// record->next(), looking for a node whose local name matches the token.
1469 void HTMLTreeBuilder::processAnyOtherEndTagForInBody(AtomicHTMLToken* token)
1471 ASSERT(token->type() == HTMLTokenTypes::EndTag);
1472 HTMLElementStack::ElementRecord* record = m_tree.openElements()->topRecord();
// Hold a reference to the node while the stack is modified below.
1474 RefPtr<ContainerNode> node = record->node();
// Match found: generate implied end tags (except for this tag name) and pop
// the stack until the matching node has been popped.
1475 if (node->hasLocalName(token->name())) {
1476 m_tree.generateImpliedEndTagsWithExclusion(token->name());
1477 if (!m_tree.currentNode()->hasLocalName(token->name()))
1479 m_tree.openElements()->popUntilPopped(toElement(node.get()));
// A special node is checked for before moving to the next record.
// NOTE(review): presumably the search stops here -- confirm.
1482 if (isSpecialNode(node.get())) {
1486 record = record->next();
1490 // FIXME: This probably belongs on HTMLElementStack.
// Walks the stack of open elements from topRecord() via next() until the
// record for formattingElement is reached. Returns the last record seen on
// the way whose element satisfies isSpecialNode(), or 0 if there was none.
// formattingElement is expected to be on the stack; falling off the end of
// the stack hits ASSERT_NOT_REACHED().
1491 HTMLElementStack::ElementRecord* HTMLTreeBuilder::furthestBlockForFormattingElement(Element* formattingElement)
1493 HTMLElementStack::ElementRecord* furthestBlock = 0;
1494 HTMLElementStack::ElementRecord* record = m_tree.openElements()->topRecord();
1495 for (; record; record = record->next()) {
1496 if (record->element() == formattingElement)
1497 return furthestBlock;
// Each special element overwrites the previous candidate, so the one
// nearest to formattingElement wins.
1498 if (isSpecialNode(record->element()))
1499 furthestBlock = record;
1501 ASSERT_NOT_REACHED();
1505 // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#parsing-main-inbody
// Runs the adoption agency algorithm for the end tag of a formatting element
// (it is called from processEndTagForInBody() when isFormattingTag() matches).
// It rearranges the stack of open elements, the list of active formatting
// elements and the DOM so that mis-nested formatting elements are repaired.
1506 void HTMLTreeBuilder::callTheAdoptionAgency(AtomicHTMLToken* token)
1508 // The adoption agency algorithm is N^2. We limit the number of iterations
1509 // to stop from hanging the whole browser. This limit is specified in the
1510 // adoption agency algorithm:
1511 // http://www.whatwg.org/specs/web-apps/current-work/multipage/tree-construction.html#parsing-main-inbody
1512 static const int outerIterationLimit = 8;
1513 static const int innerIterationLimit = 3;
1515 for (int i = 0; i < outerIterationLimit; ++i) {
// Find the formatting element named by the token in the active formatting
// list. The first check covers: none found, or found on the open-element
// stack but not in scope.
1517 Element* formattingElement = m_tree.activeFormattingElements()->closestElementInScopeWithName(token->name());
1518 if (!formattingElement || ((m_tree.openElements()->contains(formattingElement)) && !m_tree.openElements()->inScope(formattingElement))) {
1520 notImplemented(); // Check the stack of open elements for a more specific parse error.
// The formatting element is in the active list but no longer on the stack of
// open elements: drop it from the active list.
1523 HTMLElementStack::ElementRecord* formattingElementRecord = m_tree.openElements()->find(formattingElement);
1524 if (!formattingElementRecord) {
1526 m_tree.activeFormattingElements()->remove(formattingElement);
1529 if (formattingElement != m_tree.currentElement())
1532 HTMLElementStack::ElementRecord* furthestBlock = furthestBlockForFormattingElement(formattingElement);
// No furthest block: pop up to and including the formatting element and
// remove it from the active formatting list.
1534 if (!furthestBlock) {
1535 m_tree.openElements()->popUntilPopped(formattingElement);
1536 m_tree.activeFormattingElements()->remove(formattingElement);
// The common ancestor is the node in the record that follows the formatting
// element's record on the stack.
1540 ASSERT(furthestBlock->isAbove(formattingElementRecord));
1541 RefPtr<ContainerNode> commonAncestor = formattingElementRecord->next()->node();
// Remember the formatting element's position in the active formatting list.
1543 HTMLFormattingElementList::Bookmark bookmark = m_tree.activeFormattingElements()->bookmarkFor(formattingElement);
// Inner loop: walk from the furthest block towards the formatting element.
// Note that this loop's counter shadows the outer loop's `i`.
1545 HTMLElementStack::ElementRecord* node = furthestBlock;
1546 HTMLElementStack::ElementRecord* nextNode = node->next();
1547 HTMLElementStack::ElementRecord* lastNode = furthestBlock;
1548 for (int i = 0; i < innerIterationLimit; ++i) {
1552 nextNode = node->next(); // Save node->next() for the next iteration in case node is deleted in 6.2.
// Nodes that are not in the active formatting list are removed from the
// stack of open elements.
1554 if (!m_tree.activeFormattingElements()->contains(node->element())) {
1555 m_tree.openElements()->remove(node->element());
1560 if (node == formattingElementRecord)
// Create a fresh element from node's saved token and substitute it for the
// old one in both the active formatting list and the open-element stack.
1563 RefPtr<HTMLStackItem> newItem = m_tree.createElementFromSavedToken(node->stackItem().get());
1565 HTMLFormattingElementList::Entry* nodeEntry = m_tree.activeFormattingElements()->find(node->element());
1566 nodeEntry->replaceElement(newItem);
1567 node->replaceElement(newItem.release());
1568 // 6.4 -- Intentionally out of order to handle the case where node
1569 // was replaced in 6.5.
1570 // http://www.w3.org/Bugs/Public/show_bug.cgi?id=10096
1571 if (lastNode == furthestBlock)
1572 bookmark.moveToAfter(nodeEntry);
// Reparent lastNode's element under node's (new) element, attaching it
// lazily if the new parent is attached and the child is not.
1574 if (ContainerNode* parent = lastNode->element()->parentNode())
1575 parent->parserRemoveChild(lastNode->element());
1576 node->element()->parserAddChild(lastNode->element());
1577 if (lastNode->element()->parentElement()->attached() && !lastNode->element()->attached())
1578 lastNode->element()->lazyAttach();
// After the inner loop: detach lastNode's element from its current parent
// and place it under the common ancestor, or foster-parent it when the
// common ancestor is a table, tr or table-body element.
1583 const AtomicString& commonAncestorTag = commonAncestor->localName();
1584 if (ContainerNode* parent = lastNode->element()->parentNode())
1585 parent->parserRemoveChild(lastNode->element());
1586 // FIXME: If this moves to HTMLConstructionSite, this check should use
1587 // causesFosterParenting(tagName) instead.
1588 if (commonAncestorTag == tableTag
1589 || commonAncestorTag == trTag
1590 || isTableBodyContextTag(commonAncestorTag))
1591 m_tree.fosterParent(lastNode->element());
1593 commonAncestor->parserAddChild(lastNode->element());
1594 ASSERT(lastNode->node()->isElementNode());
1595 ASSERT(lastNode->element()->parentNode());
1596 if (lastNode->element()->parentNode()->attached() && !lastNode->element()->attached())
1597 lastNode->element()->lazyAttach();
// Create a new element from the formatting element's saved token and move
// all of the furthest block's children into it.
1600 RefPtr<HTMLStackItem> newItem = m_tree.createElementFromSavedToken(formattingElementRecord->stackItem().get());
1602 newItem->element()->takeAllChildrenFrom(furthestBlock->element());
1604 Element* furthestBlockElement = furthestBlock->element();
1605 // FIXME: All this creation / parserAddChild / attach business should
1606 // be in HTMLConstructionSite. My guess is that steps 8--12
1607 // should all be in some HTMLConstructionSite function.
1608 furthestBlockElement->parserAddChild(newItem->element());
1609 if (furthestBlockElement->attached() && !newItem->element()->attached()) {
1610 // Notice that newItem->element() might already be attached if, for example, one of the reparented
1611 // children is a style element, which attaches itself automatically.
1612 newItem->element()->attach();
// Swap the new element in for the formatting element in the active
// formatting list (at the bookmark) and on the stack of open elements
// (directly above the furthest block).
1615 m_tree.activeFormattingElements()->swapTo(formattingElement, newItem, bookmark);
1617 m_tree.openElements()->remove(formattingElement);
1618 m_tree.openElements()->insertAbove(newItem, furthestBlock);
// Chooses the insertion mode from the stack of open elements. Starting at the
// top record and moving on with next(), the first node whose tag name matches
// one of the cases below determines the mode. Branches that assert
// isParsingFragment() are expected to be reachable only when parsing a
// fragment.
1622 void HTMLTreeBuilder::resetInsertionModeAppropriately()
1624 // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#reset-the-insertion-mode-appropriately
1626 HTMLElementStack::ElementRecord* nodeRecord = m_tree.openElements()->topRecord();
1628 ContainerNode* node = nodeRecord->node();
// At the root of the stack (fragment parsing only) the fragment's context
// element is examined instead of the root node.
1629 if (node == m_tree.openElements()->rootNode()) {
1630 ASSERT(isParsingFragment());
1632 node = m_fragmentContext.contextElement();
1634 if (node->hasTagName(selectTag)) {
1635 ASSERT(isParsingFragment());
1636 return setInsertionMode(InSelectMode);
1638 if (node->hasTagName(tdTag) || node->hasTagName(thTag))
1639 return setInsertionMode(InCellMode);
1640 if (node->hasTagName(trTag))
1641 return setInsertionMode(InRowMode);
1642 if (node->hasTagName(tbodyTag) || node->hasTagName(theadTag) || node->hasTagName(tfootTag))
1643 return setInsertionMode(InTableBodyMode);
1644 if (node->hasTagName(captionTag))
1645 return setInsertionMode(InCaptionMode);
1646 if (node->hasTagName(colgroupTag)) {
1647 ASSERT(isParsingFragment());
1648 return setInsertionMode(InColumnGroupMode);
1650 if (node->hasTagName(tableTag))
1651 return setInsertionMode(InTableMode);
// A head element maps to InBodyMode here, not InHeadMode.
1652 if (node->hasTagName(headTag)) {
1653 ASSERT(isParsingFragment());
1654 return setInsertionMode(InBodyMode);
1656 if (node->hasTagName(bodyTag))
1657 return setInsertionMode(InBodyMode);
1658 if (node->hasTagName(framesetTag)) {
1659 ASSERT(isParsingFragment());
1660 return setInsertionMode(InFramesetMode);
1662 if (node->hasTagName(htmlTag)) {
1663 ASSERT(isParsingFragment());
1664 return setInsertionMode(BeforeHeadMode);
// Fallback to InBodyMode, asserted to occur only when parsing a fragment.
1667 ASSERT(isParsingFragment());
1668 return setInsertionMode(InBodyMode);
// No match for this node: continue with the next record on the stack.
1670 nodeRecord = nodeRecord->next();
// Handles an end tag while in InTableBodyMode (called from processEndTag()).
1674 void HTMLTreeBuilder::processEndTagForInTableBody(AtomicHTMLToken* token)
1676 ASSERT(token->type() == HTMLTokenTypes::EndTag);
// tbody/thead/tfoot end tag: the element must be in table scope; pop to the
// table-body scope marker, pop the section element itself and switch to
// InTableMode.
1677 if (isTableBodyContextTag(token->name())) {
1678 if (!m_tree.openElements()->inTableScope(token->name())) {
1682 m_tree.openElements()->popUntilTableBodyScopeMarker();
1683 m_tree.openElements()->pop();
1684 setInsertionMode(InTableMode);
// table end tag: if no table section is in table scope this is asserted to
// be the fragment case; close the current section with a fake end tag and
// process the token again.
1687 if (token->name() == tableTag) {
1688 // FIXME: This is slow.
1689 if (!m_tree.openElements()->inTableScope(tbodyTag.localName()) && !m_tree.openElements()->inTableScope(theadTag.localName()) && !m_tree.openElements()->inTableScope(tfootTag.localName())) {
1690 ASSERT(isParsingFragment());
1694 m_tree.openElements()->popUntilTableBodyScopeMarker();
1695 ASSERT(isTableBodyContextTag(m_tree.currentElement()->localName()));
1696 processFakeEndTag(m_tree.currentElement()->tagQName());
1697 processEndTag(token);
// End tags that get their own check in this mode.
// NOTE(review): presumably a parse error that ignores the token -- confirm.
1700 if (token->name() == bodyTag
1701 || isCaptionColOrColgroupTag(token->name())
1702 || token->name() == htmlTag
1703 || isTableCellContextTag(token->name())
1704 || token->name() == trTag) {
// Everything else is handled by the in-table helper.
1708 processEndTagForInTable(token);
// Handles an end tag while in InRowMode (called from processEndTag()).
1711 void HTMLTreeBuilder::processEndTagForInRow(AtomicHTMLToken* token)
1713 ASSERT(token->type() == HTMLTokenTypes::EndTag);
1714 if (token->name() == trTag) {
1715 processTrEndTagForInRow();
// table end tag: close the row first (failure asserted to be fragment-only);
// the mode is then asserted to be InTableBodyMode and the token is
// processed again.
1718 if (token->name() == tableTag) {
1719 if (!processTrEndTagForInRow()) {
1720 ASSERT(isParsingFragment());
1723 ASSERT(insertionMode() == InTableBodyMode);
1724 processEndTag(token);
// tbody/thead/tfoot end tag: requires the element in table scope; close the
// row with a fake tr end tag and process the token again.
1727 if (isTableBodyContextTag(token->name())) {
1728 if (!m_tree.openElements()->inTableScope(token->name())) {
1732 processFakeEndTag(trTag);
1733 ASSERT(insertionMode() == InTableBodyMode);
1734 processEndTag(token);
// End tags that get their own check in this mode.
// NOTE(review): presumably a parse error that ignores the token -- confirm.
1737 if (token->name() == bodyTag
1738 || isCaptionColOrColgroupTag(token->name())
1739 || token->name() == htmlTag
1740 || isTableCellContextTag(token->name())) {
// Everything else is handled by the in-table helper.
1744 processEndTagForInTable(token);
// Handles an end tag while in InCellMode (called from processEndTag()).
1747 void HTMLTreeBuilder::processEndTagForInCell(AtomicHTMLToken* token)
1749 ASSERT(token->type() == HTMLTokenTypes::EndTag);
// td/th end tag: requires the cell in table scope; generate implied end
// tags, pop until the cell has been popped, clear the active formatting
// elements back to the last marker and switch to InRowMode.
1750 if (isTableCellContextTag(token->name())) {
1751 if (!m_tree.openElements()->inTableScope(token->name())) {
1755 m_tree.generateImpliedEndTags();
1756 if (!m_tree.currentNode()->hasLocalName(token->name()))
1758 m_tree.openElements()->popUntilPopped(token->name());
1759 m_tree.activeFormattingElements()->clearToLastMarker();
1760 setInsertionMode(InRowMode);
// End tags that get their own check in this mode.
// NOTE(review): presumably a parse error that ignores the token -- confirm.
1763 if (token->name() == bodyTag
1764 || isCaptionColOrColgroupTag(token->name())
1765 || token->name() == htmlTag) {
// table, tr or table-section end tag: if the named element is not in table
// scope, the assertion allows that only for table-section tags or when
// parsing a fragment. The token is processed again via processEndTag().
1769 if (token->name() == tableTag
1770 || token->name() == trTag
1771 || isTableBodyContextTag(token->name())) {
1772 if (!m_tree.openElements()->inTableScope(token->name())) {
1773 ASSERT(isTableBodyContextTag(token->name()) || isParsingFragment());
1778 processEndTag(token);
// Everything else is handled by the in-body helper.
1781 processEndTagForInBody(token);
// Handles an end tag under the in-body rules. Tag names are tested in groups;
// most groups check that a matching element is in the relevant scope,
// generate implied end tags and pop the stack of open elements until the
// element has been popped. Unrecognized tags fall to
// processAnyOtherEndTagForInBody().
1784 void HTMLTreeBuilder::processEndTagForInBody(AtomicHTMLToken* token)
1786 ASSERT(token->type() == HTMLTokenTypes::EndTag);
1787 if (token->name() == bodyTag) {
1788 processBodyEndTagForInBody(token);
// html end tag: process a synthesized body end tag first; if that call
// returns true, process the html end tag again.
1791 if (token->name() == htmlTag) {
1792 RefPtr<AtomicHTMLToken> endBody = AtomicHTMLToken::create(HTMLTokenTypes::EndTag, bodyTag.localName());
1793 if (processBodyEndTagForInBody(endBody.get()))
1794 processEndTag(token);
// Block-level container end tags: require the element in scope, generate
// implied end tags and pop until it has been popped.
1797 if (token->name() == addressTag
1798 || token->name() == articleTag
1799 || token->name() == asideTag
1800 || token->name() == blockquoteTag
1801 || token->name() == buttonTag
1802 || token->name() == centerTag
1803 || token->name() == detailsTag
1804 || token->name() == dirTag
1805 || token->name() == divTag
1806 || token->name() == dlTag
1807 || token->name() == fieldsetTag
1808 || token->name() == figcaptionTag
1809 || token->name() == figureTag
1810 || token->name() == footerTag
1811 || token->name() == headerTag
1812 || token->name() == hgroupTag
1813 || token->name() == listingTag
1814 || token->name() == menuTag
1815 || token->name() == navTag
1816 || token->name() == olTag
1817 || token->name() == preTag
1818 || token->name() == sectionTag
1819 || token->name() == summaryTag
1820 || token->name() == ulTag) {
1821 if (!m_tree.openElements()->inScope(token->name())) {
1825 m_tree.generateImpliedEndTags();
1826 if (!m_tree.currentNode()->hasLocalName(token->name()))
1828 m_tree.openElements()->popUntilPopped(token->name());
// form end tag: take the form element from m_tree; it must exist and be in
// scope. The form element is removed from the stack (not popped to).
1831 if (token->name() == formTag) {
1832 RefPtr<Element> node = m_tree.takeForm();
1833 if (!node || !m_tree.openElements()->inScope(node.get())) {
1837 m_tree.generateImpliedEndTags();
1838 if (m_tree.currentElement() != node.get())
1840 m_tree.openElements()->remove(node.get());
// p end tag: when no p is in button scope, a fake p start tag is processed
// first so that there is an element to close, and the end tag is then
// processed again.
1842 if (token->name() == pTag) {
1843 if (!m_tree.openElements()->inButtonScope(token->name())) {
1845 processFakeStartTag(pTag);
1846 ASSERT(m_tree.openElements()->inScope(token->name()));
1847 processEndTag(token);
1850 m_tree.generateImpliedEndTagsWithExclusion(token->name());
1851 if (!m_tree.currentNode()->hasLocalName(token->name()))
1853 m_tree.openElements()->popUntilPopped(token->name());
// li end tag: uses list-item scope.
1856 if (token->name() == liTag) {
1857 if (!m_tree.openElements()->inListItemScope(token->name())) {
1861 m_tree.generateImpliedEndTagsWithExclusion(token->name());
1862 if (!m_tree.currentNode()->hasLocalName(token->name()))
1864 m_tree.openElements()->popUntilPopped(token->name());
1867 if (token->name() == ddTag
1868 || token->name() == dtTag) {
1869 if (!m_tree.openElements()->inScope(token->name())) {
1873 m_tree.generateImpliedEndTagsWithExclusion(token->name());
1874 if (!m_tree.currentNode()->hasLocalName(token->name()))
1876 m_tree.openElements()->popUntilPopped(token->name());
// Numbered header end tag: any numbered header element in scope qualifies,
// and the stack is popped until a numbered header element has been popped.
1879 if (isNumberedHeaderTag(token->name())) {
1880 if (!m_tree.openElements()->hasNumberedHeaderElementInScope()) {
1884 m_tree.generateImpliedEndTags();
1885 if (!m_tree.currentNode()->hasLocalName(token->name()))
1887 m_tree.openElements()->popUntilNumberedHeaderElementPopped();
// Formatting element end tags go through the adoption agency algorithm.
1890 if (isFormattingTag(token->name())) {
1891 callTheAdoptionAgency(token);
// applet/marquee/object end tag: as for the block-level group, and
// additionally clear the active formatting elements to the last marker.
1894 if (token->name() == appletTag
1895 || token->name() == marqueeTag
1896 || token->name() == objectTag) {
1897 if (!m_tree.openElements()->inScope(token->name())) {
1901 m_tree.generateImpliedEndTags();
1902 if (!m_tree.currentNode()->hasLocalName(token->name()))
1904 m_tree.openElements()->popUntilPopped(token->name());
1905 m_tree.activeFormattingElements()->clearToLastMarker();
// A br end tag is handled by processing a fake br start tag.
1908 if (token->name() == brTag) {
1910 processFakeStartTag(brTag);
1913 processAnyOtherEndTagForInBody(token);
// Closes the current caption element: generates implied end tags, pops until
// the caption has been popped, clears the active formatting elements to the
// last marker and switches to InTableMode.
// Returns a bool that callers test with `!`, asserting isParsingFragment() on
// the failing side. A missing caption in table scope is likewise asserted to
// occur only when parsing a fragment.
1916 bool HTMLTreeBuilder::processCaptionEndTagForInCaption()
1918 if (!m_tree.openElements()->inTableScope(captionTag.localName())) {
1919 ASSERT(isParsingFragment());
1920 // FIXME: parse error
1923 m_tree.generateImpliedEndTags();
1924 // FIXME: parse error if (!m_tree.currentElement()->hasTagName(captionTag))
1925 m_tree.openElements()->popUntilPopped(captionTag.localName());
1926 m_tree.activeFormattingElements()->clearToLastMarker();
1927 setInsertionMode(InTableMode);
// Closes the current tr element: pops to the table-row scope marker, asserts
// that the current element is the tr, pops it and switches to
// InTableBodyMode.
// Returns a bool that callers test with `!`, asserting isParsingFragment() on
// the failing side. A missing tr in table scope is likewise asserted to occur
// only when parsing a fragment.
1931 bool HTMLTreeBuilder::processTrEndTagForInRow()
1933 if (!m_tree.openElements()->inTableScope(trTag.localName())) {
1934 ASSERT(isParsingFragment());
1935 // FIXME: parse error
1938 m_tree.openElements()->popUntilTableRowScopeMarker();
1939 ASSERT(m_tree.currentElement()->hasTagName(trTag));
1940 m_tree.openElements()->pop();
1941 setInsertionMode(InTableBodyMode);
// Closes the current table element: pops the stack of open elements until the
// table has been popped, then recomputes the insertion mode from what
// remains on the stack.
// Returns a bool that callers test with `!`, asserting isParsingFragment() on
// the failing side. A missing table in table scope is likewise asserted to
// occur only when parsing a fragment.
1945 bool HTMLTreeBuilder::processTableEndTagForInTable()
1947 if (!m_tree.openElements()->inTableScope(tableTag)) {
1948 ASSERT(isParsingFragment());
1949 // FIXME: parse error.
1952 m_tree.openElements()->popUntilPopped(tableTag.localName());
1953 resetInsertionModeAppropriately();
// Handles an end tag under the in-table rules.
1957 void HTMLTreeBuilder::processEndTagForInTable(AtomicHTMLToken* token)
1959 ASSERT(token->type() == HTMLTokenTypes::EndTag);
1960 if (token->name() == tableTag) {
1961 processTableEndTagForInTable();
// End tags that get their own check in this mode.
// NOTE(review): presumably a parse error that ignores the token -- confirm.
1964 if (token->name() == bodyTag
1965 || isCaptionColOrColgroupTag(token->name())
1966 || token->name() == htmlTag
1967 || isTableBodyContextTag(token->name())
1968 || isTableCellContextTag(token->name())
1969 || token->name() == trTag) {
// Any other end tag uses the in-body rules while the foster-parent redirect
// guard object is alive.
1974 // Is this redirection necessary here?
1975 HTMLConstructionSite::RedirectToFosterParentGuard redirecter(m_tree);
1976 processEndTagForInBody(token);
// Dispatches an end-tag token according to the current insertion mode.
// Each section asserts the mode it handles and then pops elements, changes
// the insertion mode, or delegates to a mode-specific helper. Several
// sections call processEndTag() again after changing the mode or after
// implicitly closing an element.
1979 void HTMLTreeBuilder::processEndTag(AtomicHTMLToken* token)
1981 ASSERT(token->type() == HTMLTokenTypes::EndTag);
1982 switch (insertionMode()) {
// Initial mode: run the default action for the initial mode.
1984 ASSERT(insertionMode() == InitialMode);
1985 defaultForInitial();
1987 case BeforeHTMLMode:
1988 ASSERT(insertionMode() == BeforeHTMLMode);
// Only head, body, html and br end tags skip the check below and reach the
// default action.
1989 if (token->name() != headTag && token->name() != bodyTag && token->name() != htmlTag && token->name() != brTag) {
1993 defaultForBeforeHTML();
1995 case BeforeHeadMode:
1996 ASSERT(insertionMode() == BeforeHeadMode);
1997 if (token->name() != headTag && token->name() != bodyTag && token->name() != htmlTag && token->name() != brTag) {
2001 defaultForBeforeHead();
// In-head mode: a head end tag pops the head element and moves to
// AfterHeadMode.
2004 ASSERT(insertionMode() == InHeadMode);
2005 if (token->name() == headTag) {
2006 m_tree.openElements()->popHTMLHeadElement();
2007 setInsertionMode(AfterHeadMode);
2010 if (token->name() != bodyTag && token->name() != htmlTag && token->name() != brTag) {
// After-head mode.
2017 ASSERT(insertionMode() == AfterHeadMode);
2018 if (token->name() != bodyTag && token->name() != htmlTag && token->name() != brTag) {
2022 defaultForAfterHead();
// In-body mode.
2025 ASSERT(insertionMode() == InBodyMode);
2026 processEndTagForInBody(token);
// In-table mode.
2029 ASSERT(insertionMode() == InTableMode);
2030 processEndTagForInTable(token);
// In-caption mode.
2033 ASSERT(insertionMode() == InCaptionMode);
2034 if (token->name() == captionTag) {
2035 processCaptionEndTagForInCaption();
// A table end tag first closes the caption (failure asserted to be
// fragment-only) and is then processed again.
2038 if (token->name() == tableTag) {
2040 if (!processCaptionEndTagForInCaption()) {
2041 ASSERT(isParsingFragment());
2044 processEndTag(token);
2047 if (token->name() == bodyTag
2048 || token->name() == colTag
2049 || token->name() == colgroupTag
2050 || token->name() == htmlTag
2051 || isTableBodyContextTag(token->name())
2052 || isTableCellContextTag(token->name())
2053 || token->name() == trTag) {
2057 processEndTagForInBody(token);
2059 case InColumnGroupMode:
2060 ASSERT(insertionMode() == InColumnGroupMode);
2061 if (token->name() == colgroupTag) {
2062 processColgroupEndTagForInColumnGroup();
2065 if (token->name() == colTag) {
// Anything else: try to close the colgroup (failure asserted to be
// fragment-only) and process the token again.
2069 if (!processColgroupEndTagForInColumnGroup()) {
2070 ASSERT(isParsingFragment());
2073 processEndTag(token);
// In-row mode.
2076 ASSERT(insertionMode() == InRowMode);
2077 processEndTagForInRow(token);
// In-cell mode.
2080 ASSERT(insertionMode() == InCellMode);
2081 processEndTagForInCell(token);
2083 case InTableBodyMode:
2084 ASSERT(insertionMode() == InTableBodyMode);
2085 processEndTagForInTableBody(token);
// After-body mode: an html end tag moves to AfterAfterBodyMode; the fragment
// case is checked for separately.
2088 ASSERT(insertionMode() == AfterBodyMode);
2089 if (token->name() == htmlTag) {
2090 if (isParsingFragment()) {
2094 setInsertionMode(AfterAfterBodyMode);
2098 case AfterAfterBodyMode:
2099 ASSERT(insertionMode() == AfterBodyMode || insertionMode() == AfterAfterBodyMode);
// Switch back to InBodyMode and process the token again.
2101 setInsertionMode(InBodyMode);
2102 processEndTag(token);
2104 case InHeadNoscriptMode:
2105 ASSERT(insertionMode() == InHeadNoscriptMode);
// A noscript end tag pops the noscript element (the head element is asserted
// to be current afterwards) and returns to InHeadMode.
2106 if (token->name() == noscriptTag) {
2107 ASSERT(m_tree.currentElement()->hasTagName(noscriptTag));
2108 m_tree.openElements()->pop();
2109 ASSERT(m_tree.currentElement()->hasTagName(headTag));
2110 setInsertionMode(InHeadMode);
2113 if (token->name() != brTag) {
2117 defaultForInHeadNoscript();
2118 processToken(token);
// Script end tag: remember the script element in m_scriptToProcess, pop it,
// restore the insertion mode saved in m_originalInsertionMode and put the
// tokenizer in DataState. When parsing a fragment whose scripting permission
// is DisallowScriptingContent, the script element's children are removed.
2121 if (token->name() == scriptTag) {
2122 // Pause ourselves so that parsing stops until the script can be processed by the caller.
2123 ASSERT(m_tree.currentElement()->hasTagName(scriptTag));
2124 m_scriptToProcess = m_tree.currentElement();
2125 m_tree.openElements()->pop();
2126 if (isParsingFragment() && m_fragmentContext.scriptingPermission() == DisallowScriptingContent)
2127 m_scriptToProcess->removeAllChildren();
2128 setInsertionMode(m_originalInsertionMode);
2130 // This token will not have been created by the tokenizer if a
2131 // self-closing script tag was encountered and pre-HTML5 parser
2132 // quirks are enabled. We must set the tokenizer's state to
2133 // DataState explicitly if the tokenizer didn't have a chance to.
2134 ASSERT(m_parser->tokenizer()->state() == HTMLTokenizerState::DataState || m_usePreHTML5ParserQuirks);
2135 m_parser->tokenizer()->setState(HTMLTokenizerState::DataState);
// Pop the current element and restore the saved insertion mode.
2138 m_tree.openElements()->pop();
2139 setInsertionMode(m_originalInsertionMode);
2141 case InFramesetMode:
2142 ASSERT(insertionMode() == InFramesetMode);
// frameset end tag: the case where the current node is the root of the stack
// is checked for first; the frameset is popped, and the mode becomes
// AfterFramesetMode only when not parsing a fragment and the new current
// element is not itself a frameset.
2143 if (token->name() == framesetTag) {
2144 if (m_tree.currentNode() == m_tree.openElements()->rootNode()) {
2148 m_tree.openElements()->pop();
2149 if (!isParsingFragment() && !m_tree.currentElement()->hasTagName(framesetTag))
2150 setInsertionMode(AfterFramesetMode);
2154 case AfterFramesetMode:
2155 ASSERT(insertionMode() == AfterFramesetMode);
2156 if (token->name() == htmlTag) {
2157 setInsertionMode(AfterAfterFramesetMode);
2161 case AfterAfterFramesetMode:
2162 ASSERT(insertionMode() == AfterFramesetMode || insertionMode() == AfterAfterFramesetMode);
2165 case InSelectInTableMode:
2166 ASSERT(insertionMode() == InSelectInTableMode);
// Table-related end tags: only when the named element is in table scope,
// process a synthesized select end tag and then process the token again.
2167 if (token->name() == captionTag
2168 || token->name() == tableTag
2169 || isTableBodyContextTag(token->name())
2170 || token->name() == trTag
2171 || isTableCellContextTag(token->name())) {
2173 if (m_tree.openElements()->inTableScope(token->name())) {
2174 RefPtr<AtomicHTMLToken> endSelect = AtomicHTMLToken::create(HTMLTokenTypes::EndTag, selectTag.localName());
2175 processEndTag(endSelect.get());
2176 processEndTag(token);
// Handling shared by InSelectMode and InSelectInTableMode.
2182 ASSERT(insertionMode() == InSelectMode || insertionMode() == InSelectInTableMode);
// optgroup end tag: an option that sits directly inside an optgroup is closed
// first with a fake end tag; the optgroup is popped if it is then current.
2183 if (token->name() == optgroupTag) {
2184 if (m_tree.currentNode()->hasTagName(optionTag) && m_tree.oneBelowTop() && m_tree.oneBelowTop()->hasTagName(optgroupTag))
2185 processFakeEndTag(optionTag);
2186 if (m_tree.currentNode()->hasTagName(optgroupTag)) {
2187 m_tree.openElements()->pop();
2193 if (token->name() == optionTag) {
2194 if (m_tree.currentNode()->hasTagName(optionTag)) {
2195 m_tree.openElements()->pop();
// select end tag: a missing select in select scope is asserted to be the
// fragment case; pop until the select has been popped and recompute the
// insertion mode.
2201 if (token->name() == selectTag) {
2202 if (!m_tree.openElements()->inSelectScope(token->name())) {
2203 ASSERT(isParsingFragment());
2207 m_tree.openElements()->popUntilPopped(selectTag.localName());
2208 resetInsertionModeAppropriately();
// In-table-text mode: run its default action, then process the token again.
2212 case InTableTextMode:
2213 defaultForInTableText();
2214 processEndTag(token);
// Inserts a comment token at a location chosen from m_insertionMode.
2219 void HTMLTreeBuilder::processComment(AtomicHTMLToken* token)
2221 ASSERT(token->type() == HTMLTokenTypes::Comment);
// In these modes the comment is inserted on the document.
2222 if (m_insertionMode == InitialMode
2223 || m_insertionMode == BeforeHTMLMode
2224 || m_insertionMode == AfterAfterBodyMode
2225 || m_insertionMode == AfterAfterFramesetMode) {
2226 m_tree.insertCommentOnDocument(token);
// In AfterBodyMode the comment is inserted on the html element.
2229 if (m_insertionMode == AfterBodyMode) {
2230 m_tree.insertCommentOnHTMLHtmlElement(token);
// In InTableTextMode, run that mode's default action and process the comment
// again.
2233 if (m_insertionMode == InTableTextMode) {
2234 defaultForInTableText();
2235 processComment(token);
// All other modes use the ordinary comment insertion.
2238 m_tree.insertComment(token);
// Handles a character token by wrapping it in an ExternalCharacterTokenBuffer
// and passing that buffer to processCharacterBuffer().
2241 void HTMLTreeBuilder::processCharacter(AtomicHTMLToken* token)
2243 ASSERT(token->type() == HTMLTokenTypes::Character);
2244 ExternalCharacterTokenBuffer buffer(token);
2245 processCharacterBuffer(buffer);
// Consumes a buffer of character data according to the current insertion mode.
// If m_shouldSkipLeadingNewline is set, the flag is cleared and at most one leading
// newline is dropped before dispatching. The switch on insertionMode() then either
// inserts text, stashes it in m_pendingTableCharacters, or runs the mode's default
// handler; several cases jump to ReprocessBuffer so the remaining characters are
// handled again after the insertion mode has changed.
// NOTE(review): some case labels and the exits after each `if (buffer.isEmpty())`
// are not visible in this view; the per-case notes below follow the ASSERTs.
2248 void HTMLTreeBuilder::processCharacterBuffer(ExternalCharacterTokenBuffer& buffer)
2251 // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#parsing-main-inbody
2252 // Note that this logic is different than the generic \r\n collapsing
2253 // handled in the input stream preprocessor. This logic is here as an
2254 // "authoring convenience" so folks can write:
2261 // without getting an extra newline at the start of their <pre> element.
2262 if (m_shouldSkipLeadingNewline) {
2263 m_shouldSkipLeadingNewline = false;
2264 buffer.skipAtMostOneLeadingNewline();
2265 if (buffer.isEmpty())
2269 switch (insertionMode()) {
// Initial / BeforeHTML / BeforeHead: leading whitespace is skipped (not inserted),
// then the mode's default handler runs on whatever is left.
2271 ASSERT(insertionMode() == InitialMode);
2272 buffer.skipLeadingWhitespace();
2273 if (buffer.isEmpty())
2275 defaultForInitial();
2278 case BeforeHTMLMode: {
2279 ASSERT(insertionMode() == BeforeHTMLMode);
2280 buffer.skipLeadingWhitespace();
2281 if (buffer.isEmpty())
2283 defaultForBeforeHTML();
2286 case BeforeHeadMode: {
2287 ASSERT(insertionMode() == BeforeHeadMode);
2288 buffer.skipLeadingWhitespace();
2289 if (buffer.isEmpty())
2291 defaultForBeforeHead();
// InHead / AfterHead: leading whitespace is kept and inserted as an AllWhitespace
// text node before the remainder is considered.
2295 ASSERT(insertionMode() == InHeadMode);
2296 String leadingWhitespace = buffer.takeLeadingWhitespace();
2297 if (!leadingWhitespace.isEmpty())
2298 m_tree.insertTextNode(leadingWhitespace, AllWhitespace);
2299 if (buffer.isEmpty())
2304 case AfterHeadMode: {
2305 ASSERT(insertionMode() == AfterHeadMode);
2306 String leadingWhitespace = buffer.takeLeadingWhitespace();
2307 if (!leadingWhitespace.isEmpty())
2308 m_tree.insertTextNode(leadingWhitespace, AllWhitespace);
2309 if (buffer.isEmpty())
2311 defaultForAfterHead();
// InBody / InCaption / InCell: the whole buffer goes through the in-body text path.
2317 ASSERT(insertionMode() == InBodyMode || insertionMode() == InCaptionMode || insertionMode() == InCellMode);
2318 processCharacterBufferForInBody(buffer);
2322 case InTableBodyMode:
// InTable / InTableBody / InRow: when the current node is one of table, tbody, tfoot,
// thead or tr, remember the current mode and switch to InTableTextMode so the text
// is buffered in m_pendingTableCharacters.
2324 ASSERT(insertionMode() == InTableMode || insertionMode() == InTableBodyMode || insertionMode() == InRowMode);
2325 ASSERT(m_pendingTableCharacters.isEmpty());
2326 if (m_tree.currentNode()->isElementNode()
2327 && (m_tree.currentElement()->hasTagName(HTMLNames::tableTag)
2328 || m_tree.currentElement()->hasTagName(HTMLNames::tbodyTag)
2329 || m_tree.currentElement()->hasTagName(HTMLNames::tfootTag)
2330 || m_tree.currentElement()->hasTagName(HTMLNames::theadTag)
2331 || m_tree.currentElement()->hasTagName(HTMLNames::trTag))) {
2332 m_originalInsertionMode = m_insertionMode;
2333 setInsertionMode(InTableTextMode);
2334 // Note that we fall through to the InTableTextMode case below.
// NOTE(review): the next two lines are presumably the alternative branch (current node
// is not one of the elements above): in-body processing runs while a
// RedirectToFosterParentGuard is alive - confirm against the full file.
2336 HTMLConstructionSite::RedirectToFosterParentGuard redirecter(m_tree);
2337 processCharacterBufferForInBody(buffer);
2342 case InTableTextMode: {
// Everything left in the buffer is appended to m_pendingTableCharacters; it is
// consumed later by defaultForInTableText().
2343 buffer.giveRemainingTo(m_pendingTableCharacters);
2346 case InColumnGroupMode: {
2347 ASSERT(insertionMode() == InColumnGroupMode);
2348 String leadingWhitespace = buffer.takeLeadingWhitespace();
2349 if (!leadingWhitespace.isEmpty())
2350 m_tree.insertTextNode(leadingWhitespace, AllWhitespace);
2351 if (buffer.isEmpty())
2353 if (!processColgroupEndTagForInColumnGroup()) {
2354 ASSERT(isParsingFragment());
2355 // The spec tells us to drop these characters on the floor.
2356 buffer.skipLeadingNonWhitespace();
2357 if (buffer.isEmpty())
2360 goto ReprocessBuffer;
2363 case AfterAfterBodyMode: {
// AfterBody / AfterAfterBody: switch back to InBodyMode and reprocess the buffer there.
2364 ASSERT(insertionMode() == AfterBodyMode || insertionMode() == AfterAfterBodyMode);
2365 // FIXME: parse error
2366 setInsertionMode(InBodyMode);
2367 goto ReprocessBuffer;
// TextMode: the remaining characters are inserted verbatim as a text node.
2371 ASSERT(insertionMode() == TextMode);
2372 m_tree.insertTextNode(buffer.takeRemaining());
2375 case InHeadNoscriptMode: {
2376 ASSERT(insertionMode() == InHeadNoscriptMode);
2377 String leadingWhitespace = buffer.takeLeadingWhitespace();
2378 if (!leadingWhitespace.isEmpty())
2379 m_tree.insertTextNode(leadingWhitespace, AllWhitespace);
2380 if (buffer.isEmpty())
2382 defaultForInHeadNoscript();
2383 goto ReprocessBuffer;
2386 case InFramesetMode:
2387 case AfterFramesetMode: {
// Frameset modes: only the result of takeRemainingWhitespace() is inserted.
2388 ASSERT(insertionMode() == InFramesetMode || insertionMode() == AfterFramesetMode || insertionMode() == AfterAfterFramesetMode);
2389 String leadingWhitespace = buffer.takeRemainingWhitespace();
2390 if (!leadingWhitespace.isEmpty())
2391 m_tree.insertTextNode(leadingWhitespace, AllWhitespace);
2392 // FIXME: We should generate a parse error if we skipped over any
2393 // non-whitespace characters.
2396 case InSelectInTableMode:
2397 case InSelectMode: {
2398 ASSERT(insertionMode() == InSelectMode || insertionMode() == InSelectInTableMode);
2399 m_tree.insertTextNode(buffer.takeRemaining());
2402 case AfterAfterFramesetMode: {
// Same whitespace-only handling as the frameset modes above, but the active formatting
// elements are reconstructed before the whitespace text node is inserted.
2403 String leadingWhitespace = buffer.takeRemainingWhitespace();
2404 if (!leadingWhitespace.isEmpty()) {
2405 m_tree.reconstructTheActiveFormattingElements();
2406 m_tree.insertTextNode(leadingWhitespace, AllWhitespace);
2408 // FIXME: We should generate a parse error if we skipped over any
2409 // non-whitespace characters.
// In-body text handling: reconstructs the active formatting elements, takes everything
// left in the buffer and inserts it as a text node. m_framesetOk is cleared when the
// inserted text contains anything other than whitespace or replacement characters
// (as judged by isAllWhitespaceOrReplacementCharacters()).
2415 void HTMLTreeBuilder::processCharacterBufferForInBody(ExternalCharacterTokenBuffer& buffer)
2417 m_tree.reconstructTheActiveFormattingElements();
2418 String characters = buffer.takeRemaining();
2419 m_tree.insertTextNode(characters);
2420 if (m_framesetOk && !isAllWhitespaceOrReplacementCharacters(characters))
2421 m_framesetOk = false;
// Handles the EndOfFile token by switching on the current insertion mode.
// Modes with a default handler (Initial, BeforeHTML, BeforeHead, AfterHead,
// InHeadNoscript, InTableText) run it. The InHeadNoscript, InColumnGroup, InTableText
// and script/text paths visibly call processEndOfFile(token) again so the token is
// re-handled under the new mode. The last visible statements assert that a current
// node exists and pop every open element.
// NOTE(review): several case labels and the break/fall-through structure are not
// visible in this view; the notes below are keyed to the ASSERTs.
2424 void HTMLTreeBuilder::processEndOfFile(AtomicHTMLToken* token)
2426 ASSERT(token->type() == HTMLTokenTypes::EndOfFile);
2427 switch (insertionMode()) {
2429 ASSERT(insertionMode() == InitialMode);
2430 defaultForInitial();
2432 case BeforeHTMLMode:
2433 ASSERT(insertionMode() == BeforeHTMLMode);
2434 defaultForBeforeHTML();
2436 case BeforeHeadMode:
2437 ASSERT(insertionMode() == BeforeHeadMode);
2438 defaultForBeforeHead();
2441 ASSERT(insertionMode() == InHeadMode);
2445 ASSERT(insertionMode() == AfterHeadMode);
2446 defaultForAfterHead();
2452 ASSERT(insertionMode() == InBodyMode || insertionMode() == InCellMode || insertionMode() == InCaptionMode || insertionMode() == InRowMode);
2453 notImplemented(); // Emit parse error based on what elements are still open.
2456 case AfterAfterBodyMode:
2457 ASSERT(insertionMode() == AfterBodyMode || insertionMode() == AfterAfterBodyMode);
2459 case InHeadNoscriptMode:
2460 ASSERT(insertionMode() == InHeadNoscriptMode);
2461 defaultForInHeadNoscript();
2462 processEndOfFile(token);
2464 case AfterFramesetMode:
2465 case AfterAfterFramesetMode:
2466 ASSERT(insertionMode() == AfterFramesetMode || insertionMode() == AfterAfterFramesetMode);
2468 case InFramesetMode:
2470 case InTableBodyMode:
2471 case InSelectInTableMode:
2473 ASSERT(insertionMode() == InSelectMode || insertionMode() == InSelectInTableMode || insertionMode() == InTableMode || insertionMode() == InFramesetMode || insertionMode() == InTableBodyMode);
// NOTE(review): the statement guarded by this check is not visible (presumably a
// parse-error hook for EOF with elements still open) - confirm.
2474 if (m_tree.currentNode() != m_tree.openElements()->rootNode())
2477 case InColumnGroupMode:
// When the current node is the root of the open-element stack, or the colgroup end
// tag cannot be processed, we must be parsing a fragment and return immediately.
2478 if (m_tree.currentNode() == m_tree.openElements()->rootNode()) {
2479 ASSERT(isParsingFragment());
2480 return; // FIXME: Should we break here instead of returning?
2482 if (!processColgroupEndTagForInColumnGroup()) {
2483 ASSERT(isParsingFragment());
2484 return; // FIXME: Should we break here instead of returning?
2486 processEndOfFile(token);
2488 case InTableTextMode:
// Flush buffered table text (this also restores the original mode), then retry.
2489 defaultForInTableText();
2490 processEndOfFile(token);
// NOTE(review): presumably the TextMode case (m_originalInsertionMode is asserted to
// differ from TextMode below): pop the current element, restore the saved insertion
// mode and re-dispatch the EOF token.
2494 if (m_tree.currentNode()->hasTagName(scriptTag))
2495 notImplemented(); // mark the script element as "already started".
2496 m_tree.openElements()->pop();
2497 ASSERT(m_originalInsertionMode != TextMode);
2498 setInsertionMode(m_originalInsertionMode);
2499 processEndOfFile(token);
2502 ASSERT(m_tree.currentNode());
2503 m_tree.openElements()->popAll();
// Default action for InitialMode: unless a fragment is being parsed or the document is
// a srcdoc document, the document is put into quirks mode; the insertion mode then
// advances to BeforeHTMLMode.
2506 void HTMLTreeBuilder::defaultForInitial()
2509 if (!m_fragmentContext.fragment() && !m_document->isSrcdocDocument())
2510 m_document->setCompatibilityMode(Document::QuirksMode);
2511 // FIXME: parse error
2512 setInsertionMode(BeforeHTMLMode);
// Default action for BeforeHTMLMode: synthesizes an <html> start-tag token, hands it to
// insertHTMLHtmlStartTagBeforeHTML(), and advances to BeforeHeadMode.
2515 void HTMLTreeBuilder::defaultForBeforeHTML()
2517 RefPtr<AtomicHTMLToken> startHTML = AtomicHTMLToken::create(HTMLTokenTypes::StartTag, htmlTag.localName());
2518 m_tree.insertHTMLHtmlStartTagBeforeHTML(startHTML.get());
2519 setInsertionMode(BeforeHeadMode);
// Default action for BeforeHeadMode: synthesizes a <head> start-tag token and runs it
// through the normal start-tag path (processStartTag).
2522 void HTMLTreeBuilder::defaultForBeforeHead()
2524 RefPtr<AtomicHTMLToken> startHead = AtomicHTMLToken::create(HTMLTokenTypes::StartTag, headTag.localName());
2525 processStartTag(startHead.get());
// Default action for InHeadMode: synthesizes a </head> end-tag token and runs it
// through the normal end-tag path (processEndTag).
2528 void HTMLTreeBuilder::defaultForInHead()
2530 RefPtr<AtomicHTMLToken> endHead = AtomicHTMLToken::create(HTMLTokenTypes::EndTag, headTag.localName());
2531 processEndTag(endHead.get());
// Default action for InHeadNoscriptMode: synthesizes a </noscript> end-tag token and
// runs it through the normal end-tag path (processEndTag).
2534 void HTMLTreeBuilder::defaultForInHeadNoscript()
2536 RefPtr<AtomicHTMLToken> endNoscript = AtomicHTMLToken::create(HTMLTokenTypes::EndTag, noscriptTag.localName());
2537 processEndTag(endNoscript.get());
// Default action for AfterHeadMode: synthesizes a <body> start-tag token, runs it
// through processStartTag(), and then sets m_framesetOk back to true.
2540 void HTMLTreeBuilder::defaultForAfterHead()
2542 RefPtr<AtomicHTMLToken> startBody = AtomicHTMLToken::create(HTMLTokenTypes::StartTag, bodyTag.localName());
2543 processStartTag(startBody.get());
// Set after processStartTag() so this assignment decides the final value.
2544 m_framesetOk = true;
// Flushes the characters buffered while in InTableTextMode and restores the insertion
// mode saved in m_originalInsertionMode.
// If the buffered text is not all whitespace it is inserted while a
// RedirectToFosterParentGuard is alive, after reconstructing the active formatting
// elements, and m_framesetOk is cleared. Otherwise the text is inserted directly.
// NOTE(review): the exit that separates the two paths is not visible here; the second
// insertTextNode() is presumably reached only for all-whitespace text - confirm.
2547 void HTMLTreeBuilder::defaultForInTableText()
// Copy the pending characters out and clear the buffer before inserting anything.
2549 String characters = m_pendingTableCharacters.toString();
2550 m_pendingTableCharacters.clear();
2551 if (!isAllWhitespace(characters)) {
2552 // FIXME: parse error
2553 HTMLConstructionSite::RedirectToFosterParentGuard redirecter(m_tree);
2554 m_tree.reconstructTheActiveFormattingElements();
2555 m_tree.insertTextNode(characters, NotAllWhitespace);
2556 m_framesetOk = false;
2557 setInsertionMode(m_originalInsertionMode);
2560 m_tree.insertTextNode(characters);
2561 setInsertionMode(m_originalInsertionMode);
// Start-tag handling for InHeadMode. Visible dispatch by tag name:
//  - html: delegated to processHtmlStartTagForInBody().
//  - base, basefont, bgsound, command, link, meta: inserted as self-closing elements.
//  - title: generic RCDATA element.
//  - noscript: generic raw text when scripting is enabled for the document's frame;
//    otherwise inserted normally and the mode becomes InHeadNoscriptMode.
//  - noframes, style: generic raw text.
//  - script: processScriptStartTag(), plus a fake </script> for a self-closing tag when
//    m_usePreHTML5ParserQuirks is set.
//  - head: handling is not visible in this view.
// NOTE(review): the bool results are not visible here; presumably true means the token
// was handled by the in-head rules - confirm against callers.
2564 bool HTMLTreeBuilder::processStartTagForInHead(AtomicHTMLToken* token)
2566 ASSERT(token->type() == HTMLTokenTypes::StartTag);
2567 if (token->name() == htmlTag) {
2568 processHtmlStartTagForInBody(token);
2571 if (token->name() == baseTag
2572 || token->name() == basefontTag
2573 || token->name() == bgsoundTag
2574 || token->name() == commandTag
2575 || token->name() == linkTag
2576 || token->name() == metaTag) {
2577 m_tree.insertSelfClosingHTMLElement(token);
2578 // Note: The custom processing for the <meta> tag is done in HTMLMetaElement::process().
2581 if (token->name() == titleTag) {
2582 processGenericRCDATAStartTag(token);
2585 if (token->name() == noscriptTag) {
2586 if (scriptEnabled(m_document->frame())) {
2587 processGenericRawTextStartTag(token);
2590 m_tree.insertHTMLElement(token);
2591 setInsertionMode(InHeadNoscriptMode);
2594 if (token->name() == noframesTag || token->name() == styleTag) {
2595 processGenericRawTextStartTag(token);
2598 if (token->name() == scriptTag) {
2599 processScriptStartTag(token);
2600 if (m_usePreHTML5ParserQuirks && token->selfClosing())
2601 processFakeEndTag(scriptTag);
2604 if (token->name() == headTag) {
// Inserts the element for a start tag whose content is RCDATA: the tokenizer is switched
// to RCDATAState, the current insertion mode is saved in m_originalInsertionMode, and
// the tree builder enters TextMode.
2611 void HTMLTreeBuilder::processGenericRCDATAStartTag(AtomicHTMLToken* token)
2613 ASSERT(token->type() == HTMLTokenTypes::StartTag);
2614 m_tree.insertHTMLElement(token);
2615 m_parser->tokenizer()->setState(HTMLTokenizerState::RCDATAState);
2616 m_originalInsertionMode = m_insertionMode;
2617 setInsertionMode(TextMode);
// Inserts the element for a start tag whose content is raw text: the tokenizer is
// switched to RAWTEXTState, the current insertion mode is saved in
// m_originalInsertionMode, and the tree builder enters TextMode.
2620 void HTMLTreeBuilder::processGenericRawTextStartTag(AtomicHTMLToken* token)
2622 ASSERT(token->type() == HTMLTokenTypes::StartTag);
2623 m_tree.insertHTMLElement(token);
2624 m_parser->tokenizer()->setState(HTMLTokenizerState::RAWTEXTState);
2625 m_originalInsertionMode = m_insertionMode;
2626 setInsertionMode(TextMode);
// Inserts a script element for the start tag, switches the tokenizer to ScriptDataState,
// saves the current insertion mode, records the parser's current text position in
// m_scriptToProcessStartPosition, and enters TextMode.
2629 void HTMLTreeBuilder::processScriptStartTag(AtomicHTMLToken* token)
2631 ASSERT(token->type() == HTMLTokenTypes::StartTag);
2632 m_tree.insertScriptElement(token);
2633 m_parser->tokenizer()->setState(HTMLTokenizerState::ScriptDataState);
2634 m_originalInsertionMode = m_insertionMode;
// The position is read after the tokenizer state change and before entering TextMode.
2636 TextPosition position = m_parser->textPosition();
2638 m_scriptToProcessStartPosition = position;
2640 setInsertionMode(TextMode);
2643 // http://www.whatwg.org/specs/web-apps/current-work/multipage/tree-construction.html#tree-construction
// Decides whether a token should go through the foreign-content rules
// (processTokenInForeignContent) or the normal HTML insertion-mode rules.
// The visible checks, in order: an empty tree; a current node in the HTML namespace;
// a MathML text integration point receiving a start tag other than mglyph/malignmark,
// or a character token; an annotation-xml element receiving an <svg> start tag; an HTML
// integration point receiving a start tag or a character token; and an EndOfFile token.
// NOTE(review): the value returned for each check is not visible in this view; each
// listed case presumably yields false (use the HTML rules) with true as the fallback -
// confirm against the full file.
2644 bool HTMLTreeBuilder::shouldProcessTokenInForeignContent(AtomicHTMLToken* token)
2646 if (m_tree.isEmpty())
2648 ContainerNode* node = m_tree.currentNode();
2649 if (isInHTMLNamespace(node))
2651 if (HTMLElementStack::isMathMLTextIntegrationPoint(node)) {
2652 if (token->type() == HTMLTokenTypes::StartTag
2653 && token->name() != MathMLNames::mglyphTag
2654 && token->name() != MathMLNames::malignmarkTag)
2656 if (token->type() == HTMLTokenTypes::Character)
2659 if (node->hasTagName(MathMLNames::annotation_xmlTag)
2660 && token->type() == HTMLTokenTypes::StartTag
2661 && token->name() == SVGNames::svgTag)
2663 if (HTMLElementStack::isHTMLIntegrationPoint(node)) {
2664 if (token->type() == HTMLTokenTypes::StartTag)
2666 if (token->type() == HTMLTokenTypes::Character)
2669 if (token->type() == HTMLTokenTypes::EndOfFile)
// Applies the foreign-content (MathML/SVG) rules to a token, switching on its type:
//  - Uninitialized and EndOfFile must never reach this function (ASSERT_NOT_REACHED).
//  - DOCTYPE: handling is not visible in this view.
//  - StartTag: a fixed list of HTML tag names (and <font> carrying a color, face or size
//    attribute) pops the stack to the foreign-content scope marker and reprocesses the
//    token as HTML; other start tags get namespace-specific adjustments and are inserted
//    as foreign elements in the current element's namespace.
//  - EndTag: walks the open-element stack looking for an element whose local name
//    matches; falls back to processEndTag() per the comment at the end of that case.
//  - Comment: inserted with insertComment().
//  - Character: inserted as text; m_framesetOk is cleared for text that is not all
//    whitespace/replacement characters.
2674 void HTMLTreeBuilder::processTokenInForeignContent(AtomicHTMLToken* token)
2676 switch (token->type()) {
2677 case HTMLTokenTypes::Uninitialized:
2678 ASSERT_NOT_REACHED();
2680 case HTMLTokenTypes::DOCTYPE:
2683 case HTMLTokenTypes::StartTag: {
2684 if (token->name() == bTag
2685 || token->name() == bigTag
2686 || token->name() == blockquoteTag
2687 || token->name() == bodyTag
2688 || token->name() == brTag
2689 || token->name() == centerTag
2690 || token->name() == codeTag
2691 || token->name() == ddTag
2692 || token->name() == divTag
2693 || token->name() == dlTag
2694 || token->name() == dtTag
2695 || token->name() == emTag
2696 || token->name() == embedTag
2697 || isNumberedHeaderTag(token->name())
2698 || token->name() == headTag
2699 || token->name() == hrTag
2700 || token->name() == iTag
2701 || token->name() == imgTag
2702 || token->name() == liTag
2703 || token->name() == listingTag
2704 || token->name() == menuTag
2705 || token->name() == metaTag
2706 || token->name() == nobrTag
2707 || token->name() == olTag
2708 || token->name() == pTag
2709 || token->name() == preTag
2710 || token->name() == rubyTag
2711 || token->name() == sTag
2712 || token->name() == smallTag
2713 || token->name() == spanTag
2714 || token->name() == strongTag
2715 || token->name() == strikeTag
2716 || token->name() == subTag
2717 || token->name() == supTag
2718 || token->name() == tableTag
2719 || token->name() == ttTag
2720 || token->name() == uTag
2721 || token->name() == ulTag
2722 || token->name() == varTag
2723 || (token->name() == fontTag && (token->getAttributeItem(colorAttr) || token->getAttributeItem(faceAttr) || token->getAttributeItem(sizeAttr)))) {
// HTML-only tag inside foreign content: unwind to the scope marker and handle as HTML.
2725 m_tree.openElements()->popUntilForeignContentScopeMarker();
2726 processStartTag(token);
// Any other start tag: adjust names/attributes for the current element's namespace
// and insert the element into that namespace.
2729 const AtomicString& currentNamespace = m_tree.currentElement()->namespaceURI();
2730 if (currentNamespace == MathMLNames::mathmlNamespaceURI)
2731 adjustMathMLAttributes(token);
2732 if (currentNamespace == SVGNames::svgNamespaceURI) {
2733 adjustSVGTagNameCase(token);
2734 adjustSVGAttributes(token);
2736 adjustForeignAttributes(token);
2737 m_tree.insertForeignElement(token, currentNamespace);
2740 case HTMLTokenTypes::EndTag: {
2741 if (m_tree.currentNode()->namespaceURI() == SVGNames::svgNamespaceURI)
2742 adjustSVGTagNameCase(token);
// An SVG </script> that closes the current SVG script element: remember the element
// in m_scriptToProcess and pop it.
2744 if (token->name() == SVGNames::scriptTag && m_tree.currentNode()->hasTagName(SVGNames::scriptTag)) {
2745 m_scriptToProcess = m_tree.currentElement();
2746 m_tree.openElements()->pop();
2749 if (!isInHTMLNamespace(m_tree.currentNode())) {
2750 // FIXME: This code just wants an Element* iterator, instead of an ElementRecord*
2751 HTMLElementStack::ElementRecord* nodeRecord = m_tree.openElements()->topRecord();
2752 if (!nodeRecord->node()->hasLocalName(token->name()))
// NOTE(review): the loop header around the following lines is not visible; they
// presumably iterate down the stack via next() until a matching local name is popped
// or an HTML-namespace node is reached - confirm.
2755 if (nodeRecord->node()->hasLocalName(token->name())) {
2756 m_tree.openElements()->popUntilPopped(nodeRecord->element());
2759 nodeRecord = nodeRecord->next();
2761 if (isInHTMLNamespace(nodeRecord->node()))
2765 // Otherwise, process the token according to the rules given in the section corresponding to the current insertion mode in HTML content.
2766 processEndTag(token);
2769 case HTMLTokenTypes::Comment:
2770 m_tree.insertComment(token);
2772 case HTMLTokenTypes::Character: {
2773 String characters = String(token->characters().data(), token->characters().size());
2774 m_tree.insertTextNode(characters);
2775 if (m_framesetOk && !isAllWhitespaceOrReplacementCharacters(characters))
2776 m_framesetOk = false;
2779 case HTMLTokenTypes::EndOfFile:
2780 ASSERT_NOT_REACHED();
// Called when tree building is complete. Fragment parsing is checked first, then the
// document is notified through finishedParsing().
// NOTE(review): the statement guarded by isParsingFragment() is not visible here;
// presumably the fragment case exits before finishedParsing() is reached - confirm.
2785 void HTMLTreeBuilder::finished()
2787 if (isParsingFragment())
2791 // Warning, this may detach the parser. Do not do anything else after this.
2792 m_document->finishedParsing();
// Parse-error hook. The token parameter is unnamed, so the body cannot reference it.
// NOTE(review): no body statements are visible in this view; presumably a stub.
2795 void HTMLTreeBuilder::parseError(AtomicHTMLToken*)
// Reports whether scripts may run in the given frame by asking the frame's script
// controller: canExecuteScripts(NotAboutToExecuteScript).
// NOTE(review): `frame` is dereferenced below; any null guard is not visible in this
// view - confirm one exists, since callers pass m_document->frame().
2799 bool HTMLTreeBuilder::scriptEnabled(Frame* frame)
2803 return frame->script()->canExecuteScripts(NotAboutToExecuteScript);
// Reports whether plugins are allowed in the given frame by asking the frame loader's
// subframe loader: allowPlugins(NotAboutToInstantiatePlugin).
// NOTE(review): `frame` is dereferenced below; any null guard is not visible in this
// view - confirm one exists.
2806 bool HTMLTreeBuilder::pluginsEnabled(Frame* frame)
2810 return frame->loader()->subframeLoader()->allowPlugins(NotAboutToInstantiatePlugin);