2010-10-25 Peter Rybin <peter.rybin@gmail.com>
[WebKit-https.git] / WebCore / html / parser / HTMLTreeBuilder.cpp
1 /*
2  * Copyright (C) 2010 Google, Inc. All Rights Reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  * 1. Redistributions of source code must retain the above copyright
8  *    notice, this list of conditions and the following disclaimer.
9  * 2. Redistributions in binary form must reproduce the above copyright
10  *    notice, this list of conditions and the following disclaimer in the
11  *    documentation and/or other materials provided with the distribution.
12  *
13  * THIS SOFTWARE IS PROVIDED BY GOOGLE INC. ``AS IS'' AND ANY
14  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL GOOGLE INC. OR
17  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
20  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
21  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24  */
25
26 #include "config.h"
27 #include "HTMLTreeBuilder.h"
28
29 #include "CharacterNames.h"
30 #include "Comment.h"
31 #include "DocumentFragment.h"
32 #include "DocumentType.h"
33 #include "Frame.h"
34 #include "HTMLDocument.h"
35 #include "HTMLElementFactory.h"
36 #include "HTMLFormElement.h"
37 #include "HTMLHtmlElement.h"
38 #include "HTMLNames.h"
39 #include "HTMLParserIdioms.h"
40 #include "HTMLScriptElement.h"
41 #include "HTMLToken.h"
42 #include "HTMLTokenizer.h"
43 #include "LocalizedStrings.h"
44 #include "MathMLNames.h"
45 #include "NotImplemented.h"
46 #include "SVGNames.h"
47 #include "ScriptController.h"
48 #include "Text.h"
49 #include "XLinkNames.h"
50 #include "XMLNSNames.h"
51 #include "XMLNames.h"
52
53 namespace WebCore {
54
55 using namespace HTMLNames;
56
57 static const int uninitializedLineNumberValue = -1;
58
59 static TextPosition1 uninitializedPositionValue1()
60 {
61     return TextPosition1(WTF::OneBasedNumber::fromOneBasedInt(-1), WTF::OneBasedNumber::base());
62 }
63
64 namespace {
65
66 inline bool isHTMLSpaceOrReplacementCharacter(UChar character)
67 {
68     return isHTMLSpace(character) || character == replacementCharacter;
69 }
70
71 inline bool isAllWhitespace(const String& string)
72 {
73     return string.isAllSpecialCharacters<isHTMLSpace>();
74 }
75
76 inline bool isAllWhitespaceOrReplacementCharacters(const String& string)
77 {
78     return string.isAllSpecialCharacters<isHTMLSpaceOrReplacementCharacter>();
79 }
80
81 bool isNumberedHeaderTag(const AtomicString& tagName)
82 {
83     return tagName == h1Tag
84         || tagName == h2Tag
85         || tagName == h3Tag
86         || tagName == h4Tag
87         || tagName == h5Tag
88         || tagName == h6Tag;
89 }
90
91 bool isCaptionColOrColgroupTag(const AtomicString& tagName)
92 {
93     return tagName == captionTag
94         || tagName == colTag
95         || tagName == colgroupTag;
96 }
97
98 bool isTableCellContextTag(const AtomicString& tagName)
99 {
100     return tagName == thTag || tagName == tdTag;
101 }
102
103 bool isTableBodyContextTag(const AtomicString& tagName)
104 {
105     return tagName == tbodyTag
106         || tagName == tfootTag
107         || tagName == theadTag;
108 }
109
110 // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#special
111 bool isSpecialNode(Node* node)
112 {
113     if (node->hasTagName(MathMLNames::miTag)
114         || node->hasTagName(MathMLNames::moTag)
115         || node->hasTagName(MathMLNames::mnTag)
116         || node->hasTagName(MathMLNames::msTag)
117         || node->hasTagName(MathMLNames::mtextTag)
118         || node->hasTagName(MathMLNames::annotation_xmlTag)
119         || node->hasTagName(SVGNames::foreignObjectTag)
120         || node->hasTagName(SVGNames::descTag)
121         || node->hasTagName(SVGNames::titleTag))
122         return true;
123     if (node->namespaceURI() != xhtmlNamespaceURI)
124         return false;
125     const AtomicString& tagName = node->localName();
126     return tagName == addressTag
127         || tagName == appletTag
128         || tagName == areaTag
129         || tagName == articleTag
130         || tagName == asideTag
131         || tagName == baseTag
132         || tagName == basefontTag
133         || tagName == bgsoundTag
134         || tagName == blockquoteTag
135         || tagName == bodyTag
136         || tagName == brTag
137         || tagName == buttonTag
138         || tagName == captionTag
139         || tagName == centerTag
140         || tagName == colTag
141         || tagName == colgroupTag
142         || tagName == commandTag
143         || tagName == ddTag
144         || tagName == detailsTag
145         || tagName == dirTag
146         || tagName == divTag
147         || tagName == dlTag
148         || tagName == dtTag
149         || tagName == embedTag
150         || tagName == fieldsetTag
151         || tagName == figcaptionTag
152         || tagName == figureTag
153         || tagName == footerTag
154         || tagName == formTag
155         || tagName == frameTag
156         || tagName == framesetTag
157         || isNumberedHeaderTag(tagName)
158         || tagName == headTag
159         || tagName == headerTag
160         || tagName == hgroupTag
161         || tagName == hrTag
162         || tagName == htmlTag
163         || tagName == iframeTag
164         || tagName == imgTag
165         || tagName == inputTag
166         || tagName == isindexTag
167         || tagName == liTag
168         || tagName == linkTag
169         || tagName == listingTag
170         || tagName == marqueeTag
171         || tagName == menuTag
172         || tagName == metaTag
173         || tagName == navTag
174         || tagName == noembedTag
175         || tagName == noframesTag
176         || tagName == noscriptTag
177         || tagName == objectTag
178         || tagName == olTag
179         || tagName == pTag
180         || tagName == paramTag
181         || tagName == plaintextTag
182         || tagName == preTag
183         || tagName == scriptTag
184         || tagName == sectionTag
185         || tagName == selectTag
186         || tagName == styleTag
187         || tagName == summaryTag
188         || tagName == tableTag
189         || isTableBodyContextTag(tagName)
190         || tagName == tdTag
191         || tagName == textareaTag
192         || tagName == thTag
193         || tagName == titleTag
194         || tagName == trTag
195         || tagName == ulTag
196         || tagName == wbrTag
197         || tagName == xmpTag;
198 }
199
200 bool isNonAnchorNonNobrFormattingTag(const AtomicString& tagName)
201 {
202     return tagName == bTag
203         || tagName == bigTag
204         || tagName == codeTag
205         || tagName == emTag
206         || tagName == fontTag
207         || tagName == iTag
208         || tagName == sTag
209         || tagName == smallTag
210         || tagName == strikeTag
211         || tagName == strongTag
212         || tagName == ttTag
213         || tagName == uTag;
214 }
215
216 bool isNonAnchorFormattingTag(const AtomicString& tagName)
217 {
218     return tagName == nobrTag
219         || isNonAnchorNonNobrFormattingTag(tagName);
220 }
221
222 // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#formatting
223 bool isFormattingTag(const AtomicString& tagName)
224 {
225     return tagName == aTag || isNonAnchorFormattingTag(tagName);
226 }
227
228 HTMLFormElement* closestFormAncestor(Element* element)
229 {
230     while (element) {
231         if (element->hasTagName(formTag))
232             return static_cast<HTMLFormElement*>(element);
233         Node* parent = element->parent();
234         if (!parent || !parent->isElementNode())
235             return 0;
236         element = static_cast<Element*>(parent);
237     }
238     return 0;
239 }
240
241 } // namespace
242
243 class HTMLTreeBuilder::ExternalCharacterTokenBuffer : public Noncopyable {
244 public:
245     explicit ExternalCharacterTokenBuffer(AtomicHTMLToken& token)
246         : m_current(token.characters().data())
247         , m_end(m_current + token.characters().size())
248     {
249         ASSERT(!isEmpty());
250     }
251
252     explicit ExternalCharacterTokenBuffer(const String& string)
253         : m_current(string.characters())
254         , m_end(m_current + string.length())
255     {
256         ASSERT(!isEmpty());
257     }
258
259     ~ExternalCharacterTokenBuffer()
260     {
261         ASSERT(isEmpty());
262     }
263
264     bool isEmpty() const { return m_current == m_end; }
265
266     void skipLeadingWhitespace()
267     {
268         skipLeading<isHTMLSpace>();
269     }
270
271     String takeLeadingWhitespace()
272     {
273         return takeLeading<isHTMLSpace>();
274     }
275
276     String takeLeadingNonWhitespace()
277     {
278         return takeLeading<isNotHTMLSpace>();
279     }
280
281     String takeRemaining()
282     {
283         ASSERT(!isEmpty());
284         const UChar* start = m_current;
285         m_current = m_end;
286         return String(start, m_current - start);
287     }
288
289     void giveRemainingTo(Vector<UChar>& recipient)
290     {
291         recipient.append(m_current, m_end - m_current);
292         m_current = m_end;
293     }
294
295     String takeRemainingWhitespace()
296     {
297         ASSERT(!isEmpty());
298         Vector<UChar> whitespace;
299         do {
300             UChar cc = *m_current++;
301             if (isHTMLSpace(cc))
302                 whitespace.append(cc);
303         } while (m_current < m_end);
304         // Returning the null string when there aren't any whitespace
305         // characters is slightly cleaner semantically because we don't want
306         // to insert a text node (as opposed to inserting an empty text node).
307         if (whitespace.isEmpty())
308             return String();
309         return String::adopt(whitespace);
310     }
311
312 private:
313     template<bool characterPredicate(UChar)>
314     void skipLeading()
315     {
316         ASSERT(!isEmpty());
317         while (characterPredicate(*m_current)) {
318             if (++m_current == m_end)
319                 return;
320         }
321     }
322
323     template<bool characterPredicate(UChar)>
324     String takeLeading()
325     {
326         ASSERT(!isEmpty());
327         const UChar* start = m_current;
328         skipLeading<characterPredicate>();
329         if (start == m_current)
330             return String();
331         return String(start, m_current - start);
332     }
333
334     const UChar* m_current;
335     const UChar* m_end;
336 };
337
338
// Constructs a tree builder that builds directly into |document|.
//
// The builder starts unpaused in InitialMode with m_framesetOk set, error
// reporting controlled by |reportErrors|, and the pending-script start
// position set to the uninitialized placeholder. The construction site is
// created with FragmentScriptingAllowed.
// NOTE(review): the trailing `false` passed to m_tree is presumably the
// "is parsing a fragment" flag (the fragment constructor passes `true`) —
// confirm against HTMLConstructionSite.
339 HTMLTreeBuilder::HTMLTreeBuilder(HTMLTokenizer* tokenizer, HTMLDocument* document, bool reportErrors, bool usePreHTML5ParserQuirks)
340     : m_framesetOk(true)
341     , m_document(document)
342     , m_tree(document, FragmentScriptingAllowed, false)
343     , m_reportErrors(reportErrors)
344     , m_isPaused(false)
345     , m_insertionMode(InitialMode)
346     , m_originalInsertionMode(InitialMode)
347     , m_tokenizer(tokenizer)
348     , m_scriptToProcessStartPosition(uninitializedPositionValue1())
349     , m_lastScriptElementStartPosition(TextPosition0::belowRangePosition())
350     , m_usePreHTML5ParserQuirks(usePreHTML5ParserQuirks)
351     , m_hasPendingForeignInsertionModeSteps(false)
352 {
353 }
354
355 // FIXME: Member variables should be grouped into self-initializing structs to
356 // minimize code duplication between these constructors.
357 HTMLTreeBuilder::HTMLTreeBuilder(HTMLTokenizer* tokenizer, DocumentFragment* fragment, Element* contextElement, FragmentScriptingPermission scriptingPermission, bool usePreHTML5ParserQuirks)
358     : m_framesetOk(true)
359     , m_fragmentContext(fragment, contextElement, scriptingPermission)
360     , m_document(m_fragmentContext.document())
361     , m_tree(m_document, scriptingPermission, true)
362     , m_reportErrors(false) // FIXME: Why not report errors in fragments?
363     , m_isPaused(false)
364     , m_insertionMode(InitialMode)
365     , m_originalInsertionMode(InitialMode)
366     , m_tokenizer(tokenizer)
367     , m_scriptToProcessStartPosition(uninitializedPositionValue1())
368     , m_lastScriptElementStartPosition(TextPosition0::belowRangePosition())
369     , m_usePreHTML5ParserQuirks(usePreHTML5ParserQuirks)
370     , m_hasPendingForeignInsertionModeSteps(false)
371 {
372     if (contextElement) {
373         // Steps 4.2-4.6 of the HTML5 Fragment Case parsing algorithm:
374         // http://www.whatwg.org/specs/web-apps/current-work/multipage/the-end.html#fragment-case
375         m_document->setCompatibilityMode(contextElement->document()->compatibilityMode());
376         processFakeStartTag(htmlTag);
377         resetInsertionModeAppropriately();
378         m_tree.setForm(closestFormAncestor(contextElement));
379     }
380 }
381
// Destructor. The body is intentionally empty; members are torn down by
// their own destructors.
382 HTMLTreeBuilder::~HTMLTreeBuilder()
383 {
384 }
385
// Disconnects the builder from its document: clears m_document and detaches
// the construction site (m_tree).
386 void HTMLTreeBuilder::detach()
387 {
388     // This call makes little sense in fragment mode, but for consistency
389     // DocumentParser expects detach() to always be called before it's destroyed.
390     m_document = 0;
391     // HTMLConstructionSite might be on the callstack when detach() is called
392     // otherwise we'd just call m_tree.clear() here instead.
393     m_tree.detach();
394 }
395
// Default constructor: a context with no fragment and no context element,
// with scripting allowed. m_dummyDocumentForFragmentParsing is not mentioned
// in the initializer list, so it is default-constructed.
396 HTMLTreeBuilder::FragmentParsingContext::FragmentParsingContext()
397     : m_fragment(0)
398     , m_contextElement(0)
399     , m_scriptingPermission(FragmentScriptingAllowed)
400 {
401 }
402
403 HTMLTreeBuilder::FragmentParsingContext::FragmentParsingContext(DocumentFragment* fragment, Element* contextElement, FragmentScriptingPermission scriptingPermission)
404     : m_dummyDocumentForFragmentParsing(HTMLDocument::create(0, KURL(), fragment->document()->baseURI()))
405     , m_fragment(fragment)
406     , m_contextElement(contextElement)
407     , m_scriptingPermission(scriptingPermission)
408 {
409     m_dummyDocumentForFragmentParsing->setCompatibilityMode(fragment->document()->compatibilityMode());
410 }
411
412 Document* HTMLTreeBuilder::FragmentParsingContext::document() const
413 {
414     ASSERT(m_fragment);
415     return m_dummyDocumentForFragmentParsing.get();
416 }
417
418 void HTMLTreeBuilder::FragmentParsingContext::finished()
419 {
420     // Populate the DocumentFragment with the parsed content now that we're done.
421     ContainerNode* root = m_dummyDocumentForFragmentParsing.get();
422     if (m_contextElement)
423         root = m_dummyDocumentForFragmentParsing->documentElement();
424     m_fragment->takeAllChildrenFrom(root);
425 }
426
// Destructor. The body is intentionally empty; members are torn down by
// their own destructors.
427 HTMLTreeBuilder::FragmentParsingContext::~FragmentParsingContext()
428 {
429 }
430
431 PassRefPtr<Element> HTMLTreeBuilder::takeScriptToProcess(TextPosition1& scriptStartPosition)
432 {
433     // Unpause ourselves, callers may pause us again when processing the script.
434     // The HTML5 spec is written as though scripts are executed inside the tree
435     // builder.  We pause the parser to exit the tree builder, and then resume
436     // before running scripts.
437     m_isPaused = false;
438     scriptStartPosition = m_scriptToProcessStartPosition;
439     m_scriptToProcessStartPosition = uninitializedPositionValue1();
440     return m_scriptToProcess.release();
441 }
442
443 void HTMLTreeBuilder::constructTreeFromToken(HTMLToken& rawToken)
444 {
445     AtomicHTMLToken token(rawToken);
446     constructTreeFromAtomicToken(token);
447 }
448
449 void HTMLTreeBuilder::constructTreeFromAtomicToken(AtomicHTMLToken& token)
450 {
451     processToken(token);
452
453     // Swallowing U+0000 characters isn't in the HTML5 spec, but turning all
454     // the U+0000 characters into replacement characters has compatibility
455     // problems.
456     m_tokenizer->setForceNullCharacterReplacement(m_insertionMode == TextMode || m_insertionMode == InForeignContentMode);
457     m_tokenizer->setShouldAllowCDATA(m_insertionMode == InForeignContentMode && m_tree.currentElement()->namespaceURI() != xhtmlNamespaceURI);
458 }
459
460 void HTMLTreeBuilder::processToken(AtomicHTMLToken& token)
461 {
462     switch (token.type()) {
463     case HTMLToken::Uninitialized:
464         ASSERT_NOT_REACHED();
465         break;
466     case HTMLToken::DOCTYPE:
467         processDoctypeToken(token);
468         break;
469     case HTMLToken::StartTag:
470         processStartTag(token);
471         break;
472     case HTMLToken::EndTag:
473         processEndTag(token);
474         break;
475     case HTMLToken::Comment:
476         processComment(token);
477         return;
478     case HTMLToken::Character:
479         processCharacter(token);
480         break;
481     case HTMLToken::EndOfFile:
482         processEndOfFile(token);
483         break;
484     }
485 }
486
487 void HTMLTreeBuilder::processDoctypeToken(AtomicHTMLToken& token)
488 {
489     ASSERT(token.type() == HTMLToken::DOCTYPE);
490     if (m_insertionMode == InitialMode) {
491         m_tree.insertDoctype(token);
492         setInsertionMode(BeforeHTMLMode);
493         return;
494     }
495     if (m_insertionMode == InTableTextMode) {
496         defaultForInTableText();
497         processDoctypeToken(token);
498         return;
499     }
500     parseError(token);
501 }
502
503 void HTMLTreeBuilder::processFakeStartTag(const QualifiedName& tagName, PassRefPtr<NamedNodeMap> attributes)
504 {
505     // FIXME: We'll need a fancier conversion than just "localName" for SVG/MathML tags.
506     AtomicHTMLToken fakeToken(HTMLToken::StartTag, tagName.localName(), attributes);
507     processStartTag(fakeToken);
508 }
509
510 void HTMLTreeBuilder::processFakeEndTag(const QualifiedName& tagName)
511 {
512     // FIXME: We'll need a fancier conversion than just "localName" for SVG/MathML tags.
513     AtomicHTMLToken fakeToken(HTMLToken::EndTag, tagName.localName());
514     processEndTag(fakeToken);
515 }
516
517 void HTMLTreeBuilder::processFakeCharacters(const String& characters)
518 {
519     ASSERT(!characters.isEmpty());
520     ExternalCharacterTokenBuffer buffer(characters);
521     processCharacterBuffer(buffer);
522 }
523
524 void HTMLTreeBuilder::processFakePEndTagIfPInButtonScope()
525 {
526     if (!m_tree.openElements()->inButtonScope(pTag.localName()))
527         return;
528     AtomicHTMLToken endP(HTMLToken::EndTag, pTag.localName());
529     processEndTag(endP);
530 }
531
532 PassRefPtr<NamedNodeMap> HTMLTreeBuilder::attributesForIsindexInput(AtomicHTMLToken& token)
533 {
534     RefPtr<NamedNodeMap> attributes = token.takeAtributes();
535     if (!attributes)
536         attributes = NamedNodeMap::create();
537     else {
538         attributes->removeAttribute(nameAttr);
539         attributes->removeAttribute(actionAttr);
540         attributes->removeAttribute(promptAttr);
541     }
542
543     RefPtr<Attribute> mappedAttribute = Attribute::createMapped(nameAttr, isindexTag.localName());
544     attributes->insertAttribute(mappedAttribute.release(), false);
545     return attributes.release();
546 }
547
548 void HTMLTreeBuilder::processIsindexStartTagForInBody(AtomicHTMLToken& token)
549 {
550     ASSERT(token.type() == HTMLToken::StartTag);
551     ASSERT(token.name() == isindexTag);
552     parseError(token);
553     if (m_tree.form())
554         return;
555     notImplemented(); // Acknowledge self-closing flag
556     processFakeStartTag(formTag);
557     Attribute* actionAttribute = token.getAttributeItem(actionAttr);
558     if (actionAttribute) {
559         ASSERT(m_tree.currentElement()->hasTagName(formTag));
560         m_tree.currentElement()->setAttribute(actionAttr, actionAttribute->value());
561     }
562     processFakeStartTag(hrTag);
563     processFakeStartTag(labelTag);
564     Attribute* promptAttribute = token.getAttributeItem(promptAttr);
565     if (promptAttribute)
566         processFakeCharacters(promptAttribute->value());
567     else
568         processFakeCharacters(searchableIndexIntroduction());
569     processFakeStartTag(inputTag, attributesForIsindexInput(token));
570     notImplemented(); // This second set of characters may be needed by non-english locales.
571     processFakeEndTag(labelTag);
572     processFakeStartTag(hrTag);
573     processFakeEndTag(formTag);
574 }
575
576 namespace {
577
578 bool isLi(const Element* element)
579 {
580     return element->hasTagName(liTag);
581 }
582
583 bool isDdOrDt(const Element* element)
584 {
585     return element->hasTagName(ddTag)
586         || element->hasTagName(dtTag);
587 }
588
589 }
590
591 template <bool shouldClose(const Element*)>
592 void HTMLTreeBuilder::processCloseWhenNestedTag(AtomicHTMLToken& token)
593 {
594     m_framesetOk = false;
595     HTMLElementStack::ElementRecord* nodeRecord = m_tree.openElements()->topRecord();
596     while (1) {
597         Element* node = nodeRecord->element();
598         if (shouldClose(node)) {
599             processFakeEndTag(node->tagQName());
600             break;
601         }
602         if (isSpecialNode(node) && !node->hasTagName(addressTag) && !node->hasTagName(divTag) && !node->hasTagName(pTag))
603             break;
604         nodeRecord = nodeRecord->next();
605     }
606     processFakePEndTagIfPInButtonScope();
607     m_tree.insertHTMLElement(token);
608 }
609
610 namespace {
611
612 typedef HashMap<AtomicString, QualifiedName> PrefixedNameToQualifiedNameMap;
613
614 void mapLoweredLocalNameToName(PrefixedNameToQualifiedNameMap* map, QualifiedName** names, size_t length)
615 {
616     for (size_t i = 0; i < length; ++i) {
617         const QualifiedName& name = *names[i];
618         const AtomicString& localName = name.localName();
619         AtomicString loweredLocalName = localName.lower();
620         if (loweredLocalName != localName)
621             map->add(loweredLocalName, name);
622     }
623 }
624
625 void adjustSVGTagNameCase(AtomicHTMLToken& token)
626 {
627     static PrefixedNameToQualifiedNameMap* caseMap = 0;
628     if (!caseMap) {
629         caseMap = new PrefixedNameToQualifiedNameMap;
630         size_t length = 0;
631         QualifiedName** svgTags = SVGNames::getSVGTags(&length);
632         mapLoweredLocalNameToName(caseMap, svgTags, length);
633     }
634
635     const QualifiedName& casedName = caseMap->get(token.name());
636     if (casedName.localName().isNull())
637         return;
638     token.setName(casedName.localName());
639 }
640
641 template<QualifiedName** getAttrs(size_t* length)>
642 void adjustAttributes(AtomicHTMLToken& token)
643 {
644     static PrefixedNameToQualifiedNameMap* caseMap = 0;
645     if (!caseMap) {
646         caseMap = new PrefixedNameToQualifiedNameMap;
647         size_t length = 0;
648         QualifiedName** attrs = getAttrs(&length);
649         mapLoweredLocalNameToName(caseMap, attrs, length);
650     }
651
652     NamedNodeMap* attributes = token.attributes();
653     if (!attributes)
654         return;
655
656     for (unsigned x = 0; x < attributes->length(); ++x) {
657         Attribute* attribute = attributes->attributeItem(x);
658         const QualifiedName& casedName = caseMap->get(attribute->localName());
659         if (!casedName.localName().isNull())
660             attribute->parserSetName(casedName);
661     }
662 }
663
// Applies adjustAttributes() to |token| using the attribute list supplied by
// SVGNames::getSVGAttrs.
664 void adjustSVGAttributes(AtomicHTMLToken& token)
665 {
666     adjustAttributes<SVGNames::getSVGAttrs>(token);
667 }
668
// Applies adjustAttributes() to |token| using the attribute list supplied by
// MathMLNames::getMathMLAttrs.
669 void adjustMathMLAttributes(AtomicHTMLToken& token)
670 {
671     adjustAttributes<MathMLNames::getMathMLAttrs>(token);
672 }
673
674 void addNamesWithPrefix(PrefixedNameToQualifiedNameMap* map, const AtomicString& prefix, QualifiedName** names, size_t length)
675 {
676     for (size_t i = 0; i < length; ++i) {
677         QualifiedName* name = names[i];
678         const AtomicString& localName = name->localName();
679         AtomicString prefixColonLocalName(prefix + ":" + localName);
680         QualifiedName nameWithPrefix(prefix, localName, name->namespaceURI());
681         map->add(prefixColonLocalName, nameWithPrefix);
682     }
683 }
684
685 void adjustForeignAttributes(AtomicHTMLToken& token)
686 {
687     static PrefixedNameToQualifiedNameMap* map = 0;
688     if (!map) {
689         map = new PrefixedNameToQualifiedNameMap;
690         size_t length = 0;
691         QualifiedName** attrs = XLinkNames::getXLinkAttrs(&length);
692         addNamesWithPrefix(map, "xlink", attrs, length);
693
694         attrs = XMLNames::getXMLAttrs(&length);
695         addNamesWithPrefix(map, "xml", attrs, length);
696
697         map->add("xmlns", XMLNSNames::xmlnsAttr);
698         map->add("xmlns:xlink", QualifiedName("xmlns", "xlink", XMLNSNames::xmlnsNamespaceURI));
699     }
700
701     NamedNodeMap* attributes = token.attributes();
702     if (!attributes)
703         return;
704
705     for (unsigned x = 0; x < attributes->length(); ++x) {
706         Attribute* attribute = attributes->attributeItem(x);
707         const QualifiedName& name = map->get(attribute->localName());
708         if (!name.localName().isNull())
709             attribute->parserSetName(name);
710     }
711 }
712
713 }
714
715 void HTMLTreeBuilder::processStartTagForInBody(AtomicHTMLToken& token)
716 {
717     ASSERT(token.type() == HTMLToken::StartTag);
718     if (token.name() == htmlTag) {
719         m_tree.insertHTMLHtmlStartTagInBody(token);
720         return;
721     }
722     if (token.name() == baseTag
723         || token.name() == basefontTag
724         || token.name() == bgsoundTag
725         || token.name() == commandTag
726         || token.name() == linkTag
727         || token.name() == metaTag
728         || token.name() == noframesTag
729         || token.name() == scriptTag
730         || token.name() == styleTag
731         || token.name() == titleTag) {
732         bool didProcess = processStartTagForInHead(token);
733         ASSERT_UNUSED(didProcess, didProcess);
734         return;
735     }
736     if (token.name() == bodyTag) {
737         if (!m_tree.openElements()->secondElementIsHTMLBodyElement() || m_tree.openElements()->hasOnlyOneElement()) {
738             ASSERT(isParsingFragment());
739             return;
740         }
741         m_tree.insertHTMLBodyStartTagInBody(token);
742         return;
743     }
744     if (token.name() == framesetTag) {
745         parseError(token);
746         if (!m_tree.openElements()->secondElementIsHTMLBodyElement() || m_tree.openElements()->hasOnlyOneElement()) {
747             ASSERT(isParsingFragment());
748             return;
749         }
750         if (!m_framesetOk)
751             return;
752         ExceptionCode ec = 0;
753         m_tree.openElements()->bodyElement()->remove(ec);
754         ASSERT(!ec);
755         m_tree.openElements()->popUntil(m_tree.openElements()->bodyElement());
756         m_tree.openElements()->popHTMLBodyElement();
757         ASSERT(m_tree.openElements()->top() == m_tree.openElements()->htmlElement());
758         m_tree.insertHTMLElement(token);
759         setInsertionMode(InFramesetMode);
760         return;
761     }
762     if (token.name() == addressTag
763         || token.name() == articleTag
764         || token.name() == asideTag
765         || token.name() == blockquoteTag
766         || token.name() == centerTag
767         || token.name() == detailsTag
768         || token.name() == dirTag
769         || token.name() == divTag
770         || token.name() == dlTag
771         || token.name() == fieldsetTag
772         || token.name() == figcaptionTag
773         || token.name() == figureTag
774         || token.name() == footerTag
775         || token.name() == headerTag
776         || token.name() == hgroupTag
777         || token.name() == menuTag
778         || token.name() == navTag
779         || token.name() == olTag
780         || token.name() == pTag
781         || token.name() == sectionTag
782         || token.name() == summaryTag
783         || token.name() == ulTag) {
784         processFakePEndTagIfPInButtonScope();
785         m_tree.insertHTMLElement(token);
786         return;
787     }
788     if (isNumberedHeaderTag(token.name())) {
789         processFakePEndTagIfPInButtonScope();
790         if (isNumberedHeaderTag(m_tree.currentElement()->localName())) {
791             parseError(token);
792             m_tree.openElements()->pop();
793         }
794         m_tree.insertHTMLElement(token);
795         return;
796     }
797     if (token.name() == preTag || token.name() == listingTag) {
798         processFakePEndTagIfPInButtonScope();
799         m_tree.insertHTMLElement(token);
800         m_tokenizer->setSkipLeadingNewLineForListing(true);
801         m_framesetOk = false;
802         return;
803     }
804     if (token.name() == formTag) {
805         if (m_tree.form()) {
806             parseError(token);
807             return;
808         }
809         processFakePEndTagIfPInButtonScope();
810         m_tree.insertHTMLFormElement(token);
811         return;
812     }
813     if (token.name() == liTag) {
814         processCloseWhenNestedTag<isLi>(token);
815         return;
816     }
817     if (token.name() == ddTag || token.name() == dtTag) {
818         processCloseWhenNestedTag<isDdOrDt>(token);
819         return;
820     }
821     if (token.name() == plaintextTag) {
822         processFakePEndTagIfPInButtonScope();
823         m_tree.insertHTMLElement(token);
824         m_tokenizer->setState(HTMLTokenizer::PLAINTEXTState);
825         return;
826     }
827     if (token.name() == buttonTag) {
828         if (m_tree.openElements()->inScope(buttonTag)) {
829             parseError(token);
830             processFakeEndTag(buttonTag);
831             reprocessStartTag(token); // FIXME: Could we just fall through here?
832             return;
833         }
834         m_tree.reconstructTheActiveFormattingElements();
835         m_tree.insertHTMLElement(token);
836         m_framesetOk = false;
837         return;
838     }
839     if (token.name() == aTag) {
840         Element* activeATag = m_tree.activeFormattingElements()->closestElementInScopeWithName(aTag.localName());
841         if (activeATag) {
842             parseError(token);
843             processFakeEndTag(aTag);
844             m_tree.activeFormattingElements()->remove(activeATag);
845             if (m_tree.openElements()->contains(activeATag))
846                 m_tree.openElements()->remove(activeATag);
847         }
848         m_tree.reconstructTheActiveFormattingElements();
849         m_tree.insertFormattingElement(token);
850         return;
851     }
852     if (isNonAnchorNonNobrFormattingTag(token.name())) {
853         m_tree.reconstructTheActiveFormattingElements();
854         m_tree.insertFormattingElement(token);
855         return;
856     }
857     if (token.name() == nobrTag) {
858         m_tree.reconstructTheActiveFormattingElements();
859         if (m_tree.openElements()->inScope(nobrTag)) {
860             parseError(token);
861             processFakeEndTag(nobrTag);
862             m_tree.reconstructTheActiveFormattingElements();
863         }
864         m_tree.insertFormattingElement(token);
865         return;
866     }
867     if (token.name() == appletTag
868         || token.name() == marqueeTag
869         || token.name() == objectTag) {
870         m_tree.reconstructTheActiveFormattingElements();
871         m_tree.insertHTMLElement(token);
872         m_tree.activeFormattingElements()->appendMarker();
873         m_framesetOk = false;
874         return;
875     }
876     if (token.name() == tableTag) {
877         if (!m_document->inQuirksMode() && m_tree.openElements()->inButtonScope(pTag))
878             processFakeEndTag(pTag);
879         m_tree.insertHTMLElement(token);
880         m_framesetOk = false;
881         setInsertionMode(InTableMode);
882         return;
883     }
884     if (token.name() == imageTag) {
885         parseError(token);
886         // Apparently we're not supposed to ask.
887         token.setName(imgTag.localName());
888         prepareToReprocessToken();
889         // Note the fall through to the imgTag handling below!
890     }
891     if (token.name() == areaTag
892         || token.name() == brTag
893         || token.name() == embedTag
894         || token.name() == imgTag
895         || token.name() == inputTag
896         || token.name() == keygenTag
897         || token.name() == wbrTag) {
898         m_tree.reconstructTheActiveFormattingElements();
899         m_tree.insertSelfClosingHTMLElement(token);
900         m_framesetOk = false;
901         return;
902     }
903     if (token.name() == paramTag
904         || token.name() == sourceTag
905         || token.name() == trackTag) {
906         m_tree.insertSelfClosingHTMLElement(token);
907         return;
908     }
909     if (token.name() == hrTag) {
910         processFakePEndTagIfPInButtonScope();
911         m_tree.insertSelfClosingHTMLElement(token);
912         m_framesetOk = false;
913         return;
914     }
915     if (token.name() == isindexTag) {
916         processIsindexStartTagForInBody(token);
917         return;
918     }
919     if (token.name() == textareaTag) {
920         m_tree.insertHTMLElement(token);
921         m_tokenizer->setSkipLeadingNewLineForListing(true);
922         m_tokenizer->setState(HTMLTokenizer::RCDATAState);
923         m_originalInsertionMode = m_insertionMode;
924         m_framesetOk = false;
925         setInsertionMode(TextMode);
926         return;
927     }
928     if (token.name() == xmpTag) {
929         processFakePEndTagIfPInButtonScope();
930         m_tree.reconstructTheActiveFormattingElements();
931         m_framesetOk = false;
932         processGenericRawTextStartTag(token);
933         return;
934     }
935     if (token.name() == iframeTag) {
936         m_framesetOk = false;
937         processGenericRawTextStartTag(token);
938         return;
939     }
940     if (token.name() == noembedTag && pluginsEnabled(m_document->frame())) {
941         processGenericRawTextStartTag(token);
942         return;
943     }
944     if (token.name() == noscriptTag && scriptEnabled(m_document->frame())) {
945         processGenericRawTextStartTag(token);
946         return;
947     }
948     if (token.name() == selectTag) {
949         m_tree.reconstructTheActiveFormattingElements();
950         m_tree.insertHTMLElement(token);
951         m_framesetOk = false;
952         if (m_insertionMode == InTableMode
953              || m_insertionMode == InCaptionMode
954              || m_insertionMode == InColumnGroupMode
955              || m_insertionMode == InTableBodyMode
956              || m_insertionMode == InRowMode
957              || m_insertionMode == InCellMode)
958             setInsertionMode(InSelectInTableMode);
959         else
960             setInsertionMode(InSelectMode);
961         return;
962     }
963     if (token.name() == optgroupTag || token.name() == optionTag) {
964         if (m_tree.openElements()->inScope(optionTag.localName())) {
965             AtomicHTMLToken endOption(HTMLToken::EndTag, optionTag.localName());
966             processEndTag(endOption);
967         }
968         m_tree.reconstructTheActiveFormattingElements();
969         m_tree.insertHTMLElement(token);
970         return;
971     }
972     if (token.name() == rpTag || token.name() == rtTag) {
973         if (m_tree.openElements()->inScope(rubyTag.localName())) {
974             m_tree.generateImpliedEndTags();
975             if (!m_tree.currentElement()->hasTagName(rubyTag)) {
976                 parseError(token);
977                 m_tree.openElements()->popUntil(rubyTag.localName());
978             }
979         }
980         m_tree.insertHTMLElement(token);
981         return;
982     }
983     if (token.name() == MathMLNames::mathTag.localName()) {
984         m_tree.reconstructTheActiveFormattingElements();
985         adjustMathMLAttributes(token);
986         adjustForeignAttributes(token);
987         m_tree.insertForeignElement(token, MathMLNames::mathmlNamespaceURI);
988         if (m_insertionMode != InForeignContentMode)
989             setInsertionMode(InForeignContentMode);
990         return;
991     }
992     if (token.name() == SVGNames::svgTag.localName()) {
993         m_tree.reconstructTheActiveFormattingElements();
994         adjustSVGAttributes(token);
995         adjustForeignAttributes(token);
996         m_tree.insertForeignElement(token, SVGNames::svgNamespaceURI);
997         if (m_insertionMode != InForeignContentMode)
998             setInsertionMode(InForeignContentMode);
999         return;
1000     }
1001     if (isCaptionColOrColgroupTag(token.name())
1002         || token.name() == frameTag
1003         || token.name() == headTag
1004         || isTableBodyContextTag(token.name())
1005         || isTableCellContextTag(token.name())
1006         || token.name() == trTag) {
1007         parseError(token);
1008         return;
1009     }
1010     m_tree.reconstructTheActiveFormattingElements();
1011     m_tree.insertHTMLElement(token);
1012 }
1013
1014 bool HTMLTreeBuilder::processColgroupEndTagForInColumnGroup()
1015 {
1016     if (m_tree.currentElement() == m_tree.openElements()->htmlElement()) {
1017         ASSERT(isParsingFragment());
1018         // FIXME: parse error
1019         return false;
1020     }
1021     m_tree.openElements()->pop();
1022     setInsertionMode(InTableMode);
1023     return true;
1024 }
1025
1026 // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#close-the-cell
1027 void HTMLTreeBuilder::closeTheCell()
1028 {
1029     ASSERT(insertionMode() == InCellMode);
1030     if (m_tree.openElements()->inTableScope(tdTag)) {
1031         ASSERT(!m_tree.openElements()->inTableScope(thTag));
1032         processFakeEndTag(tdTag);
1033         return;
1034     }
1035     ASSERT(m_tree.openElements()->inTableScope(thTag));
1036     processFakeEndTag(thTag);
1037     ASSERT(insertionMode() == InRowMode);
1038 }
1039
1040 void HTMLTreeBuilder::processStartTagForInTable(AtomicHTMLToken& token)
1041 {
1042     ASSERT(token.type() == HTMLToken::StartTag);
1043     if (token.name() == captionTag) {
1044         m_tree.openElements()->popUntilTableScopeMarker();
1045         m_tree.activeFormattingElements()->appendMarker();
1046         m_tree.insertHTMLElement(token);
1047         setInsertionMode(InCaptionMode);
1048         return;
1049     }
1050     if (token.name() == colgroupTag) {
1051         m_tree.openElements()->popUntilTableScopeMarker();
1052         m_tree.insertHTMLElement(token);
1053         setInsertionMode(InColumnGroupMode);
1054         return;
1055     }
1056     if (token.name() == colTag) {
1057         processFakeStartTag(colgroupTag);
1058         ASSERT(InColumnGroupMode);
1059         reprocessStartTag(token);
1060         return;
1061     }
1062     if (isTableBodyContextTag(token.name())) {
1063         m_tree.openElements()->popUntilTableScopeMarker();
1064         m_tree.insertHTMLElement(token);
1065         setInsertionMode(InTableBodyMode);
1066         return;
1067     }
1068     if (isTableCellContextTag(token.name())
1069         || token.name() == trTag) {
1070         processFakeStartTag(tbodyTag);
1071         ASSERT(insertionMode() == InTableBodyMode);
1072         reprocessStartTag(token);
1073         return;
1074     }
1075     if (token.name() == tableTag) {
1076         parseError(token);
1077         if (!processTableEndTagForInTable()) {
1078             ASSERT(isParsingFragment());
1079             return;
1080         }
1081         reprocessStartTag(token);
1082         return;
1083     }
1084     if (token.name() == styleTag || token.name() == scriptTag) {
1085         processStartTagForInHead(token);
1086         return;
1087     }
1088     if (token.name() == inputTag) {
1089         Attribute* typeAttribute = token.getAttributeItem(typeAttr);
1090         if (typeAttribute && equalIgnoringCase(typeAttribute->value(), "hidden")) {
1091             parseError(token);
1092             m_tree.insertSelfClosingHTMLElement(token);
1093             return;
1094         }
1095         // Fall through to "anything else" case.
1096     }
1097     if (token.name() == formTag) {
1098         parseError(token);
1099         if (m_tree.form())
1100             return;
1101         m_tree.insertHTMLFormElement(token, true);
1102         m_tree.openElements()->pop();
1103         return;
1104     }
1105     parseError(token);
1106     HTMLConstructionSite::RedirectToFosterParentGuard redirecter(m_tree);
1107     processStartTagForInBody(token);
1108 }
1109
1110 namespace {
1111
1112 bool shouldProcessForeignContentUsingInBodyInsertionMode(AtomicHTMLToken& token, Element* currentElement)
1113 {
1114     ASSERT(token.type() == HTMLToken::StartTag);
1115     if (currentElement->hasTagName(MathMLNames::miTag)
1116         || currentElement->hasTagName(MathMLNames::moTag)
1117         || currentElement->hasTagName(MathMLNames::mnTag)
1118         || currentElement->hasTagName(MathMLNames::msTag)
1119         || currentElement->hasTagName(MathMLNames::mtextTag)) {
1120         return token.name() != MathMLNames::mglyphTag
1121             && token.name() != MathMLNames::malignmarkTag;
1122     }
1123     if (currentElement->hasTagName(MathMLNames::annotation_xmlTag))
1124         return token.name() == SVGNames::svgTag;
1125     if (currentElement->hasTagName(SVGNames::foreignObjectTag)
1126         || currentElement->hasTagName(SVGNames::descTag)
1127         || currentElement->hasTagName(SVGNames::titleTag))
1128         return true;
1129     return currentElement->namespaceURI() == HTMLNames::xhtmlNamespaceURI;
1130 }
1131
1132 }
1133
1134 void HTMLTreeBuilder::processStartTag(AtomicHTMLToken& token)
1135 {
1136     ASSERT(token.type() == HTMLToken::StartTag);
1137     switch (insertionMode()) {
1138     case InitialMode:
1139         ASSERT(insertionMode() == InitialMode);
1140         defaultForInitial();
1141         // Fall through.
1142     case BeforeHTMLMode:
1143         ASSERT(insertionMode() == BeforeHTMLMode);
1144         if (token.name() == htmlTag) {
1145             m_tree.insertHTMLHtmlStartTagBeforeHTML(token);
1146             setInsertionMode(BeforeHeadMode);
1147             return;
1148         }
1149         defaultForBeforeHTML();
1150         // Fall through.
1151     case BeforeHeadMode:
1152         ASSERT(insertionMode() == BeforeHeadMode);
1153         if (token.name() == htmlTag) {
1154             m_tree.insertHTMLHtmlStartTagInBody(token);
1155             return;
1156         }
1157         if (token.name() == headTag) {
1158             m_tree.insertHTMLHeadElement(token);
1159             setInsertionMode(InHeadMode);
1160             return;
1161         }
1162         defaultForBeforeHead();
1163         // Fall through.
1164     case InHeadMode:
1165         ASSERT(insertionMode() == InHeadMode);
1166         if (processStartTagForInHead(token))
1167             return;
1168         defaultForInHead();
1169         // Fall through.
1170     case AfterHeadMode:
1171         ASSERT(insertionMode() == AfterHeadMode);
1172         if (token.name() == htmlTag) {
1173             m_tree.insertHTMLHtmlStartTagInBody(token);
1174             return;
1175         }
1176         if (token.name() == bodyTag) {
1177             m_framesetOk = false;
1178             m_tree.insertHTMLBodyElement(token);
1179             setInsertionMode(InBodyMode);
1180             return;
1181         }
1182         if (token.name() == framesetTag) {
1183             m_tree.insertHTMLElement(token);
1184             setInsertionMode(InFramesetMode);
1185             return;
1186         }
1187         if (token.name() == baseTag
1188             || token.name() == basefontTag
1189             || token.name() == bgsoundTag
1190             || token.name() == linkTag
1191             || token.name() == metaTag
1192             || token.name() == noframesTag
1193             || token.name() == scriptTag
1194             || token.name() == styleTag
1195             || token.name() == titleTag) {
1196             parseError(token);
1197             ASSERT(m_tree.head());
1198             m_tree.openElements()->pushHTMLHeadElement(m_tree.head());
1199             processStartTagForInHead(token);
1200             m_tree.openElements()->removeHTMLHeadElement(m_tree.head());
1201             return;
1202         }
1203         if (token.name() == headTag) {
1204             parseError(token);
1205             return;
1206         }
1207         defaultForAfterHead();
1208         // Fall through
1209     case InBodyMode:
1210         ASSERT(insertionMode() == InBodyMode);
1211         processStartTagForInBody(token);
1212         break;
1213     case InTableMode:
1214         ASSERT(insertionMode() == InTableMode);
1215         processStartTagForInTable(token);
1216         break;
1217     case InCaptionMode:
1218         ASSERT(insertionMode() == InCaptionMode);
1219         if (isCaptionColOrColgroupTag(token.name())
1220             || isTableBodyContextTag(token.name())
1221             || isTableCellContextTag(token.name())
1222             || token.name() == trTag) {
1223             parseError(token);
1224             if (!processCaptionEndTagForInCaption()) {
1225                 ASSERT(isParsingFragment());
1226                 return;
1227             }
1228             reprocessStartTag(token);
1229             return;
1230         }
1231         processStartTagForInBody(token);
1232         break;
1233     case InColumnGroupMode:
1234         ASSERT(insertionMode() == InColumnGroupMode);
1235         if (token.name() == htmlTag) {
1236             m_tree.insertHTMLHtmlStartTagInBody(token);
1237             return;
1238         }
1239         if (token.name() == colTag) {
1240             m_tree.insertSelfClosingHTMLElement(token);
1241             return;
1242         }
1243         if (!processColgroupEndTagForInColumnGroup()) {
1244             ASSERT(isParsingFragment());
1245             return;
1246         }
1247         reprocessStartTag(token);
1248         break;
1249     case InTableBodyMode:
1250         ASSERT(insertionMode() == InTableBodyMode);
1251         if (token.name() == trTag) {
1252             m_tree.openElements()->popUntilTableBodyScopeMarker(); // How is there ever anything to pop?
1253             m_tree.insertHTMLElement(token);
1254             setInsertionMode(InRowMode);
1255             return;
1256         }
1257         if (isTableCellContextTag(token.name())) {
1258             parseError(token);
1259             processFakeStartTag(trTag);
1260             ASSERT(insertionMode() == InRowMode);
1261             reprocessStartTag(token);
1262             return;
1263         }
1264         if (isCaptionColOrColgroupTag(token.name()) || isTableBodyContextTag(token.name())) {
1265             // FIXME: This is slow.
1266             if (!m_tree.openElements()->inTableScope(tbodyTag.localName()) && !m_tree.openElements()->inTableScope(theadTag.localName()) && !m_tree.openElements()->inTableScope(tfootTag.localName())) {
1267                 ASSERT(isParsingFragment());
1268                 parseError(token);
1269                 return;
1270             }
1271             m_tree.openElements()->popUntilTableBodyScopeMarker();
1272             ASSERT(isTableBodyContextTag(m_tree.currentElement()->localName()));
1273             processFakeEndTag(m_tree.currentElement()->tagQName());
1274             reprocessStartTag(token);
1275             return;
1276         }
1277         processStartTagForInTable(token);
1278         break;
1279     case InRowMode:
1280         ASSERT(insertionMode() == InRowMode);
1281         if (isTableCellContextTag(token.name())) {
1282             m_tree.openElements()->popUntilTableRowScopeMarker();
1283             m_tree.insertHTMLElement(token);
1284             setInsertionMode(InCellMode);
1285             m_tree.activeFormattingElements()->appendMarker();
1286             return;
1287         }
1288         if (token.name() == trTag
1289             || isCaptionColOrColgroupTag(token.name())
1290             || isTableBodyContextTag(token.name())) {
1291             if (!processTrEndTagForInRow()) {
1292                 ASSERT(isParsingFragment());
1293                 return;
1294             }
1295             ASSERT(insertionMode() == InTableBodyMode);
1296             reprocessStartTag(token);
1297             return;
1298         }
1299         processStartTagForInTable(token);
1300         break;
1301     case InCellMode:
1302         ASSERT(insertionMode() == InCellMode);
1303         if (isCaptionColOrColgroupTag(token.name())
1304             || isTableCellContextTag(token.name())
1305             || token.name() == trTag
1306             || isTableBodyContextTag(token.name())) {
1307             // FIXME: This could be more efficient.
1308             if (!m_tree.openElements()->inTableScope(tdTag) && !m_tree.openElements()->inTableScope(thTag)) {
1309                 ASSERT(isParsingFragment());
1310                 parseError(token);
1311                 return;
1312             }
1313             closeTheCell();
1314             reprocessStartTag(token);
1315             return;
1316         }
1317         processStartTagForInBody(token);
1318         break;
1319     case AfterBodyMode:
1320     case AfterAfterBodyMode:
1321         ASSERT(insertionMode() == AfterBodyMode || insertionMode() == AfterAfterBodyMode);
1322         if (token.name() == htmlTag) {
1323             m_tree.insertHTMLHtmlStartTagInBody(token);
1324             return;
1325         }
1326         setInsertionMode(InBodyMode);
1327         reprocessStartTag(token);
1328         break;
1329     case InHeadNoscriptMode:
1330         ASSERT(insertionMode() == InHeadNoscriptMode);
1331         if (token.name() == htmlTag) {
1332             m_tree.insertHTMLHtmlStartTagInBody(token);
1333             return;
1334         }
1335         if (token.name() == basefontTag
1336             || token.name() == bgsoundTag
1337             || token.name() == linkTag
1338             || token.name() == metaTag
1339             || token.name() == noframesTag
1340             || token.name() == styleTag) {
1341             bool didProcess = processStartTagForInHead(token);
1342             ASSERT_UNUSED(didProcess, didProcess);
1343             return;
1344         }
1345         if (token.name() == htmlTag || token.name() == noscriptTag) {
1346             parseError(token);
1347             return;
1348         }
1349         defaultForInHeadNoscript();
1350         processToken(token);
1351         break;
1352     case InFramesetMode:
1353         ASSERT(insertionMode() == InFramesetMode);
1354         if (token.name() == htmlTag) {
1355             m_tree.insertHTMLHtmlStartTagInBody(token);
1356             return;
1357         }
1358         if (token.name() == framesetTag) {
1359             m_tree.insertHTMLElement(token);
1360             return;
1361         }
1362         if (token.name() == frameTag) {
1363             m_tree.insertSelfClosingHTMLElement(token);
1364             return;
1365         }
1366         if (token.name() == noframesTag) {
1367             processStartTagForInHead(token);
1368             return;
1369         }
1370         parseError(token);
1371         break;
1372     case AfterFramesetMode:
1373     case AfterAfterFramesetMode:
1374         ASSERT(insertionMode() == AfterFramesetMode || insertionMode() == AfterAfterFramesetMode);
1375         if (token.name() == htmlTag) {
1376             m_tree.insertHTMLHtmlStartTagInBody(token);
1377             return;
1378         }
1379         if (token.name() == noframesTag) {
1380             processStartTagForInHead(token);
1381             return;
1382         }
1383         parseError(token);
1384         break;
1385     case InSelectInTableMode:
1386         ASSERT(insertionMode() == InSelectInTableMode);
1387         if (token.name() == captionTag
1388             || token.name() == tableTag
1389             || isTableBodyContextTag(token.name())
1390             || token.name() == trTag
1391             || isTableCellContextTag(token.name())) {
1392             parseError(token);
1393             AtomicHTMLToken endSelect(HTMLToken::EndTag, selectTag.localName());
1394             processEndTag(endSelect);
1395             reprocessStartTag(token);
1396             return;
1397         }
1398         // Fall through
1399     case InSelectMode:
1400         ASSERT(insertionMode() == InSelectMode || insertionMode() == InSelectInTableMode);
1401         if (token.name() == htmlTag) {
1402             m_tree.insertHTMLHtmlStartTagInBody(token);
1403             return;
1404         }
1405         if (token.name() == optionTag) {
1406             if (m_tree.currentElement()->hasTagName(optionTag)) {
1407                 AtomicHTMLToken endOption(HTMLToken::EndTag, optionTag.localName());
1408                 processEndTag(endOption);
1409             }
1410             m_tree.insertHTMLElement(token);
1411             return;
1412         }
1413         if (token.name() == optgroupTag) {
1414             if (m_tree.currentElement()->hasTagName(optionTag)) {
1415                 AtomicHTMLToken endOption(HTMLToken::EndTag, optionTag.localName());
1416                 processEndTag(endOption);
1417             }
1418             if (m_tree.currentElement()->hasTagName(optgroupTag)) {
1419                 AtomicHTMLToken endOptgroup(HTMLToken::EndTag, optgroupTag.localName());
1420                 processEndTag(endOptgroup);
1421             }
1422             m_tree.insertHTMLElement(token);
1423             return;
1424         }
1425         if (token.name() == selectTag) {
1426             parseError(token);
1427             AtomicHTMLToken endSelect(HTMLToken::EndTag, selectTag.localName());
1428             processEndTag(endSelect);
1429             return;
1430         }
1431         if (token.name() == inputTag
1432             || token.name() == keygenTag
1433             || token.name() == textareaTag) {
1434             parseError(token);
1435             if (!m_tree.openElements()->inSelectScope(selectTag)) {
1436                 ASSERT(isParsingFragment());
1437                 return;
1438             }
1439             AtomicHTMLToken endSelect(HTMLToken::EndTag, selectTag.localName());
1440             processEndTag(endSelect);
1441             reprocessStartTag(token);
1442             return;
1443         }
1444         if (token.name() == scriptTag) {
1445             bool didProcess = processStartTagForInHead(token);
1446             ASSERT_UNUSED(didProcess, didProcess);
1447             return;
1448         }
1449         break;
1450     case InTableTextMode:
1451         defaultForInTableText();
1452         processStartTag(token);
1453         break;
1454     case InForeignContentMode: {
1455         if (shouldProcessForeignContentUsingInBodyInsertionMode(token, m_tree.currentElement())) {
1456             processForeignContentUsingInBodyModeAndResetMode(token);
1457             return;
1458         }
1459         if (token.name() == bTag
1460             || token.name() == bigTag
1461             || token.name() == blockquoteTag
1462             || token.name() == bodyTag
1463             || token.name() == brTag
1464             || token.name() == centerTag
1465             || token.name() == codeTag
1466             || token.name() == ddTag
1467             || token.name() == divTag
1468             || token.name() == dlTag
1469             || token.name() == dtTag
1470             || token.name() == emTag
1471             || token.name() == embedTag
1472             || isNumberedHeaderTag(token.name())
1473             || token.name() == headTag
1474             || token.name() == hrTag
1475             || token.name() == iTag
1476             || token.name() == imgTag
1477             || token.name() == liTag
1478             || token.name() == listingTag
1479             || token.name() == menuTag
1480             || token.name() == metaTag
1481             || token.name() == nobrTag
1482             || token.name() == olTag
1483             || token.name() == pTag
1484             || token.name() == preTag
1485             || token.name() == rubyTag
1486             || token.name() == sTag
1487             || token.name() == smallTag
1488             || token.name() == spanTag
1489             || token.name() == strongTag
1490             || token.name() == strikeTag
1491             || token.name() == subTag
1492             || token.name() == supTag
1493             || token.name() == tableTag
1494             || token.name() == ttTag
1495             || token.name() == uTag
1496             || token.name() == ulTag
1497             || token.name() == varTag
1498             || (token.name() == fontTag && (token.getAttributeItem(colorAttr) || token.getAttributeItem(faceAttr) || token.getAttributeItem(sizeAttr)))) {
1499             parseError(token);
1500             m_tree.openElements()->popUntilForeignContentScopeMarker();
1501             resetInsertionModeAppropriately();
1502             reprocessStartTag(token);
1503             return;
1504         }
1505         const AtomicString& currentNamespace = m_tree.currentElement()->namespaceURI();
1506         if (currentNamespace == MathMLNames::mathmlNamespaceURI)
1507             adjustMathMLAttributes(token);
1508         if (currentNamespace == SVGNames::svgNamespaceURI) {
1509             adjustSVGTagNameCase(token);
1510             adjustSVGAttributes(token);
1511         }
1512         adjustForeignAttributes(token);
1513         m_tree.insertForeignElement(token, currentNamespace);
1514         break;
1515     }
1516     case TextMode:
1517         ASSERT_NOT_REACHED();
1518         break;
1519     }
1520 }
1521
1522 bool HTMLTreeBuilder::processBodyEndTagForInBody(AtomicHTMLToken& token)
1523 {
1524     ASSERT(token.type() == HTMLToken::EndTag);
1525     ASSERT(token.name() == bodyTag);
1526     if (!m_tree.openElements()->inScope(bodyTag.localName())) {
1527         parseError(token);
1528         return false;
1529     }
1530     notImplemented(); // Emit a more specific parse error based on stack contents.
1531     setInsertionMode(AfterBodyMode);
1532     return true;
1533 }
1534
1535 void HTMLTreeBuilder::processAnyOtherEndTagForInBody(AtomicHTMLToken& token)
1536 {
1537     ASSERT(token.type() == HTMLToken::EndTag);
1538     HTMLElementStack::ElementRecord* record = m_tree.openElements()->topRecord();
1539     while (1) {
1540         Element* node = record->element();
1541         if (node->hasLocalName(token.name())) {
1542             m_tree.generateImpliedEndTags();
1543             if (!m_tree.currentElement()->hasLocalName(token.name())) {
1544                 parseError(token);
1545                 // FIXME: This is either a bug in the spec, or a bug in our
1546                 // implementation.  Filed a bug with HTML5:
1547                 // http://www.w3.org/Bugs/Public/show_bug.cgi?id=10080
1548                 // We might have already popped the node for the token in
1549                 // generateImpliedEndTags, just abort.
1550                 if (!m_tree.openElements()->contains(node))
1551                     return;
1552             }
1553             m_tree.openElements()->popUntilPopped(node);
1554             return;
1555         }
1556         if (isSpecialNode(node)) {
1557             parseError(token);
1558             return;
1559         }
1560         record = record->next();
1561     }
1562 }
1563
1564 // FIXME: This probably belongs on HTMLElementStack.
1565 HTMLElementStack::ElementRecord* HTMLTreeBuilder::furthestBlockForFormattingElement(Element* formattingElement)
1566 {
1567     HTMLElementStack::ElementRecord* furthestBlock = 0;
1568     HTMLElementStack::ElementRecord* record = m_tree.openElements()->topRecord();
1569     for (; record; record = record->next()) {
1570         if (record->element() == formattingElement)
1571             return furthestBlock;
1572         if (isSpecialNode(record->element()))
1573             furthestBlock = record;
1574     }
1575     ASSERT_NOT_REACHED();
1576     return 0;
1577 }
1578
1579 // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#parsing-main-inbody
1580 void HTMLTreeBuilder::callTheAdoptionAgency(AtomicHTMLToken& token)
1581 {
1582     // The adoption agency algorithm is N^2.  We limit the number of iterations
1583     // to stop from hanging the whole browser.  This limit is copied from the
1584     // legacy tree builder and might need to be tweaked in the future.
1585     static const int adoptionAgencyIterationLimit = 10;
1586
1587     for (int i = 0; i < adoptionAgencyIterationLimit; ++i) {
1588         // 1.
1589         Element* formattingElement = m_tree.activeFormattingElements()->closestElementInScopeWithName(token.name());
1590         if (!formattingElement || ((m_tree.openElements()->contains(formattingElement)) && !m_tree.openElements()->inScope(formattingElement))) {
1591             parseError(token);
1592             notImplemented(); // Check the stack of open elements for a more specific parse error.
1593             return;
1594         }
1595         HTMLElementStack::ElementRecord* formattingElementRecord = m_tree.openElements()->find(formattingElement);
1596         if (!formattingElementRecord) {
1597             parseError(token);
1598             m_tree.activeFormattingElements()->remove(formattingElement);
1599             return;
1600         }
1601         if (formattingElement != m_tree.currentElement())
1602             parseError(token);
1603         // 2.
1604         HTMLElementStack::ElementRecord* furthestBlock = furthestBlockForFormattingElement(formattingElement);
1605         // 3.
1606         if (!furthestBlock) {
1607             m_tree.openElements()->popUntilPopped(formattingElement);
1608             m_tree.activeFormattingElements()->remove(formattingElement);
1609             return;
1610         }
1611         // 4.
1612         ASSERT(furthestBlock->isAbove(formattingElementRecord));
1613         Element* commonAncestor = formattingElementRecord->next()->element();
1614         // 5.
1615         HTMLFormattingElementList::Bookmark bookmark = m_tree.activeFormattingElements()->bookmarkFor(formattingElement);
1616         // 6.
1617         HTMLElementStack::ElementRecord* node = furthestBlock;
1618         HTMLElementStack::ElementRecord* nextNode = node->next();
1619         HTMLElementStack::ElementRecord* lastNode = furthestBlock;
1620         for (int i = 0; i < adoptionAgencyIterationLimit; ++i) {
1621             // 6.1
1622             node = nextNode;
1623             ASSERT(node);
1624             nextNode = node->next(); // Save node->next() for the next iteration in case node is deleted in 6.2.
1625             // 6.2
1626             if (!m_tree.activeFormattingElements()->contains(node->element())) {
1627                 m_tree.openElements()->remove(node->element());
1628                 node = 0;
1629                 continue;
1630             }
1631             // 6.3
1632             if (node == formattingElementRecord)
1633                 break;
1634             // 6.5
1635             RefPtr<Element> newElement = m_tree.createHTMLElementFromElementRecord(node);
1636             HTMLFormattingElementList::Entry* nodeEntry = m_tree.activeFormattingElements()->find(node->element());
1637             nodeEntry->replaceElement(newElement.get());
1638             node->replaceElement(newElement.release());
1639             // 6.4 -- Intentionally out of order to handle the case where node
1640             // was replaced in 6.5.
1641             // http://www.w3.org/Bugs/Public/show_bug.cgi?id=10096
1642             if (lastNode == furthestBlock)
1643                 bookmark.moveToAfter(nodeEntry);
1644             // 6.6
1645             if (Element* parent = lastNode->element()->parentElement())
1646                 parent->parserRemoveChild(lastNode->element());
1647             node->element()->parserAddChild(lastNode->element());
1648             if (lastNode->element()->parentElement()->attached() && !lastNode->element()->attached())
1649                 lastNode->element()->lazyAttach();
1650             // 6.7
1651             lastNode = node;
1652         }
1653         // 7
1654         const AtomicString& commonAncestorTag = commonAncestor->localName();
1655         if (Element* parent = lastNode->element()->parentElement())
1656             parent->parserRemoveChild(lastNode->element());
1657         // FIXME: If this moves to HTMLConstructionSite, this check should use
1658         // causesFosterParenting(tagName) instead.
1659         if (commonAncestorTag == tableTag
1660             || commonAncestorTag == trTag
1661             || isTableBodyContextTag(commonAncestorTag))
1662             m_tree.fosterParent(lastNode->element());
1663         else {
1664             commonAncestor->parserAddChild(lastNode->element());
1665             if (lastNode->element()->parentElement()->attached() && !lastNode->element()->attached())
1666                 lastNode->element()->lazyAttach();
1667         }
1668         // 8
1669         RefPtr<Element> newElement = m_tree.createHTMLElementFromElementRecord(formattingElementRecord);
1670         // 9
1671         newElement->takeAllChildrenFrom(furthestBlock->element());
1672         // 10
1673         Element* furthestBlockElement = furthestBlock->element();
1674         // FIXME: All this creation / parserAddChild / attach business should
1675         //        be in HTMLConstructionSite.  My guess is that steps 8--12
1676         //        should all be in some HTMLConstructionSite function.
1677         furthestBlockElement->parserAddChild(newElement);
1678         if (furthestBlockElement->attached() && !newElement->attached()) {
1679             // Notice that newElement might already be attached if, for example, one of the reparented
1680             // children is a style element, which attaches itself automatically.
1681             newElement->attach();
1682         }
1683         // 11
1684         m_tree.activeFormattingElements()->swapTo(formattingElement, newElement.get(), bookmark);
1685         // 12
1686         m_tree.openElements()->remove(formattingElement);
1687         m_tree.openElements()->insertAbove(newElement, furthestBlock);
1688     }
1689 }
1690
1691 void HTMLTreeBuilder::resetInsertionModeAppropriately()
1692 {
1693     // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#reset-the-insertion-mode-appropriately
1694     bool last = false;
1695     HTMLElementStack::ElementRecord* nodeRecord = m_tree.openElements()->topRecord();
1696     while (1) {
1697         Element* node = nodeRecord->element();
1698         if (node == m_tree.openElements()->bottom()) {
1699             ASSERT(isParsingFragment());
1700             last = true;
1701             node = m_fragmentContext.contextElement();
1702         }
1703         if (node->hasTagName(selectTag)) {
1704             ASSERT(isParsingFragment());
1705             return setInsertionMode(InSelectMode);
1706         }
1707         if (node->hasTagName(tdTag) || node->hasTagName(thTag))
1708             return setInsertionMode(InCellMode);
1709         if (node->hasTagName(trTag))
1710             return setInsertionMode(InRowMode);
1711         if (node->hasTagName(tbodyTag) || node->hasTagName(theadTag) || node->hasTagName(tfootTag))
1712             return setInsertionMode(InTableBodyMode);
1713         if (node->hasTagName(captionTag))
1714             return setInsertionMode(InCaptionMode);
1715         if (node->hasTagName(colgroupTag)) {
1716             ASSERT(isParsingFragment());
1717             return setInsertionMode(InColumnGroupMode);
1718         }
1719         if (node->hasTagName(tableTag))
1720             return setInsertionMode(InTableMode);
1721         if (node->hasTagName(headTag)) {
1722             ASSERT(isParsingFragment());
1723             return setInsertionMode(InBodyMode);
1724         }
1725         if (node->hasTagName(bodyTag))
1726             return setInsertionMode(InBodyMode);
1727         if (node->hasTagName(framesetTag)) {
1728             ASSERT(isParsingFragment());
1729             return setInsertionMode(InFramesetMode);
1730         }
1731         if (node->hasTagName(htmlTag)) {
1732             ASSERT(isParsingFragment());
1733             return setInsertionMode(BeforeHeadMode);
1734         }
1735         if (node->namespaceURI() == SVGNames::svgNamespaceURI
1736             || node->namespaceURI() == MathMLNames::mathmlNamespaceURI)
1737             return setInsertionMode(InForeignContentMode);
1738         if (last) {
1739             ASSERT(isParsingFragment());
1740             return setInsertionMode(InBodyMode);
1741         }
1742         nodeRecord = nodeRecord->next();
1743     }
1744 }
1745
1746 void HTMLTreeBuilder::processEndTagForInTableBody(AtomicHTMLToken& token)
1747 {
1748     ASSERT(token.type() == HTMLToken::EndTag);
1749     if (isTableBodyContextTag(token.name())) {
1750         if (!m_tree.openElements()->inTableScope(token.name())) {
1751             parseError(token);
1752             return;
1753         }
1754         m_tree.openElements()->popUntilTableBodyScopeMarker();
1755         m_tree.openElements()->pop();
1756         setInsertionMode(InTableMode);
1757         return;
1758     }
1759     if (token.name() == tableTag) {
1760         // FIXME: This is slow.
1761         if (!m_tree.openElements()->inTableScope(tbodyTag.localName()) && !m_tree.openElements()->inTableScope(theadTag.localName()) && !m_tree.openElements()->inTableScope(tfootTag.localName())) {
1762             ASSERT(isParsingFragment());
1763             parseError(token);
1764             return;
1765         }
1766         m_tree.openElements()->popUntilTableBodyScopeMarker();
1767         ASSERT(isTableBodyContextTag(m_tree.currentElement()->localName()));
1768         processFakeEndTag(m_tree.currentElement()->tagQName());
1769         reprocessEndTag(token);
1770         return;
1771     }
1772     if (token.name() == bodyTag
1773         || isCaptionColOrColgroupTag(token.name())
1774         || token.name() == htmlTag
1775         || isTableCellContextTag(token.name())
1776         || token.name() == trTag) {
1777         parseError(token);
1778         return;
1779     }
1780     processEndTagForInTable(token);
1781 }
1782
1783 void HTMLTreeBuilder::processEndTagForInRow(AtomicHTMLToken& token)
1784 {
1785     ASSERT(token.type() == HTMLToken::EndTag);
1786     if (token.name() == trTag) {
1787         processTrEndTagForInRow();
1788         return;
1789     }
1790     if (token.name() == tableTag) {
1791         if (!processTrEndTagForInRow()) {
1792             ASSERT(isParsingFragment());
1793             return;
1794         }
1795         ASSERT(insertionMode() == InTableBodyMode);
1796         reprocessEndTag(token);
1797         return;
1798     }
1799     if (isTableBodyContextTag(token.name())) {
1800         if (!m_tree.openElements()->inTableScope(token.name())) {
1801             parseError(token);
1802             return;
1803         }
1804         processFakeEndTag(trTag);
1805         ASSERT(insertionMode() == InTableBodyMode);
1806         reprocessEndTag(token);
1807         return;
1808     }
1809     if (token.name() == bodyTag
1810         || isCaptionColOrColgroupTag(token.name())
1811         || token.name() == htmlTag
1812         || isTableCellContextTag(token.name())) {
1813         parseError(token);
1814         return;
1815     }
1816     processEndTagForInTable(token);
1817 }
1818
1819 void HTMLTreeBuilder::processEndTagForInCell(AtomicHTMLToken& token)
1820 {
1821     ASSERT(token.type() == HTMLToken::EndTag);
1822     if (isTableCellContextTag(token.name())) {
1823         if (!m_tree.openElements()->inTableScope(token.name())) {
1824             parseError(token);
1825             return;
1826         }
1827         m_tree.generateImpliedEndTags();
1828         if (!m_tree.currentElement()->hasLocalName(token.name()))
1829             parseError(token);
1830         m_tree.openElements()->popUntilPopped(token.name());
1831         m_tree.activeFormattingElements()->clearToLastMarker();
1832         setInsertionMode(InRowMode);
1833         return;
1834     }
1835     if (token.name() == bodyTag
1836         || isCaptionColOrColgroupTag(token.name())
1837         || token.name() == htmlTag) {
1838         parseError(token);
1839         return;
1840     }
1841     if (token.name() == tableTag
1842         || token.name() == trTag
1843         || isTableBodyContextTag(token.name())) {
1844         if (!m_tree.openElements()->inTableScope(token.name())) {
1845             ASSERT(isTableBodyContextTag(token.name()) || isParsingFragment());
1846             parseError(token);
1847             return;
1848         }
1849         closeTheCell();
1850         reprocessEndTag(token);
1851         return;
1852     }
1853     processEndTagForInBody(token);
1854 }
1855
1856 void HTMLTreeBuilder::processEndTagForInBody(AtomicHTMLToken& token)
1857 {
1858     ASSERT(token.type() == HTMLToken::EndTag);
1859     if (token.name() == bodyTag) {
1860         processBodyEndTagForInBody(token);
1861         return;
1862     }
1863     if (token.name() == htmlTag) {
1864         AtomicHTMLToken endBody(HTMLToken::EndTag, bodyTag.localName());
1865         if (processBodyEndTagForInBody(endBody))
1866             reprocessEndTag(token);
1867         return;
1868     }
1869     if (token.name() == addressTag
1870         || token.name() == articleTag
1871         || token.name() == asideTag
1872         || token.name() == blockquoteTag
1873         || token.name() == buttonTag
1874         || token.name() == centerTag
1875         || token.name() == detailsTag
1876         || token.name() == dirTag
1877         || token.name() == divTag
1878         || token.name() == dlTag
1879         || token.name() == fieldsetTag
1880         || token.name() == figcaptionTag
1881         || token.name() == figureTag
1882         || token.name() == footerTag
1883         || token.name() == headerTag
1884         || token.name() == hgroupTag
1885         || token.name() == listingTag
1886         || token.name() == menuTag
1887         || token.name() == navTag
1888         || token.name() == olTag
1889         || token.name() == preTag
1890         || token.name() == sectionTag
1891         || token.name() == summaryTag
1892         || token.name() == ulTag) {
1893         if (!m_tree.openElements()->inScope(token.name())) {
1894             parseError(token);
1895             return;
1896         }
1897         m_tree.generateImpliedEndTags();
1898         if (!m_tree.currentElement()->hasLocalName(token.name()))
1899             parseError(token);
1900         m_tree.openElements()->popUntilPopped(token.name());
1901         return;
1902     }
1903     if (token.name() == formTag) {
1904         RefPtr<Element> node = m_tree.takeForm();
1905         if (!node || !m_tree.openElements()->inScope(node.get())) {
1906             parseError(token);
1907             return;
1908         }
1909         m_tree.generateImpliedEndTags();
1910         if (m_tree.currentElement() != node.get())
1911             parseError(token);
1912         m_tree.openElements()->remove(node.get());
1913     }
1914     if (token.name() == pTag) {
1915         if (!m_tree.openElements()->inButtonScope(token.name())) {
1916             parseError(token);
1917             processFakeStartTag(pTag);
1918             ASSERT(m_tree.openElements()->inScope(token.name()));
1919             reprocessEndTag(token);
1920             return;
1921         }
1922         m_tree.generateImpliedEndTagsWithExclusion(token.name());
1923         if (!m_tree.currentElement()->hasLocalName(token.name()))
1924             parseError(token);
1925         m_tree.openElements()->popUntilPopped(token.name());
1926         return;
1927     }
1928     if (token.name() == liTag) {
1929         if (!m_tree.openElements()->inListItemScope(token.name())) {
1930             parseError(token);
1931             return;
1932         }
1933         m_tree.generateImpliedEndTagsWithExclusion(token.name());
1934         if (!m_tree.currentElement()->hasLocalName(token.name()))
1935             parseError(token);
1936         m_tree.openElements()->popUntilPopped(token.name());
1937         return;
1938     }
1939     if (token.name() == ddTag
1940         || token.name() == dtTag) {
1941         if (!m_tree.openElements()->inScope(token.name())) {
1942             parseError(token);
1943             return;
1944         }
1945         m_tree.generateImpliedEndTagsWithExclusion(token.name());
1946         if (!m_tree.currentElement()->hasLocalName(token.name()))
1947             parseError(token);
1948         m_tree.openElements()->popUntilPopped(token.name());
1949         return;
1950     }
1951     if (isNumberedHeaderTag(token.name())) {
1952         if (!m_tree.openElements()->hasNumberedHeaderElementInScope()) {
1953             parseError(token);
1954             return;
1955         }
1956         m_tree.generateImpliedEndTags();
1957         if (!m_tree.currentElement()->hasLocalName(token.name()))
1958             parseError(token);
1959         m_tree.openElements()->popUntilNumberedHeaderElementPopped();
1960         return;
1961     }
1962     if (isFormattingTag(token.name())) {
1963         callTheAdoptionAgency(token);
1964         return;
1965     }
1966     if (token.name() == appletTag
1967         || token.name() == marqueeTag
1968         || token.name() == objectTag) {
1969         if (!m_tree.openElements()->inScope(token.name())) {
1970             parseError(token);
1971             return;
1972         }
1973         m_tree.generateImpliedEndTags();
1974         if (!m_tree.currentElement()->hasLocalName(token.name()))
1975             parseError(token);
1976         m_tree.openElements()->popUntilPopped(token.name());
1977         m_tree.activeFormattingElements()->clearToLastMarker();
1978         return;
1979     }
1980     if (token.name() == brTag) {
1981         parseError(token);
1982         processFakeStartTag(brTag);
1983         return;
1984     }
1985     processAnyOtherEndTagForInBody(token);
1986 }
1987
1988 bool HTMLTreeBuilder::processCaptionEndTagForInCaption()
1989 {
1990     if (!m_tree.openElements()->inTableScope(captionTag.localName())) {
1991         ASSERT(isParsingFragment());
1992         // FIXME: parse error
1993         return false;
1994     }
1995     m_tree.generateImpliedEndTags();
1996     // FIXME: parse error if (!m_tree.currentElement()->hasTagName(captionTag))
1997     m_tree.openElements()->popUntilPopped(captionTag.localName());
1998     m_tree.activeFormattingElements()->clearToLastMarker();
1999     setInsertionMode(InTableMode);
2000     return true;
2001 }
2002
2003 bool HTMLTreeBuilder::processTrEndTagForInRow()
2004 {
2005     if (!m_tree.openElements()->inTableScope(trTag.localName())) {
2006         ASSERT(isParsingFragment());
2007         // FIXME: parse error
2008         return false;
2009     }
2010     m_tree.openElements()->popUntilTableRowScopeMarker();
2011     ASSERT(m_tree.currentElement()->hasTagName(trTag));
2012     m_tree.openElements()->pop();
2013     setInsertionMode(InTableBodyMode);
2014     return true;
2015 }
2016
2017 bool HTMLTreeBuilder::processTableEndTagForInTable()
2018 {
2019     if (!m_tree.openElements()->inTableScope(tableTag)) {
2020         ASSERT(isParsingFragment());
2021         // FIXME: parse error.
2022         return false;
2023     }
2024     m_tree.openElements()->popUntilPopped(tableTag.localName());
2025     resetInsertionModeAppropriately();
2026     return true;
2027 }
2028
2029 void HTMLTreeBuilder::processEndTagForInTable(AtomicHTMLToken& token)
2030 {
2031     ASSERT(token.type() == HTMLToken::EndTag);
2032     if (token.name() == tableTag) {
2033         processTableEndTagForInTable();
2034         return;
2035     }
2036     if (token.name() == bodyTag
2037         || isCaptionColOrColgroupTag(token.name())
2038         || token.name() == htmlTag
2039         || isTableBodyContextTag(token.name())
2040         || isTableCellContextTag(token.name())
2041         || token.name() == trTag) {
2042         parseError(token);
2043         return;
2044     }
2045     // Is this redirection necessary here?
2046     HTMLConstructionSite::RedirectToFosterParentGuard redirecter(m_tree);
2047     processEndTagForInBody(token);
2048 }
2049
2050 void HTMLTreeBuilder::processEndTag(AtomicHTMLToken& token)
2051 {
2052     ASSERT(token.type() == HTMLToken::EndTag);
2053     switch (insertionMode()) {
2054     case InitialMode:
2055         ASSERT(insertionMode() == InitialMode);
2056         defaultForInitial();
2057         // Fall through.
2058     case BeforeHTMLMode:
2059         ASSERT(insertionMode() == BeforeHTMLMode);
2060         if (token.name() != headTag && token.name() != bodyTag && token.name() != htmlTag && token.name() != brTag) {
2061             parseError(token);
2062             return;
2063         }
2064         defaultForBeforeHTML();
2065         // Fall through.
2066     case BeforeHeadMode:
2067         ASSERT(insertionMode() == BeforeHeadMode);
2068         if (token.name() != headTag && token.name() != bodyTag && token.name() != htmlTag && token.name() != brTag) {
2069             parseError(token);
2070             return;
2071         }
2072         defaultForBeforeHead();
2073         // Fall through.
2074     case InHeadMode:
2075         ASSERT(insertionMode() == InHeadMode);
2076         if (token.name() == headTag) {
2077             m_tree.openElements()->popHTMLHeadElement();
2078             setInsertionMode(AfterHeadMode);
2079             return;
2080         }
2081         if (token.name() != bodyTag && token.name() != htmlTag && token.name() != brTag) {
2082             parseError(token);
2083             return;
2084         }
2085         defaultForInHead();
2086         // Fall through.
2087     case AfterHeadMode:
2088         ASSERT(insertionMode() == AfterHeadMode);
2089         if (token.name() != bodyTag && token.name() != htmlTag && token.name() != brTag) {
2090             parseError(token);
2091             return;
2092         }
2093         defaultForAfterHead();
2094         // Fall through
2095     case InBodyMode:
2096         ASSERT(insertionMode() == InBodyMode);
2097         processEndTagForInBody(token);
2098         break;
2099     case InTableMode:
2100         ASSERT(insertionMode() == InTableMode);
2101         processEndTagForInTable(token);
2102         break;
2103     case InCaptionMode:
2104         ASSERT(insertionMode() == InCaptionMode);
2105         if (token.name() == captionTag) {
2106             processCaptionEndTagForInCaption();
2107             return;
2108         }
2109         if (token.name() == tableTag) {
2110             parseError(token);
2111             if (!processCaptionEndTagForInCaption()) {
2112                 ASSERT(isParsingFragment());
2113                 return;
2114             }
2115             reprocessEndTag(token);
2116             return;
2117         }
2118         if (token.name() == bodyTag
2119             || token.name() == colTag
2120             || token.name() == colgroupTag
2121             || token.name() == htmlTag
2122             || isTableBodyContextTag(token.name())
2123             || isTableCellContextTag(token.name())
2124             || token.name() == trTag) {
2125             parseError(token);
2126             return;
2127         }
2128         processEndTagForInBody(token);
2129         break;
2130     case InColumnGroupMode:
2131         ASSERT(insertionMode() == InColumnGroupMode);
2132         if (token.name() == colgroupTag) {
2133             processColgroupEndTagForInColumnGroup();
2134             return;
2135         }
2136         if (token.name() == colTag) {
2137             parseError(token);
2138             return;
2139         }
2140         if (!processColgroupEndTagForInColumnGroup()) {
2141             ASSERT(isParsingFragment());
2142             return;
2143         }
2144         reprocessEndTag(token);
2145         break;
2146     case InRowMode:
2147         ASSERT(insertionMode() == InRowMode);
2148         processEndTagForInRow(token);
2149         break;
2150     case InCellMode:
2151         ASSERT(insertionMode() == InCellMode);
2152         processEndTagForInCell(token);
2153         break;
2154     case InTableBodyMode:
2155         ASSERT(insertionMode() == InTableBodyMode);
2156         processEndTagForInTableBody(token);
2157         break;
2158     case AfterBodyMode:
2159         ASSERT(insertionMode() == AfterBodyMode);
2160         if (token.name() == htmlTag) {
2161             if (isParsingFragment()) {
2162                 parseError(token);
2163                 return;
2164             }
2165             setInsertionMode(AfterAfterBodyMode);
2166             return;
2167         }
2168         prepareToReprocessToken();
2169         // Fall through.
2170     case AfterAfterBodyMode:
2171         ASSERT(insertionMode() == AfterBodyMode || insertionMode() == AfterAfterBodyMode);
2172         parseError(token);
2173         setInsertionMode(InBodyMode);
2174         reprocessEndTag(token);
2175         break;
2176     case InHeadNoscriptMode:
2177         ASSERT(insertionMode() == InHeadNoscriptMode);
2178         if (token.name() == noscriptTag) {
2179             ASSERT(m_tree.currentElement()->hasTagName(noscriptTag));
2180             m_tree.openElements()->pop();
2181             ASSERT(m_tree.currentElement()->hasTagName(headTag));
2182             setInsertionMode(InHeadMode);
2183             return;
2184         }
2185         if (token.name() != brTag) {
2186             parseError(token);
2187             return;
2188         }
2189         defaultForInHeadNoscript();
2190         processToken(token);
2191         break;
2192     case TextMode:
2193         if (token.name() == scriptTag) {
2194             // Pause ourselves so that parsing stops until the script can be processed by the caller.
2195             m_isPaused = true;
2196             ASSERT(m_tree.currentElement()->hasTagName(scriptTag));
2197             m_scriptToProcess = m_tree.currentElement();
2198             m_scriptToProcessStartPosition = WTF::toOneBasedTextPosition(m_lastScriptElementStartPosition);
2199             m_tree.openElements()->pop();
2200             if (isParsingFragment() && m_fragmentContext.scriptingPermission() == FragmentScriptingNotAllowed)
2201                 m_scriptToProcess->removeAllChildren();
2202             setInsertionMode(m_originalInsertionMode);
2203
2204             // This token will not have been created by the tokenizer if a
2205             // self-closing script tag was encountered and pre-HTML5 parser
2206             // quirks are enabled. We must set the tokenizer's state to
2207             // DataState explicitly if the tokenizer didn't have a chance to.
2208             ASSERT(m_tokenizer->state() == HTMLTokenizer::DataState || m_usePreHTML5ParserQuirks);
2209             m_tokenizer->setState(HTMLTokenizer::DataState);
2210             return;
2211         }
2212         m_tree.openElements()->pop();
2213         setInsertionMode(m_originalInsertionMode);
2214         break;
2215     case InFramesetMode:
2216         ASSERT(insertionMode() == InFramesetMode);
2217         if (token.name() == framesetTag) {
2218             if (m_tree.currentElement() == m_tree.openElements()->htmlElement()) {
2219                 parseError(token);
2220                 return;
2221             }
2222             m_tree.openElements()->pop();
2223             if (!isParsingFragment() && !m_tree.currentElement()->hasTagName(framesetTag))
2224                 setInsertionMode(AfterFramesetMode);
2225             return;
2226         }
2227         break;
2228     case AfterFramesetMode:
2229         ASSERT(insertionMode() == AfterFramesetMode);
2230         if (token.name() == htmlTag) {
2231             setInsertionMode(AfterAfterFramesetMode);
2232             return;
2233         }
2234         // Fall through.
2235     case AfterAfterFramesetMode:
2236         ASSERT(insertionMode() == AfterFramesetMode || insertionMode() == AfterAfterFramesetMode);
2237         parseError(token);
2238         break;
2239     case InSelectInTableMode:
2240         ASSERT(insertionMode() == InSelectInTableMode);
2241         if (token.name() == captionTag
2242             || token.name() == tableTag
2243             || isTableBodyContextTag(token.name())
2244             || token.name() == trTag
2245             || isTableCellContextTag(token.name())) {
2246             parseError(token);
2247             if (m_tree.openElements()->inTableScope(token.name())) {
2248                 AtomicHTMLToken endSelect(HTMLToken::EndTag, selectTag.localName());
2249                 processEndTag(endSelect);
2250                 reprocessEndTag(token);
2251             }
2252             return;
2253         }
2254         // Fall through.
2255     case InSelectMode:
2256         ASSERT(insertionMode() == InSelectMode || insertionMode() == InSelectInTableMode);
2257         if (token.name() == optgroupTag) {
2258             if (m_tree.currentElement()->hasTagName(optionTag) && m_tree.oneBelowTop()->hasTagName(optgroupTag))
2259                 processFakeEndTag(optionTag);
2260             if (m_tree.currentElement()->hasTagName(optgroupTag)) {
2261                 m_tree.openElements()->pop();
2262                 return;
2263             }
2264             parseError(token);
2265             return;
2266         }
2267         if (token.name() == optionTag) {
2268             if (m_tree.currentElement()->hasTagName(optionTag)) {
2269                 m_tree.openElements()->pop();
2270                 return;
2271             }
2272             parseError(token);
2273             return;
2274         }
2275         if (token.name() == selectTag) {
2276             if (!m_tree.openElements()->inSelectScope(token.name())) {
2277                 ASSERT(isParsingFragment());
2278                 parseError(token);
2279                 return;
2280             }
2281             m_tree.openElements()->popUntilPopped(selectTag.localName());
2282             resetInsertionModeAppropriately();
2283             return;
2284         }
2285         break;
2286     case InTableTextMode:
2287         defaultForInTableText();
2288         processEndTag(token);
2289         break;
2290     case InForeignContentMode:
2291         if (token.name() == SVGNames::scriptTag && m_tree.currentElement()->hasTagName(SVGNames::scriptTag)) {
2292             notImplemented();
2293             return;
2294         }
2295         if (m_tree.currentElement()->namespaceURI() != xhtmlNamespaceURI) {
2296             // FIXME: This code just wants an Element* iterator, instead of an ElementRecord*
2297             HTMLElementStack::ElementRecord* nodeRecord = m_tree.openElements()->topRecord();
2298             if (!nodeRecord->element()->hasLocalName(token.name()))
2299                 parseError(token);
2300             while (1) {
2301                 if (nodeRecord->element()->hasLocalName(token.name())) {
2302                     m_tree.openElements()->popUntilPopped(nodeRecord->element());
2303                     break;
2304                 }
2305                 nodeRecord = nodeRecord->next();
2306                 if (nodeRecord->element()->namespaceURI() == xhtmlNamespaceURI)
2307                     break;
2308             }
2309         }
2310         // Any other end tag (also the last two steps of "An end tag, if the current node is not an element in the HTML namespace."
2311         processForeignContentUsingInBodyModeAndResetMode(token);
2312         break;
2313     }
2314 }
2315
2316 void HTMLTreeBuilder::prepareToReprocessToken()
2317 {
2318     if (m_hasPendingForeignInsertionModeSteps) {
2319         resetForeignInsertionMode();
2320         m_hasPendingForeignInsertionModeSteps = false;
2321     }
2322 }
2323
2324 void HTMLTreeBuilder::reprocessStartTag(AtomicHTMLToken& token)
2325 {
2326     prepareToReprocessToken();
2327     processStartTag(token);
2328 }
2329
2330 void HTMLTreeBuilder::reprocessEndTag(AtomicHTMLToken& token)
2331 {
2332     prepareToReprocessToken();
2333     processEndTag(token);
2334 }
2335
2336 class HTMLTreeBuilder::FakeInsertionMode : public Noncopyable {
2337 public:
2338     FakeInsertionMode(HTMLTreeBuilder* treeBuilder, InsertionMode mode)
2339         : m_treeBuilder(treeBuilder)
2340         , m_originalMode(treeBuilder->insertionMode())
2341     {
2342         m_treeBuilder->setFakeInsertionMode(mode);
2343     }
2344
2345     ~FakeInsertionMode()
2346     {
2347         if (m_treeBuilder->isFakeInsertionMode())
2348             m_treeBuilder->setInsertionMode(m_originalMode);
2349     }
2350
2351 private:
2352     HTMLTreeBuilder* m_treeBuilder;
2353     InsertionMode m_originalMode;
2354 };
2355
2356 void HTMLTreeBuilder::processForeignContentUsingInBodyModeAndResetMode(AtomicHTMLToken& token)
2357 {
2358     m_hasPendingForeignInsertionModeSteps = true;
2359     {
2360         FakeInsertionMode fakeMode(this, InBodyMode);
2361         processToken(token);
2362     }
2363     if (m_hasPendingForeignInsertionModeSteps)
2364         resetForeignInsertionMode();
2365 }
2366
2367 void HTMLTreeBuilder::resetForeignInsertionMode()
2368 {
2369     if (insertionMode() == InForeignContentMode)
2370         resetInsertionModeAppropriately();
2371 }
2372
2373 void HTMLTreeBuilder::processComment(AtomicHTMLToken& token)
2374 {
2375     ASSERT(token.type() == HTMLToken::Comment);
2376     if (m_insertionMode == InitialMode
2377         || m_insertionMode == BeforeHTMLMode
2378         || m_insertionMode == AfterAfterBodyMode
2379         || m_insertionMode == AfterAfterFramesetMode) {
2380         m_tree.insertCommentOnDocument(token);
2381         return;
2382     }
2383     if (m_insertionMode == AfterBodyMode) {
2384         m_tree.insertCommentOnHTMLHtmlElement(token);
2385         return;
2386     }
2387     if (m_insertionMode == InTableTextMode) {
2388         defaultForInTableText();
2389         processComment(token);
2390         return;
2391     }
2392     m_tree.insertComment(token);
2393 }
2394
2395 void HTMLTreeBuilder::processCharacter(AtomicHTMLToken& token)
2396 {
2397     ASSERT(token.type() == HTMLToken::Character);
2398     ExternalCharacterTokenBuffer buffer(token);
2399     processCharacterBuffer(buffer);
2400 }
2401
// Consumes the characters held by |buffer| according to the current insertion
// mode.
//
// Control flow notes:
//  - The first five cases deliberately fall through into one another: each one
//    deals with leading whitespace, returns if the buffer is now empty, and
//    otherwise calls the matching defaultFor*() helper before continuing with
//    the next case.
//  - Cases that end in "goto ReprocessBuffer" re-run the switch so that the
//    remaining characters are dispatched on the insertion mode in effect at
//    that point.
2402 void HTMLTreeBuilder::processCharacterBuffer(ExternalCharacterTokenBuffer& buffer)
2403 {
2404 ReprocessBuffer:
2405     switch (insertionMode()) {
2406     case InitialMode: {
2407         ASSERT(insertionMode() == InitialMode);
2408         buffer.skipLeadingWhitespace();
2409         if (buffer.isEmpty())
2410             return;
2411         defaultForInitial();
2412         // Fall through.
2413     }
2414     case BeforeHTMLMode: {
2415         ASSERT(insertionMode() == BeforeHTMLMode);
2416         buffer.skipLeadingWhitespace();
2417         if (buffer.isEmpty())
2418             return;
2419         defaultForBeforeHTML();
2420         // Fall through.
2421     }
2422     case BeforeHeadMode: {
2423         ASSERT(insertionMode() == BeforeHeadMode);
2424         buffer.skipLeadingWhitespace();
2425         if (buffer.isEmpty())
2426             return;
2427         defaultForBeforeHead();
2428         // Fall through.
2429     }
2430     case InHeadMode: {
2431         ASSERT(insertionMode() == InHeadMode);
             // Unlike the cases above, leading whitespace is kept here: it is
             // inserted as a text node before the rest of the buffer is handled.
2432         String leadingWhitespace = buffer.takeLeadingWhitespace();
2433         if (!leadingWhitespace.isEmpty())
2434             m_tree.insertTextNode(leadingWhitespace);
2435         if (buffer.isEmpty())
2436             return;
2437         defaultForInHead();
2438         // Fall through.
2439     }
2440     case AfterHeadMode: {
2441         ASSERT(insertionMode() == AfterHeadMode);
2442         String leadingWhitespace = buffer.takeLeadingWhitespace();
2443         if (!leadingWhitespace.isEmpty())
2444             m_tree.insertTextNode(leadingWhitespace);
2445         if (buffer.isEmpty())
2446             return;
2447         defaultForAfterHead();
2448         // Fall through.
2449     }
2450     case InBodyMode:
2451     case InCaptionMode:
2452     case InCellMode: {
2453         ASSERT(insertionMode() == InBodyMode || insertionMode() == InCaptionMode || insertionMode() == InCellMode);
2454         m_tree.reconstructTheActiveFormattingElements();
2455         String characters = buffer.takeRemaining();
2456         m_tree.insertTextNode(characters);
             // m_framesetOk is cleared once text that is not entirely
             // whitespace/replacement characters has been inserted.
2457         if (m_framesetOk && !isAllWhitespaceOrReplacementCharacters(characters))
2458             m_framesetOk = false;
2459         break;
2460     }
2461     case InTableMode:
2462     case InTableBodyMode:
2463     case InRowMode: {
2464         ASSERT(insertionMode() == InTableMode || insertionMode() == InTableBodyMode || insertionMode() == InRowMode);
2465         ASSERT(m_pendingTableCharacters.isEmpty());
             // Remember the current mode so it can be restored later (see
             // defaultForInTableText()), then switch to InTableTextMode.
2466         m_originalInsertionMode = m_insertionMode;
2467         setInsertionMode(InTableTextMode);
2468         prepareToReprocessToken();
2469         // Fall through.
2470     }
2471     case InTableTextMode: {
             // Nothing is inserted yet; the characters are handed over to
             // m_pendingTableCharacters.
2472         buffer.giveRemainingTo(m_pendingTableCharacters);
2473         break;
2474     }
2475     case InColumnGroupMode: {
2476         ASSERT(insertionMode() == InColumnGroupMode);
2477         String leadingWhitespace = buffer.takeLeadingWhitespace();
2478         if (!leadingWhitespace.isEmpty())
2479             m_tree.insertTextNode(leadingWhitespace);
2480         if (buffer.isEmpty())
2481             return;
             // A false return is asserted to happen only while parsing a
             // fragment.
2482         if (!processColgroupEndTagForInColumnGroup()) {
2483             ASSERT(isParsingFragment());
2484             // The spec tells us to drop these characters on the floor.
2485             buffer.takeLeadingNonWhitespace();
2486             if (buffer.isEmpty())
2487                 return;
2488         }
2489         prepareToReprocessToken();
2490         goto ReprocessBuffer;
2491     }
2492     case AfterBodyMode:
2493     case AfterAfterBodyMode: {
2494         ASSERT(insertionMode() == AfterBodyMode || insertionMode() == AfterAfterBodyMode);
2495         // FIXME: parse error
2496         setInsertionMode(InBodyMode);
2497         prepareToReprocessToken();
2498         goto ReprocessBuffer;
             // Not reached: the goto above always transfers control.
2499         break;
2500     }
2501     case TextMode: {
2502         ASSERT(insertionMode() == TextMode);
2503         m_tree.insertTextNode(buffer.takeRemaining());
2504         break;
2505     }
2506     case InHeadNoscriptMode: {
2507         ASSERT(insertionMode() == InHeadNoscriptMode);
2508         String leadingWhitespace = buffer.takeLeadingWhitespace();
2509         if (!leadingWhitespace.isEmpty())
2510             m_tree.insertTextNode(leadingWhitespace);
2511         if (buffer.isEmpty())
2512             return;
2513         defaultForInHeadNoscript();
2514         goto ReprocessBuffer;
             // Not reached: the goto above always transfers control.
2515         break;
2516     }
2517     case InFramesetMode:
2518     case AfterFramesetMode: {
             // NOTE(review): the ASSERT also accepts AfterAfterFramesetMode,
             // although that mode has its own case further down and so never
             // enters this one.
2519         ASSERT(insertionMode() == InFramesetMode || insertionMode() == AfterFramesetMode || insertionMode() == AfterAfterFramesetMode);
2520         String leadingWhitespace = buffer.takeRemainingWhitespace();
2521         if (!leadingWhitespace.isEmpty())
2522             m_tree.insertTextNode(leadingWhitespace);
2523         // FIXME: We should generate a parse error if we skipped over any
2524         // non-whitespace characters.
2525         break;
2526     }
2527     case InSelectInTableMode:
2528     case InSelectMode: {
2529         ASSERT(insertionMode() == InSelectMode || insertionMode() == InSelectInTableMode);
2530         m_tree.insertTextNode(buffer.takeRemaining());
2531         break;
2532     }
2533     case InForeignContentMode: {
2534         ASSERT(insertionMode() == InForeignContentMode);
2535         String characters = buffer.takeRemaining();
2536         m_tree.insertTextNode(characters);
             // Note that this check uses isAllWhitespace(), whereas the InBodyMode
             // case uses isAllWhitespaceOrReplacementCharacters().
2537         if (m_framesetOk && !isAllWhitespace(characters))
2538             m_framesetOk = false;
2539         break;
2540     }
2541     case AfterAfterFramesetMode: {
2542         String leadingWhitespace = buffer.takeRemainingWhitespace();
2543         if (!leadingWhitespace.isEmpty()) {
2544             m_tree.reconstructTheActiveFormattingElements();
2545             m_tree.insertTextNode(leadingWhitespace);
2546         }
2547         // FIXME: We should generate a parse error if we skipped over any
2548         // non-whitespace characters.
2549         break;
2550     }
2551     }
2552 }
2553
// Handles the end-of-file token for the current insertion mode.
//
// Control flow notes:
//  - The first five cases fall through into one another, calling the matching
//    defaultFor*() helper at each step, until the InBodyMode group is reached.
//  - Cases that leave the switch with "break" reach the bottom of the function,
//    where every element is popped off the stack of open elements.
//  - Cases that "return" skip that final pop; most of them have either changed
//    the insertion mode and re-entered processEndOfFile() recursively, or are
//    asserted to be fragment-parsing situations.
2554 void HTMLTreeBuilder::processEndOfFile(AtomicHTMLToken& token)
2555 {
2556     ASSERT(token.type() == HTMLToken::EndOfFile);
2557     switch (insertionMode()) {
2558     case InitialMode:
2559         ASSERT(insertionMode() == InitialMode);
2560         defaultForInitial();
2561         // Fall through.
2562     case BeforeHTMLMode:
2563         ASSERT(insertionMode() == BeforeHTMLMode);
2564         defaultForBeforeHTML();
2565         // Fall through.
2566     case BeforeHeadMode:
2567         ASSERT(insertionMode() == BeforeHeadMode);
2568         defaultForBeforeHead();
2569         // Fall through.
2570     case InHeadMode:
2571         ASSERT(insertionMode() == InHeadMode);
2572         defaultForInHead();
2573         // Fall through.
2574     case AfterHeadMode:
2575         ASSERT(insertionMode() == AfterHeadMode);
2576         defaultForAfterHead();
2577         // Fall through
2578     case InBodyMode:
2579     case InCellMode:
2580     case InCaptionMode:
2581     case InRowMode:
2582         ASSERT(insertionMode() == InBodyMode || insertionMode() == InCellMode || insertionMode() == InCaptionMode || insertionMode() == InRowMode);
2583         notImplemented(); // Emit parse error based on what elements are still open.
2584         break;
2585     case AfterBodyMode:
2586     case AfterAfterBodyMode:
2587         ASSERT(insertionMode() == AfterBodyMode || insertionMode() == AfterAfterBodyMode);
             // Returns without popping the open elements.
2588         return;
2589     case InHeadNoscriptMode:
2590         ASSERT(insertionMode() == InHeadNoscriptMode);
2591         defaultForInHeadNoscript();
2592         processEndOfFile(token);
2593         return;
2594     case AfterFramesetMode:
2595     case AfterAfterFramesetMode:
2596         ASSERT(insertionMode() == AfterFramesetMode || insertionMode() == AfterAfterFramesetMode);
2597         break;
2598     case InFramesetMode:
2599     case InTableMode:
2600     case InTableBodyMode:
2601     case InSelectInTableMode:
2602     case InSelectMode:
2603         ASSERT(insertionMode() == InSelectMode || insertionMode() == InSelectInTableMode || insertionMode() == InTableMode || insertionMode() == InFramesetMode || insertionMode() == InTableBodyMode);
             // Reaching EOF with anything other than the <html> element as the
             // current element is reported as a parse error.
2604         if (m_tree.currentElement() != m_tree.openElements()->htmlElement())
2605             parseError(token);
2606         break;
2607     case InColumnGroupMode:
2608         if (m_tree.currentElement() == m_tree.openElements()->htmlElement()) {
2609             ASSERT(isParsingFragment());
2610             return;
2611         }
2612         if (!processColgroupEndTagForInColumnGroup()) {
2613             ASSERT(isParsingFragment());
2614             return;
2615         }
2616         prepareToReprocessToken();
2617         processEndOfFile(token);
2618         return;
2619     case InForeignContentMode:
             // Handle EOF again as if in InBodyMode.
2620         setInsertionMode(InBodyMode);
2621         processEndOfFile(token);
2622         return;
2623     case InTableTextMode:
             // Deal with the pending table characters first (which also restores
             // the saved insertion mode), then handle EOF again.
2624         defaultForInTableText();
2625         processEndOfFile(token);
2626         return;
2627     case TextMode:
2628         parseError(token);
2629         if (m_tree.currentElement()->hasTagName(scriptTag))
2630             notImplemented(); // mark the script element as "already started".
             // Pop the current element, go back to the mode that was saved when
             // TextMode was entered, and handle EOF again in that mode.
2631         m_tree.openElements()->pop();
2632         setInsertionMode(m_originalInsertionMode);
2633         prepareToReprocessToken();
2634         processEndOfFile(token);
2635         return;
2636     }
2637     ASSERT(m_tree.openElements()->top());
2638     m_tree.openElements()->popAll();
2639 }
2640
2641 void HTMLTreeBuilder::defaultForInitial()
2642 {
2643     notImplemented();
2644     if (!m_fragmentContext.fragment())
2645         m_document->setCompatibilityMode(Document::QuirksMode);
2646     // FIXME: parse error
2647     setInsertionMode(BeforeHTMLMode);
2648     prepareToReprocessToken();
2649 }
2650
2651 void HTMLTreeBuilder::defaultForBeforeHTML()
2652 {
2653     AtomicHTMLToken startHTML(HTMLToken::StartTag, htmlTag.localName());
2654     m_tree.insertHTMLHtmlStartTagBeforeHTML(startHTML);
2655     setInsertionMode(BeforeHeadMode);
2656     prepareToReprocessToken();
2657 }
2658
2659 void HTMLTreeBuilder::defaultForBeforeHead()
2660 {
2661     AtomicHTMLToken startHead(HTMLToken::StartTag, headTag.localName());
2662     processStartTag(startHead);
2663     prepareToReprocessToken();
2664 }
2665
2666 void HTMLTreeBuilder::defaultForInHead()
2667 {
2668     AtomicHTMLToken endHead(HTMLToken::EndTag, headTag.localName());
2669     processEndTag(endHead);
2670     prepareToReprocessToken();
2671 }
2672
2673 void HTMLTreeBuilder::defaultForInHeadNoscript()
2674 {
2675     AtomicHTMLToken endNoscript(HTMLToken::EndTag, noscriptTag.localName());
2676     processEndTag(endNoscript);
2677     prepareToReprocessToken();
2678 }
2679
2680 void HTMLTreeBuilder::defaultForAfterHead()
2681 {
2682     AtomicHTMLToken startBody(HTMLToken::StartTag, bodyTag.localName());
2683     processStartTag(startBody);
2684     m_framesetOk = true;
2685     prepareToReprocessToken();
2686 }
2687
2688 void HTMLTreeBuilder::defaultForInTableText()
2689 {
2690     String characters = String::adopt(m_pendingTableCharacters);
2691     if (!isAllWhitespace(characters)) {
2692         // FIXME: parse error
2693         HTMLConstructionSite::RedirectToFosterParentGuard redirecter(m_tree);
2694         m_tree.reconstructTheActiveFormattingElements();
2695         m_tree.insertTextNode(characters);
2696         m_framesetOk = false;
2697         setInsertionMode(m_originalInsertionMode);
2698         prepareToReprocessToken();
2699         return;
2700     }
2701     m_tree.insertTextNode(characters);
2702     setInsertionMode(m_originalInsertionMode);
2703     prepareToReprocessToken();
2704 }
2705
2706 bool HTMLTreeBuilder::processStartTagForInHead(AtomicHTMLToken& token)
2707 {
2708     ASSERT(token.type() == HTMLToken::StartTag);
2709     if (token.name() == htmlTag) {
2710         m_tree.insertHTMLHtmlStartTagInBody(token);
2711         return true;
2712     }
2713     if (token.name() == baseTag
2714         || token.name() == basefontTag
2715         || token.name() == bgsoundTag
2716         || token.name() == commandTag
2717         || token.name() == linkTag
2718         || token.name() == metaTag) {
2719         m_tree.insertSelfClosingHTMLElement(token);
2720         // Note: The custom processing for the <meta> tag is done in HTMLMetaElement::process().
2721         return true;
2722     }
2723     if (token.name() == titleTag) {
2724         processGenericRCDATAStartTag(token);
2725         return true;
2726     }
2727     if (token.name() == noscriptTag) {
2728         if (scriptEnabled(m_document->frame())) {
2729             processGenericRawTextStartTag(token);
2730             return true;
2731         }
2732         m_tree.insertHTMLElement(token);
2733         setInsertionMode(InHeadNoscriptMode);
2734         return true;
2735     }
2736     if (token.name() == noframesTag || token.name() == styleTag) {
2737         processGenericRawTextStartTag(token);
2738         return true;
2739     }
2740     if (token.name() == scriptTag) {
2741         processScriptStartTag(token);
2742         if (m_usePreHTML5ParserQuirks && token.selfClosing())
2743             processFakeEndTag(scriptTag);
2744         return true;
2745     }
2746     if (token.name() == headTag) {
2747         parseError(token);
2748         return true;
2749     }
2750     return false;
2751 }
2752
2753 void HTMLTreeBuilder::processGenericRCDATAStartTag(AtomicHTMLToken& token)
2754 {
2755     ASSERT(token.type() == HTMLToken::StartTag);
2756     m_tree.insertHTMLElement(token);
2757     m_tokenizer->setState(HTMLTokenizer::RCDATAState);
2758     m_originalInsertionMode = m_insertionMode;
2759     setInsertionMode(TextMode);
2760 }
2761
2762 void HTMLTreeBuilder::processGenericRawTextStartTag(AtomicHTMLToken& token)
2763 {
2764     ASSERT(token.type() == HTMLToken::StartTag);
2765     m_tree.insertHTMLElement(token);
2766     m_tokenizer->setState(HTMLTokenizer::RAWTEXTState);
2767     m_originalInsertionMode = m_insertionMode;
2768     setInsertionMode(TextMode);
2769 }
2770
2771 void HTMLTreeBuilder::processScriptStartTag(AtomicHTMLToken& token)
2772 {
2773     ASSERT(token.type() == HTMLToken::StartTag);
2774     m_tree.insertScriptElement(token);
2775     m_tokenizer->setState(HTMLTokenizer::ScriptDataState);
2776     m_originalInsertionMode = m_insertionMode;
2777
2778     TextPosition0 position = TextPosition0(WTF::ZeroBasedNumber::fromZeroBasedInt(m_tokenizer->lineNumber()), WTF::ZeroBasedNumber::base());
2779     m_lastScriptElementStartPosition = position;
2780
2781     setInsertionMode(TextMode);
2782 }
2783
2784 void HTMLTreeBuilder::finished()
2785 {
2786     ASSERT(m_document);
2787     if (isParsingFragment()) {
2788         m_fragmentContext.finished();
2789         return;
2790     }
2791
2792     // Warning, this may detach the parser. Do not do anything else after this.
2793     m_document->finishedParsing();
2794 }
2795
2796 bool HTMLTreeBuilder::scriptEnabled(Frame* frame)
2797 {
2798     if (!frame)
2799         return false;
2800     return frame->script()->canExecuteScripts(NotAboutToExecuteScript);
2801 }
2802
2803 bool HTMLTreeBuilder::pluginsEnabled(Frame* frame)
2804 {
2805     if (!frame)
2806         return false;
2807     return frame->loader()->subframeLoader()->allowPlugins(NotAboutToInstantiatePlugin);
2808 }
2809
2810 }