6b58afaea265f41d08d7f6590d700582b92143a7
[WebKit-https.git] / Source / WebCore / html / parser / HTMLTreeBuilder.cpp
1 /*
2  * Copyright (C) 2010 Google, Inc. All Rights Reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  * 1. Redistributions of source code must retain the above copyright
8  *    notice, this list of conditions and the following disclaimer.
9  * 2. Redistributions in binary form must reproduce the above copyright
10  *    notice, this list of conditions and the following disclaimer in the
11  *    documentation and/or other materials provided with the distribution.
12  *
13  * THIS SOFTWARE IS PROVIDED BY GOOGLE INC. ``AS IS'' AND ANY
14  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL GOOGLE INC. OR
17  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
20  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
21  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24  */
25
26 #include "config.h"
27 #include "HTMLTreeBuilder.h"
28
29 #include "CharacterNames.h"
30 #include "Comment.h"
31 #include "DocumentFragment.h"
32 #include "DocumentType.h"
33 #include "Frame.h"
34 #include "HTMLDocument.h"
35 #include "HTMLDocumentParser.h"
36 #include "HTMLElementFactory.h"
37 #include "HTMLFormElement.h"
38 #include "HTMLHtmlElement.h"
39 #include "HTMLNames.h"
40 #include "HTMLParserIdioms.h"
41 #include "HTMLScriptElement.h"
42 #include "HTMLToken.h"
43 #include "HTMLTokenizer.h"
44 #include "LocalizedStrings.h"
45 #include "MathMLNames.h"
46 #include "NotImplemented.h"
47 #include "SVGNames.h"
48 #include "ScriptController.h"
49 #include "Text.h"
50 #include "XLinkNames.h"
51 #include "XMLNSNames.h"
52 #include "XMLNames.h"
53
54 namespace WebCore {
55
56 using namespace HTMLNames;
57
58 static const int uninitializedLineNumberValue = -1;
59
60 static TextPosition1 uninitializedPositionValue1()
61 {
62     return TextPosition1(WTF::OneBasedNumber::fromOneBasedInt(-1), WTF::OneBasedNumber::base());
63 }
64
65 namespace {
66
67 inline bool isHTMLSpaceOrReplacementCharacter(UChar character)
68 {
69     return isHTMLSpace(character) || character == replacementCharacter;
70 }
71
72 inline bool isAllWhitespace(const String& string)
73 {
74     return string.isAllSpecialCharacters<isHTMLSpace>();
75 }
76
77 inline bool isAllWhitespaceOrReplacementCharacters(const String& string)
78 {
79     return string.isAllSpecialCharacters<isHTMLSpaceOrReplacementCharacter>();
80 }
81
82 bool isNumberedHeaderTag(const AtomicString& tagName)
83 {
84     return tagName == h1Tag
85         || tagName == h2Tag
86         || tagName == h3Tag
87         || tagName == h4Tag
88         || tagName == h5Tag
89         || tagName == h6Tag;
90 }
91
92 bool isCaptionColOrColgroupTag(const AtomicString& tagName)
93 {
94     return tagName == captionTag
95         || tagName == colTag
96         || tagName == colgroupTag;
97 }
98
99 bool isTableCellContextTag(const AtomicString& tagName)
100 {
101     return tagName == thTag || tagName == tdTag;
102 }
103
104 bool isTableBodyContextTag(const AtomicString& tagName)
105 {
106     return tagName == tbodyTag
107         || tagName == tfootTag
108         || tagName == theadTag;
109 }
110
111 // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#special
112 bool isSpecialNode(Node* node)
113 {
114     if (node->hasTagName(MathMLNames::miTag)
115         || node->hasTagName(MathMLNames::moTag)
116         || node->hasTagName(MathMLNames::mnTag)
117         || node->hasTagName(MathMLNames::msTag)
118         || node->hasTagName(MathMLNames::mtextTag)
119         || node->hasTagName(MathMLNames::annotation_xmlTag)
120         || node->hasTagName(SVGNames::foreignObjectTag)
121         || node->hasTagName(SVGNames::descTag)
122         || node->hasTagName(SVGNames::titleTag))
123         return true;
124     if (node->namespaceURI() != xhtmlNamespaceURI)
125         return false;
126     const AtomicString& tagName = node->localName();
127     return tagName == addressTag
128         || tagName == appletTag
129         || tagName == areaTag
130         || tagName == articleTag
131         || tagName == asideTag
132         || tagName == baseTag
133         || tagName == basefontTag
134         || tagName == bgsoundTag
135         || tagName == blockquoteTag
136         || tagName == bodyTag
137         || tagName == brTag
138         || tagName == buttonTag
139         || tagName == captionTag
140         || tagName == centerTag
141         || tagName == colTag
142         || tagName == colgroupTag
143         || tagName == commandTag
144         || tagName == ddTag
145         || tagName == detailsTag
146         || tagName == dirTag
147         || tagName == divTag
148         || tagName == dlTag
149         || tagName == dtTag
150         || tagName == embedTag
151         || tagName == fieldsetTag
152         || tagName == figcaptionTag
153         || tagName == figureTag
154         || tagName == footerTag
155         || tagName == formTag
156         || tagName == frameTag
157         || tagName == framesetTag
158         || isNumberedHeaderTag(tagName)
159         || tagName == headTag
160         || tagName == headerTag
161         || tagName == hgroupTag
162         || tagName == hrTag
163         || tagName == htmlTag
164         || tagName == iframeTag
165         || tagName == imgTag
166         || tagName == inputTag
167         || tagName == isindexTag
168         || tagName == liTag
169         || tagName == linkTag
170         || tagName == listingTag
171         || tagName == marqueeTag
172         || tagName == menuTag
173         || tagName == metaTag
174         || tagName == navTag
175         || tagName == noembedTag
176         || tagName == noframesTag
177         || tagName == noscriptTag
178         || tagName == objectTag
179         || tagName == olTag
180         || tagName == pTag
181         || tagName == paramTag
182         || tagName == plaintextTag
183         || tagName == preTag
184         || tagName == scriptTag
185         || tagName == sectionTag
186         || tagName == selectTag
187         || tagName == styleTag
188         || tagName == summaryTag
189         || tagName == tableTag
190         || isTableBodyContextTag(tagName)
191         || tagName == tdTag
192         || tagName == textareaTag
193         || tagName == thTag
194         || tagName == titleTag
195         || tagName == trTag
196         || tagName == ulTag
197         || tagName == wbrTag
198         || tagName == xmpTag;
199 }
200
201 bool isNonAnchorNonNobrFormattingTag(const AtomicString& tagName)
202 {
203     return tagName == bTag
204         || tagName == bigTag
205         || tagName == codeTag
206         || tagName == emTag
207         || tagName == fontTag
208         || tagName == iTag
209         || tagName == sTag
210         || tagName == smallTag
211         || tagName == strikeTag
212         || tagName == strongTag
213         || tagName == ttTag
214         || tagName == uTag;
215 }
216
217 bool isNonAnchorFormattingTag(const AtomicString& tagName)
218 {
219     return tagName == nobrTag
220         || isNonAnchorNonNobrFormattingTag(tagName);
221 }
222
223 // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#formatting
224 bool isFormattingTag(const AtomicString& tagName)
225 {
226     return tagName == aTag || isNonAnchorFormattingTag(tagName);
227 }
228
229 HTMLFormElement* closestFormAncestor(Element* element)
230 {
231     while (element) {
232         if (element->hasTagName(formTag))
233             return static_cast<HTMLFormElement*>(element);
234         ContainerNode* parent = element->parentNode();
235         if (!parent || !parent->isElementNode())
236             return 0;
237         element = static_cast<Element*>(parent);
238     }
239     return 0;
240 }
241
242 } // namespace
243
244 class HTMLTreeBuilder::ExternalCharacterTokenBuffer {
245     WTF_MAKE_NONCOPYABLE(ExternalCharacterTokenBuffer);
246 public:
247     explicit ExternalCharacterTokenBuffer(AtomicHTMLToken& token)
248         : m_current(token.characters().data())
249         , m_end(m_current + token.characters().size())
250     {
251         ASSERT(!isEmpty());
252     }
253
254     explicit ExternalCharacterTokenBuffer(const String& string)
255         : m_current(string.characters())
256         , m_end(m_current + string.length())
257     {
258         ASSERT(!isEmpty());
259     }
260
261     ~ExternalCharacterTokenBuffer()
262     {
263         ASSERT(isEmpty());
264     }
265
266     bool isEmpty() const { return m_current == m_end; }
267
268     void skipLeadingWhitespace()
269     {
270         skipLeading<isHTMLSpace>();
271     }
272
273     String takeLeadingWhitespace()
274     {
275         return takeLeading<isHTMLSpace>();
276     }
277
278     String takeLeadingNonWhitespace()
279     {
280         return takeLeading<isNotHTMLSpace>();
281     }
282
283     String takeRemaining()
284     {
285         ASSERT(!isEmpty());
286         const UChar* start = m_current;
287         m_current = m_end;
288         return String(start, m_current - start);
289     }
290
291     void giveRemainingTo(Vector<UChar>& recipient)
292     {
293         recipient.append(m_current, m_end - m_current);
294         m_current = m_end;
295     }
296
297     String takeRemainingWhitespace()
298     {
299         ASSERT(!isEmpty());
300         Vector<UChar> whitespace;
301         do {
302             UChar cc = *m_current++;
303             if (isHTMLSpace(cc))
304                 whitespace.append(cc);
305         } while (m_current < m_end);
306         // Returning the null string when there aren't any whitespace
307         // characters is slightly cleaner semantically because we don't want
308         // to insert a text node (as opposed to inserting an empty text node).
309         if (whitespace.isEmpty())
310             return String();
311         return String::adopt(whitespace);
312     }
313
314 private:
315     template<bool characterPredicate(UChar)>
316     void skipLeading()
317     {
318         ASSERT(!isEmpty());
319         while (characterPredicate(*m_current)) {
320             if (++m_current == m_end)
321                 return;
322         }
323     }
324
325     template<bool characterPredicate(UChar)>
326     String takeLeading()
327     {
328         ASSERT(!isEmpty());
329         const UChar* start = m_current;
330         skipLeading<characterPredicate>();
331         if (start == m_current)
332             return String();
333         return String(start, m_current - start);
334     }
335
336     const UChar* m_current;
337     const UChar* m_end;
338 };
339
340
// Constructor used when building the tree of a whole document (as opposed to
// a fragment). Starts in InitialMode, unpaused, with frameset-ok set and no
// script pending; the construction site is created for |document| with
// scripting allowed and its third argument set to false, whereas the
// fragment-parsing constructor below passes true there.
HTMLTreeBuilder::HTMLTreeBuilder(HTMLDocumentParser* parser, HTMLDocument* document, bool reportErrors, bool usePreHTML5ParserQuirks)
    : m_framesetOk(true)
    , m_document(document)
    , m_tree(document, FragmentScriptingAllowed, false)
    , m_reportErrors(reportErrors)
    , m_isPaused(false)
    , m_insertionMode(InitialMode)
    , m_originalInsertionMode(InitialMode)
    , m_parser(parser)
    , m_scriptToProcessStartPosition(uninitializedPositionValue1())
    , m_lastScriptElementStartPosition(TextPosition0::belowRangePosition())
    , m_usePreHTML5ParserQuirks(usePreHTML5ParserQuirks)
    , m_hasPendingForeignInsertionModeSteps(false)
{
}
356
357 // FIXME: Member variables should be grouped into self-initializing structs to
358 // minimize code duplication between these constructors.
359 HTMLTreeBuilder::HTMLTreeBuilder(HTMLDocumentParser* parser, DocumentFragment* fragment, Element* contextElement, FragmentScriptingPermission scriptingPermission, bool usePreHTML5ParserQuirks)
360     : m_framesetOk(true)
361     , m_fragmentContext(fragment, contextElement, scriptingPermission)
362     , m_document(m_fragmentContext.document())
363     , m_tree(m_document, scriptingPermission, true)
364     , m_reportErrors(false) // FIXME: Why not report errors in fragments?
365     , m_isPaused(false)
366     , m_insertionMode(InitialMode)
367     , m_originalInsertionMode(InitialMode)
368     , m_parser(parser)
369     , m_scriptToProcessStartPosition(uninitializedPositionValue1())
370     , m_lastScriptElementStartPosition(TextPosition0::belowRangePosition())
371     , m_usePreHTML5ParserQuirks(usePreHTML5ParserQuirks)
372     , m_hasPendingForeignInsertionModeSteps(false)
373 {
374     if (contextElement) {
375         // Steps 4.2-4.6 of the HTML5 Fragment Case parsing algorithm:
376         // http://www.whatwg.org/specs/web-apps/current-work/multipage/the-end.html#fragment-case
377         m_document->setCompatibilityMode(contextElement->document()->compatibilityMode());
378         processFakeStartTag(htmlTag);
379         resetInsertionModeAppropriately();
380         m_tree.setForm(closestFormAncestor(contextElement));
381     }
382 }
383
// Nothing is torn down explicitly here; detach() is the method that drops the
// document pointer and detaches the construction site.
HTMLTreeBuilder::~HTMLTreeBuilder()
{
}
387
// Severs the builder from its document: clears the document pointer and
// detaches the construction site.
void HTMLTreeBuilder::detach()
{
    // This call makes little sense in fragment mode, but for consistency
    // DocumentParser expects detach() to always be called before it's destroyed.
    m_document = 0;
    // HTMLConstructionSite might be on the callstack when detach() is called
    // otherwise we'd just call m_tree.clear() here instead.
    m_tree.detach();
}
397
// NOTE: HTML5 requires that we use a dummy document when parsing
// document fragments.  However, creating a new Document element
// for each fragment is very slow (Document() does too much work, and
// innerHTML is a common call).  So we use a shared dummy document.
// This sharing works because there can only ever be one fragment
// parser at any time.  Fragment parsing is synchronous and done
// only from the main thread.  It should be impossible for javascript
// (or anything else) to ever hold a reference to the dummy document.
// See https://bugs.webkit.org/show_bug.cgi?id=48719
class DummyDocumentFactory {
    WTF_MAKE_NONCOPYABLE(DummyDocumentFactory); WTF_MAKE_FAST_ALLOCATED;
public:
    // Use an explicit create/release here to ASSERT this sharing is safe.
    // createDummyDocument() hands out the shared document (creating it on
    // first use); releaseDocument() hands it back and removes its children.
    static HTMLDocument* createDummyDocument();
    static void releaseDocument(HTMLDocument*);

private:
    // The one shared dummy document; 0 until first requested.
    static HTMLDocument* s_sharedDummyDocument;
    // Despite the name this is a plain counter, not a lock: it is incremented
    // on create and decremented on release purely so ASSERTs can catch the
    // document being handed out twice.
    static int s_sharedDummyDocumentMutex;
};
418
419 HTMLDocument* DummyDocumentFactory::createDummyDocument()
420 {
421     if (!s_sharedDummyDocument) {
422         s_sharedDummyDocument = HTMLDocument::create(0, KURL()).releaseRef();
423         s_sharedDummyDocumentMutex = 0;
424     }
425     ASSERT(!s_sharedDummyDocumentMutex);
426     ASSERT(!s_sharedDummyDocument->hasChildNodes());
427     s_sharedDummyDocumentMutex++;
428     return s_sharedDummyDocument;
429 }
430
431 void DummyDocumentFactory::releaseDocument(HTMLDocument* dummyDocument)
432 {
433     ASSERT(s_sharedDummyDocument == dummyDocument);
434     s_sharedDummyDocumentMutex--;
435     ASSERT(!s_sharedDummyDocumentMutex);
436     dummyDocument->removeAllChildren();
437 }
438
// Storage for DummyDocumentFactory's static state: no shared document has
// been created yet and none is handed out.
HTMLDocument* DummyDocumentFactory::s_sharedDummyDocument = 0;
int DummyDocumentFactory::s_sharedDummyDocumentMutex = 0;
441
442 HTMLTreeBuilder::FragmentParsingContext::FragmentParsingContext()
443     : m_fragment(0)
444     , m_contextElement(0)
445     , m_scriptingPermission(FragmentScriptingAllowed)
446 {
447 }
448
449 HTMLTreeBuilder::FragmentParsingContext::FragmentParsingContext(DocumentFragment* fragment, Element* contextElement, FragmentScriptingPermission scriptingPermission)
450     : m_dummyDocumentForFragmentParsing(DummyDocumentFactory::createDummyDocument())
451     , m_fragment(fragment)
452     , m_contextElement(contextElement)
453     , m_scriptingPermission(scriptingPermission)
454 {
455     m_dummyDocumentForFragmentParsing->setCompatibilityMode(fragment->document()->compatibilityMode());
456     // Setting the baseURL should work the same as it would have had we passed
457     // it during HTMLDocument() construction, since the new document is empty.
458     m_dummyDocumentForFragmentParsing->setURL(fragment->document()->baseURI());
459 }
460
// Returns the dummy document that fragment content is parsed into. Only
// meaningful for a context that was constructed with a fragment (ASSERTed).
Document* HTMLTreeBuilder::FragmentParsingContext::document() const
{
    ASSERT(m_fragment);
    return m_dummyDocumentForFragmentParsing;
}
466
467 void HTMLTreeBuilder::FragmentParsingContext::finished()
468 {
469     // Populate the DocumentFragment with the parsed content now that we're done.
470     ContainerNode* root = m_dummyDocumentForFragmentParsing;
471     if (m_contextElement)
472         root = m_dummyDocumentForFragmentParsing->documentElement();
473     m_fragment->takeAllChildrenFrom(root);
474     ASSERT(!m_dummyDocumentForFragmentParsing->hasChildNodes());
475     DummyDocumentFactory::releaseDocument(m_dummyDocumentForFragmentParsing);
476     m_dummyDocumentForFragmentParsing = 0;
477 }
478
// Intentionally empty: the dummy document is handed back in finished(), not
// here.
HTMLTreeBuilder::FragmentParsingContext::~FragmentParsingContext()
{
}
482
483 PassRefPtr<Element> HTMLTreeBuilder::takeScriptToProcess(TextPosition1& scriptStartPosition)
484 {
485     // Unpause ourselves, callers may pause us again when processing the script.
486     // The HTML5 spec is written as though scripts are executed inside the tree
487     // builder.  We pause the parser to exit the tree builder, and then resume
488     // before running scripts.
489     m_isPaused = false;
490     scriptStartPosition = m_scriptToProcessStartPosition;
491     m_scriptToProcessStartPosition = uninitializedPositionValue1();
492     return m_scriptToProcess.release();
493 }
494
// Entry point for raw tokenizer output: wraps |rawToken| in an
// AtomicHTMLToken and forwards it to constructTreeFromAtomicToken().
void HTMLTreeBuilder::constructTreeFromToken(HTMLToken& rawToken)
{
    AtomicHTMLToken token(rawToken);
    constructTreeFromAtomicToken(token);
}
500
501 void HTMLTreeBuilder::constructTreeFromAtomicToken(AtomicHTMLToken& token)
502 {
503     processToken(token);
504
505     // Swallowing U+0000 characters isn't in the HTML5 spec, but turning all
506     // the U+0000 characters into replacement characters has compatibility
507     // problems.
508     m_parser->tokenizer()->setForceNullCharacterReplacement(m_insertionMode == TextMode || m_insertionMode == InForeignContentMode);
509     m_parser->tokenizer()->setShouldAllowCDATA(m_insertionMode == InForeignContentMode && m_tree.currentElement()->namespaceURI() != xhtmlNamespaceURI);
510 }
511
512 void HTMLTreeBuilder::processToken(AtomicHTMLToken& token)
513 {
514     switch (token.type()) {
515     case HTMLToken::Uninitialized:
516         ASSERT_NOT_REACHED();
517         break;
518     case HTMLToken::DOCTYPE:
519         processDoctypeToken(token);
520         break;
521     case HTMLToken::StartTag:
522         processStartTag(token);
523         break;
524     case HTMLToken::EndTag:
525         processEndTag(token);
526         break;
527     case HTMLToken::Comment:
528         processComment(token);
529         return;
530     case HTMLToken::Character:
531         processCharacter(token);
532         break;
533     case HTMLToken::EndOfFile:
534         processEndOfFile(token);
535         break;
536     }
537 }
538
539 void HTMLTreeBuilder::processDoctypeToken(AtomicHTMLToken& token)
540 {
541     ASSERT(token.type() == HTMLToken::DOCTYPE);
542     if (m_insertionMode == InitialMode) {
543         m_tree.insertDoctype(token);
544         setInsertionMode(BeforeHTMLMode);
545         return;
546     }
547     if (m_insertionMode == InTableTextMode) {
548         defaultForInTableText();
549         processDoctypeToken(token);
550         return;
551     }
552     parseError(token);
553 }
554
555 void HTMLTreeBuilder::processFakeStartTag(const QualifiedName& tagName, PassRefPtr<NamedNodeMap> attributes)
556 {
557     // FIXME: We'll need a fancier conversion than just "localName" for SVG/MathML tags.
558     AtomicHTMLToken fakeToken(HTMLToken::StartTag, tagName.localName(), attributes);
559     processStartTag(fakeToken);
560 }
561
562 void HTMLTreeBuilder::processFakeEndTag(const QualifiedName& tagName)
563 {
564     // FIXME: We'll need a fancier conversion than just "localName" for SVG/MathML tags.
565     AtomicHTMLToken fakeToken(HTMLToken::EndTag, tagName.localName());
566     processEndTag(fakeToken);
567 }
568
// Feeds |characters| through the character-processing path as though they had
// come from a character token. |characters| must not be empty: the buffer
// constructed below asserts that, and so does this function.
void HTMLTreeBuilder::processFakeCharacters(const String& characters)
{
    ASSERT(!characters.isEmpty());
    ExternalCharacterTokenBuffer buffer(characters);
    processCharacterBuffer(buffer);
}
575
576 void HTMLTreeBuilder::processFakePEndTagIfPInButtonScope()
577 {
578     if (!m_tree.openElements()->inButtonScope(pTag.localName()))
579         return;
580     AtomicHTMLToken endP(HTMLToken::EndTag, pTag.localName());
581     processEndTag(endP);
582 }
583
584 PassRefPtr<NamedNodeMap> HTMLTreeBuilder::attributesForIsindexInput(AtomicHTMLToken& token)
585 {
586     RefPtr<NamedNodeMap> attributes = token.takeAtributes();
587     if (!attributes)
588         attributes = NamedNodeMap::create();
589     else {
590         attributes->removeAttribute(nameAttr);
591         attributes->removeAttribute(actionAttr);
592         attributes->removeAttribute(promptAttr);
593     }
594
595     RefPtr<Attribute> mappedAttribute = Attribute::createMapped(nameAttr, isindexTag.localName());
596     attributes->insertAttribute(mappedAttribute.release(), false);
597     return attributes.release();
598 }
599
600 void HTMLTreeBuilder::processIsindexStartTagForInBody(AtomicHTMLToken& token)
601 {
602     ASSERT(token.type() == HTMLToken::StartTag);
603     ASSERT(token.name() == isindexTag);
604     parseError(token);
605     if (m_tree.form())
606         return;
607     notImplemented(); // Acknowledge self-closing flag
608     processFakeStartTag(formTag);
609     RefPtr<Attribute> actionAttribute = token.getAttributeItem(actionAttr);
610     if (actionAttribute) {
611         ASSERT(m_tree.currentElement()->hasTagName(formTag));
612         m_tree.currentElement()->setAttribute(actionAttr, actionAttribute->value());
613     }
614     processFakeStartTag(hrTag);
615     processFakeStartTag(labelTag);
616     RefPtr<Attribute> promptAttribute = token.getAttributeItem(promptAttr);
617     if (promptAttribute)
618         processFakeCharacters(promptAttribute->value());
619     else
620         processFakeCharacters(searchableIndexIntroduction());
621     processFakeStartTag(inputTag, attributesForIsindexInput(token));
622     notImplemented(); // This second set of characters may be needed by non-english locales.
623     processFakeEndTag(labelTag);
624     processFakeStartTag(hrTag);
625     processFakeEndTag(formTag);
626 }
627
628 namespace {
629
630 bool isLi(const Element* element)
631 {
632     return element->hasTagName(liTag);
633 }
634
635 bool isDdOrDt(const Element* element)
636 {
637     return element->hasTagName(ddTag)
638         || element->hasTagName(dtTag);
639 }
640
641 }
642
643 template <bool shouldClose(const Element*)>
644 void HTMLTreeBuilder::processCloseWhenNestedTag(AtomicHTMLToken& token)
645 {
646     m_framesetOk = false;
647     HTMLElementStack::ElementRecord* nodeRecord = m_tree.openElements()->topRecord();
648     while (1) {
649         Element* node = nodeRecord->element();
650         if (shouldClose(node)) {
651             processFakeEndTag(node->tagQName());
652             break;
653         }
654         if (isSpecialNode(node) && !node->hasTagName(addressTag) && !node->hasTagName(divTag) && !node->hasTagName(pTag))
655             break;
656         nodeRecord = nodeRecord->next();
657     }
658     processFakePEndTagIfPInButtonScope();
659     m_tree.insertHTMLElement(token);
660 }
661
662 namespace {
663
664 typedef HashMap<AtomicString, QualifiedName> PrefixedNameToQualifiedNameMap;
665
666 void mapLoweredLocalNameToName(PrefixedNameToQualifiedNameMap* map, QualifiedName** names, size_t length)
667 {
668     for (size_t i = 0; i < length; ++i) {
669         const QualifiedName& name = *names[i];
670         const AtomicString& localName = name.localName();
671         AtomicString loweredLocalName = localName.lower();
672         if (loweredLocalName != localName)
673             map->add(loweredLocalName, name);
674     }
675 }
676
677 void adjustSVGTagNameCase(AtomicHTMLToken& token)
678 {
679     static PrefixedNameToQualifiedNameMap* caseMap = 0;
680     if (!caseMap) {
681         caseMap = new PrefixedNameToQualifiedNameMap;
682         size_t length = 0;
683         QualifiedName** svgTags = SVGNames::getSVGTags(&length);
684         mapLoweredLocalNameToName(caseMap, svgTags, length);
685     }
686
687     const QualifiedName& casedName = caseMap->get(token.name());
688     if (casedName.localName().isNull())
689         return;
690     token.setName(casedName.localName());
691 }
692
693 template<QualifiedName** getAttrs(size_t* length)>
694 void adjustAttributes(AtomicHTMLToken& token)
695 {
696     static PrefixedNameToQualifiedNameMap* caseMap = 0;
697     if (!caseMap) {
698         caseMap = new PrefixedNameToQualifiedNameMap;
699         size_t length = 0;
700         QualifiedName** attrs = getAttrs(&length);
701         mapLoweredLocalNameToName(caseMap, attrs, length);
702     }
703
704     NamedNodeMap* attributes = token.attributes();
705     if (!attributes)
706         return;
707
708     for (unsigned x = 0; x < attributes->length(); ++x) {
709         Attribute* attribute = attributes->attributeItem(x);
710         const QualifiedName& casedName = caseMap->get(attribute->localName());
711         if (!casedName.localName().isNull())
712             attribute->parserSetName(casedName);
713     }
714 }
715
// Applies adjustAttributes() to |token| using the SVG attribute name list
// (SVGNames::getSVGAttrs).
void adjustSVGAttributes(AtomicHTMLToken& token)
{
    adjustAttributes<SVGNames::getSVGAttrs>(token);
}
720
// Applies adjustAttributes() to |token| using the MathML attribute name list
// (MathMLNames::getMathMLAttrs).
void adjustMathMLAttributes(AtomicHTMLToken& token)
{
    adjustAttributes<MathMLNames::getMathMLAttrs>(token);
}
725
726 void addNamesWithPrefix(PrefixedNameToQualifiedNameMap* map, const AtomicString& prefix, QualifiedName** names, size_t length)
727 {
728     for (size_t i = 0; i < length; ++i) {
729         QualifiedName* name = names[i];
730         const AtomicString& localName = name->localName();
731         AtomicString prefixColonLocalName(prefix + ":" + localName);
732         QualifiedName nameWithPrefix(prefix, localName, name->namespaceURI());
733         map->add(prefixColonLocalName, nameWithPrefix);
734     }
735 }
736
737 void adjustForeignAttributes(AtomicHTMLToken& token)
738 {
739     static PrefixedNameToQualifiedNameMap* map = 0;
740     if (!map) {
741         map = new PrefixedNameToQualifiedNameMap;
742         size_t length = 0;
743         QualifiedName** attrs = XLinkNames::getXLinkAttrs(&length);
744         addNamesWithPrefix(map, "xlink", attrs, length);
745
746         attrs = XMLNames::getXMLAttrs(&length);
747         addNamesWithPrefix(map, "xml", attrs, length);
748
749         map->add("xmlns", XMLNSNames::xmlnsAttr);
750         map->add("xmlns:xlink", QualifiedName("xmlns", "xlink", XMLNSNames::xmlnsNamespaceURI));
751     }
752
753     NamedNodeMap* attributes = token.attributes();
754     if (!attributes)
755         return;
756
757     for (unsigned x = 0; x < attributes->length(); ++x) {
758         Attribute* attribute = attributes->attributeItem(x);
759         const QualifiedName& name = map->get(attribute->localName());
760         if (!name.localName().isNull())
761             attribute->parserSetName(name);
762     }
763 }
764
765 }
766
767 void HTMLTreeBuilder::processStartTagForInBody(AtomicHTMLToken& token)
768 {
769     ASSERT(token.type() == HTMLToken::StartTag);
770     if (token.name() == htmlTag) {
771         m_tree.insertHTMLHtmlStartTagInBody(token);
772         return;
773     }
774     if (token.name() == baseTag
775         || token.name() == basefontTag
776         || token.name() == bgsoundTag
777         || token.name() == commandTag
778         || token.name() == linkTag
779         || token.name() == metaTag
780         || token.name() == noframesTag
781         || token.name() == scriptTag
782         || token.name() == styleTag
783         || token.name() == titleTag) {
784         bool didProcess = processStartTagForInHead(token);
785         ASSERT_UNUSED(didProcess, didProcess);
786         return;
787     }
788     if (token.name() == bodyTag) {
789         if (!m_tree.openElements()->secondElementIsHTMLBodyElement() || m_tree.openElements()->hasOnlyOneElement()) {
790             ASSERT(isParsingFragment());
791             return;
792         }
793         m_tree.insertHTMLBodyStartTagInBody(token);
794         return;
795     }
796     if (token.name() == framesetTag) {
797         parseError(token);
798         if (!m_tree.openElements()->secondElementIsHTMLBodyElement() || m_tree.openElements()->hasOnlyOneElement()) {
799             ASSERT(isParsingFragment());
800             return;
801         }
802         if (!m_framesetOk)
803             return;
804         ExceptionCode ec = 0;
805         m_tree.openElements()->bodyElement()->remove(ec);
806         ASSERT(!ec);
807         m_tree.openElements()->popUntil(m_tree.openElements()->bodyElement());
808         m_tree.openElements()->popHTMLBodyElement();
809         ASSERT(m_tree.openElements()->top() == m_tree.openElements()->htmlElement());
810         m_tree.insertHTMLElement(token);
811         setInsertionMode(InFramesetMode);
812         return;
813     }
814     if (token.name() == addressTag
815         || token.name() == articleTag
816         || token.name() == asideTag
817         || token.name() == blockquoteTag
818         || token.name() == centerTag
819         || token.name() == detailsTag
820         || token.name() == dirTag
821         || token.name() == divTag
822         || token.name() == dlTag
823         || token.name() == fieldsetTag
824         || token.name() == figcaptionTag
825         || token.name() == figureTag
826         || token.name() == footerTag
827         || token.name() == headerTag
828         || token.name() == hgroupTag
829         || token.name() == menuTag
830         || token.name() == navTag
831         || token.name() == olTag
832         || token.name() == pTag
833         || token.name() == sectionTag
834         || token.name() == summaryTag
835         || token.name() == ulTag) {
836         processFakePEndTagIfPInButtonScope();
837         m_tree.insertHTMLElement(token);
838         return;
839     }
840     if (isNumberedHeaderTag(token.name())) {
841         processFakePEndTagIfPInButtonScope();
842         if (isNumberedHeaderTag(m_tree.currentElement()->localName())) {
843             parseError(token);
844             m_tree.openElements()->pop();
845         }
846         m_tree.insertHTMLElement(token);
847         return;
848     }
849     if (token.name() == preTag || token.name() == listingTag) {
850         processFakePEndTagIfPInButtonScope();
851         m_tree.insertHTMLElement(token);
852         m_parser->tokenizer()->setSkipLeadingNewLineForListing(true);
853         m_framesetOk = false;
854         return;
855     }
856     if (token.name() == formTag) {
857         if (m_tree.form()) {
858             parseError(token);
859             return;
860         }
861         processFakePEndTagIfPInButtonScope();
862         m_tree.insertHTMLFormElement(token);
863         return;
864     }
865     if (token.name() == liTag) {
866         processCloseWhenNestedTag<isLi>(token);
867         return;
868     }
869     if (token.name() == ddTag || token.name() == dtTag) {
870         processCloseWhenNestedTag<isDdOrDt>(token);
871         return;
872     }
873     if (token.name() == plaintextTag) {
874         processFakePEndTagIfPInButtonScope();
875         m_tree.insertHTMLElement(token);
876         m_parser->tokenizer()->setState(HTMLTokenizer::PLAINTEXTState);
877         return;
878     }
879     if (token.name() == buttonTag) {
880         if (m_tree.openElements()->inScope(buttonTag)) {
881             parseError(token);
882             processFakeEndTag(buttonTag);
883             reprocessStartTag(token); // FIXME: Could we just fall through here?
884             return;
885         }
886         m_tree.reconstructTheActiveFormattingElements();
887         m_tree.insertHTMLElement(token);
888         m_framesetOk = false;
889         return;
890     }
891     if (token.name() == aTag) {
892         Element* activeATag = m_tree.activeFormattingElements()->closestElementInScopeWithName(aTag.localName());
893         if (activeATag) {
894             parseError(token);
895             processFakeEndTag(aTag);
896             m_tree.activeFormattingElements()->remove(activeATag);
897             if (m_tree.openElements()->contains(activeATag))
898                 m_tree.openElements()->remove(activeATag);
899         }
900         m_tree.reconstructTheActiveFormattingElements();
901         m_tree.insertFormattingElement(token);
902         return;
903     }
904     if (isNonAnchorNonNobrFormattingTag(token.name())) {
905         m_tree.reconstructTheActiveFormattingElements();
906         m_tree.insertFormattingElement(token);
907         return;
908     }
909     if (token.name() == nobrTag) {
910         m_tree.reconstructTheActiveFormattingElements();
911         if (m_tree.openElements()->inScope(nobrTag)) {
912             parseError(token);
913             processFakeEndTag(nobrTag);
914             m_tree.reconstructTheActiveFormattingElements();
915         }
916         m_tree.insertFormattingElement(token);
917         return;
918     }
919     if (token.name() == appletTag
920         || token.name() == marqueeTag
921         || token.name() == objectTag) {
922         m_tree.reconstructTheActiveFormattingElements();
923         m_tree.insertHTMLElement(token);
924         m_tree.activeFormattingElements()->appendMarker();
925         m_framesetOk = false;
926         return;
927     }
928     if (token.name() == tableTag) {
929         if (!m_document->inQuirksMode() && m_tree.openElements()->inButtonScope(pTag))
930             processFakeEndTag(pTag);
931         m_tree.insertHTMLElement(token);
932         m_framesetOk = false;
933         setInsertionMode(InTableMode);
934         return;
935     }
936     if (token.name() == imageTag) {
937         parseError(token);
938         // Apparently we're not supposed to ask.
939         token.setName(imgTag.localName());
940         prepareToReprocessToken();
941         // Note the fall through to the imgTag handling below!
942     }
943     if (token.name() == areaTag
944         || token.name() == brTag
945         || token.name() == embedTag
946         || token.name() == imgTag
947         || token.name() == keygenTag
948         || token.name() == wbrTag) {
949         m_tree.reconstructTheActiveFormattingElements();
950         m_tree.insertSelfClosingHTMLElement(token);
951         m_framesetOk = false;
952         return;
953     }
954     if (token.name() == inputTag) {
955         RefPtr<Attribute> typeAttribute = token.getAttributeItem(typeAttr);
956         m_tree.reconstructTheActiveFormattingElements();
957         m_tree.insertSelfClosingHTMLElement(token);
958         if (!typeAttribute || !equalIgnoringCase(typeAttribute->value(), "hidden"))
959             m_framesetOk = false;
960         return;
961     }
962     if (token.name() == paramTag
963         || token.name() == sourceTag
964         || token.name() == trackTag) {
965         m_tree.insertSelfClosingHTMLElement(token);
966         return;
967     }
968     if (token.name() == hrTag) {
969         processFakePEndTagIfPInButtonScope();
970         m_tree.insertSelfClosingHTMLElement(token);
971         m_framesetOk = false;
972         return;
973     }
974     if (token.name() == isindexTag) {
975         processIsindexStartTagForInBody(token);
976         return;
977     }
978     if (token.name() == textareaTag) {
979         m_tree.insertHTMLElement(token);
980         m_parser->tokenizer()->setSkipLeadingNewLineForListing(true);
981         m_parser->tokenizer()->setState(HTMLTokenizer::RCDATAState);
982         m_originalInsertionMode = m_insertionMode;
983         m_framesetOk = false;
984         setInsertionMode(TextMode);
985         return;
986     }
987     if (token.name() == xmpTag) {
988         processFakePEndTagIfPInButtonScope();
989         m_tree.reconstructTheActiveFormattingElements();
990         m_framesetOk = false;
991         processGenericRawTextStartTag(token);
992         return;
993     }
994     if (token.name() == iframeTag) {
995         m_framesetOk = false;
996         processGenericRawTextStartTag(token);
997         return;
998     }
999     if (token.name() == noembedTag && pluginsEnabled(m_document->frame())) {
1000         processGenericRawTextStartTag(token);
1001         return;
1002     }
1003     if (token.name() == noscriptTag && scriptEnabled(m_document->frame())) {
1004         processGenericRawTextStartTag(token);
1005         return;
1006     }
1007     if (token.name() == selectTag) {
1008         m_tree.reconstructTheActiveFormattingElements();
1009         m_tree.insertHTMLElement(token);
1010         m_framesetOk = false;
1011         if (m_insertionMode == InTableMode
1012              || m_insertionMode == InCaptionMode
1013              || m_insertionMode == InColumnGroupMode
1014              || m_insertionMode == InTableBodyMode
1015              || m_insertionMode == InRowMode
1016              || m_insertionMode == InCellMode)
1017             setInsertionMode(InSelectInTableMode);
1018         else
1019             setInsertionMode(InSelectMode);
1020         return;
1021     }
1022     if (token.name() == optgroupTag || token.name() == optionTag) {
1023         if (m_tree.openElements()->inScope(optionTag.localName())) {
1024             AtomicHTMLToken endOption(HTMLToken::EndTag, optionTag.localName());
1025             processEndTag(endOption);
1026         }
1027         m_tree.reconstructTheActiveFormattingElements();
1028         m_tree.insertHTMLElement(token);
1029         return;
1030     }
1031     if (token.name() == rpTag || token.name() == rtTag) {
1032         if (m_tree.openElements()->inScope(rubyTag.localName())) {
1033             m_tree.generateImpliedEndTags();
1034             if (!m_tree.currentElement()->hasTagName(rubyTag)) {
1035                 parseError(token);
1036                 m_tree.openElements()->popUntil(rubyTag.localName());
1037             }
1038         }
1039         m_tree.insertHTMLElement(token);
1040         return;
1041     }
1042     if (token.name() == MathMLNames::mathTag.localName()) {
1043         m_tree.reconstructTheActiveFormattingElements();
1044         adjustMathMLAttributes(token);
1045         adjustForeignAttributes(token);
1046         m_tree.insertForeignElement(token, MathMLNames::mathmlNamespaceURI);
1047         if (m_insertionMode != InForeignContentMode)
1048             setInsertionMode(InForeignContentMode);
1049         return;
1050     }
1051     if (token.name() == SVGNames::svgTag.localName()) {
1052         m_tree.reconstructTheActiveFormattingElements();
1053         adjustSVGAttributes(token);
1054         adjustForeignAttributes(token);
1055         m_tree.insertForeignElement(token, SVGNames::svgNamespaceURI);
1056         if (m_insertionMode != InForeignContentMode)
1057             setInsertionMode(InForeignContentMode);
1058         return;
1059     }
1060     if (isCaptionColOrColgroupTag(token.name())
1061         || token.name() == frameTag
1062         || token.name() == headTag
1063         || isTableBodyContextTag(token.name())
1064         || isTableCellContextTag(token.name())
1065         || token.name() == trTag) {
1066         parseError(token);
1067         return;
1068     }
1069     m_tree.reconstructTheActiveFormattingElements();
1070     m_tree.insertHTMLElement(token);
1071 }
1072
1073 bool HTMLTreeBuilder::processColgroupEndTagForInColumnGroup()
1074 {
1075     if (m_tree.currentElement() == m_tree.openElements()->htmlElement()) {
1076         ASSERT(isParsingFragment());
1077         // FIXME: parse error
1078         return false;
1079     }
1080     m_tree.openElements()->pop();
1081     setInsertionMode(InTableMode);
1082     return true;
1083 }
1084
1085 // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#close-the-cell
1086 void HTMLTreeBuilder::closeTheCell()
1087 {
1088     ASSERT(insertionMode() == InCellMode);
1089     if (m_tree.openElements()->inTableScope(tdTag)) {
1090         ASSERT(!m_tree.openElements()->inTableScope(thTag));
1091         processFakeEndTag(tdTag);
1092         return;
1093     }
1094     ASSERT(m_tree.openElements()->inTableScope(thTag));
1095     processFakeEndTag(thTag);
1096     ASSERT(insertionMode() == InRowMode);
1097 }
1098
1099 void HTMLTreeBuilder::processStartTagForInTable(AtomicHTMLToken& token)
1100 {
1101     ASSERT(token.type() == HTMLToken::StartTag);
1102     if (token.name() == captionTag) {
1103         m_tree.openElements()->popUntilTableScopeMarker();
1104         m_tree.activeFormattingElements()->appendMarker();
1105         m_tree.insertHTMLElement(token);
1106         setInsertionMode(InCaptionMode);
1107         return;
1108     }
1109     if (token.name() == colgroupTag) {
1110         m_tree.openElements()->popUntilTableScopeMarker();
1111         m_tree.insertHTMLElement(token);
1112         setInsertionMode(InColumnGroupMode);
1113         return;
1114     }
1115     if (token.name() == colTag) {
1116         processFakeStartTag(colgroupTag);
1117         ASSERT(InColumnGroupMode);
1118         reprocessStartTag(token);
1119         return;
1120     }
1121     if (isTableBodyContextTag(token.name())) {
1122         m_tree.openElements()->popUntilTableScopeMarker();
1123         m_tree.insertHTMLElement(token);
1124         setInsertionMode(InTableBodyMode);
1125         return;
1126     }
1127     if (isTableCellContextTag(token.name())
1128         || token.name() == trTag) {
1129         processFakeStartTag(tbodyTag);
1130         ASSERT(insertionMode() == InTableBodyMode);
1131         reprocessStartTag(token);
1132         return;
1133     }
1134     if (token.name() == tableTag) {
1135         parseError(token);
1136         if (!processTableEndTagForInTable()) {
1137             ASSERT(isParsingFragment());
1138             return;
1139         }
1140         reprocessStartTag(token);
1141         return;
1142     }
1143     if (token.name() == styleTag || token.name() == scriptTag) {
1144         processStartTagForInHead(token);
1145         return;
1146     }
1147     if (token.name() == inputTag) {
1148         Attribute* typeAttribute = token.getAttributeItem(typeAttr);
1149         if (typeAttribute && equalIgnoringCase(typeAttribute->value(), "hidden")) {
1150             parseError(token);
1151             m_tree.insertSelfClosingHTMLElement(token);
1152             return;
1153         }
1154         // Fall through to "anything else" case.
1155     }
1156     if (token.name() == formTag) {
1157         parseError(token);
1158         if (m_tree.form())
1159             return;
1160         m_tree.insertHTMLFormElement(token, true);
1161         m_tree.openElements()->pop();
1162         return;
1163     }
1164     parseError(token);
1165     HTMLConstructionSite::RedirectToFosterParentGuard redirecter(m_tree);
1166     processStartTagForInBody(token);
1167 }
1168
1169 namespace {
1170
1171 bool shouldProcessForeignContentUsingInBodyInsertionMode(AtomicHTMLToken& token, Element* currentElement)
1172 {
1173     ASSERT(token.type() == HTMLToken::StartTag);
1174     if (currentElement->hasTagName(MathMLNames::miTag)
1175         || currentElement->hasTagName(MathMLNames::moTag)
1176         || currentElement->hasTagName(MathMLNames::mnTag)
1177         || currentElement->hasTagName(MathMLNames::msTag)
1178         || currentElement->hasTagName(MathMLNames::mtextTag)) {
1179         return token.name() != MathMLNames::mglyphTag
1180             && token.name() != MathMLNames::malignmarkTag;
1181     }
1182     if (currentElement->hasTagName(MathMLNames::annotation_xmlTag))
1183         return token.name() == SVGNames::svgTag;
1184     if (currentElement->hasTagName(SVGNames::foreignObjectTag)
1185         || currentElement->hasTagName(SVGNames::descTag)
1186         || currentElement->hasTagName(SVGNames::titleTag))
1187         return true;
1188     return currentElement->namespaceURI() == HTMLNames::xhtmlNamespaceURI;
1189 }
1190
1191 }
1192
1193 void HTMLTreeBuilder::processStartTag(AtomicHTMLToken& token)
1194 {
1195     ASSERT(token.type() == HTMLToken::StartTag);
1196     switch (insertionMode()) {
1197     case InitialMode:
1198         ASSERT(insertionMode() == InitialMode);
1199         defaultForInitial();
1200         // Fall through.
1201     case BeforeHTMLMode:
1202         ASSERT(insertionMode() == BeforeHTMLMode);
1203         if (token.name() == htmlTag) {
1204             m_tree.insertHTMLHtmlStartTagBeforeHTML(token);
1205             setInsertionMode(BeforeHeadMode);
1206             return;
1207         }
1208         defaultForBeforeHTML();
1209         // Fall through.
1210     case BeforeHeadMode:
1211         ASSERT(insertionMode() == BeforeHeadMode);
1212         if (token.name() == htmlTag) {
1213             m_tree.insertHTMLHtmlStartTagInBody(token);
1214             return;
1215         }
1216         if (token.name() == headTag) {
1217             m_tree.insertHTMLHeadElement(token);
1218             setInsertionMode(InHeadMode);
1219             return;
1220         }
1221         defaultForBeforeHead();
1222         // Fall through.
1223     case InHeadMode:
1224         ASSERT(insertionMode() == InHeadMode);
1225         if (processStartTagForInHead(token))
1226             return;
1227         defaultForInHead();
1228         // Fall through.
1229     case AfterHeadMode:
1230         ASSERT(insertionMode() == AfterHeadMode);
1231         if (token.name() == htmlTag) {
1232             m_tree.insertHTMLHtmlStartTagInBody(token);
1233             return;
1234         }
1235         if (token.name() == bodyTag) {
1236             m_framesetOk = false;
1237             m_tree.insertHTMLBodyElement(token);
1238             setInsertionMode(InBodyMode);
1239             return;
1240         }
1241         if (token.name() == framesetTag) {
1242             m_tree.insertHTMLElement(token);
1243             setInsertionMode(InFramesetMode);
1244             return;
1245         }
1246         if (token.name() == baseTag
1247             || token.name() == basefontTag
1248             || token.name() == bgsoundTag
1249             || token.name() == linkTag
1250             || token.name() == metaTag
1251             || token.name() == noframesTag
1252             || token.name() == scriptTag
1253             || token.name() == styleTag
1254             || token.name() == titleTag) {
1255             parseError(token);
1256             ASSERT(m_tree.head());
1257             m_tree.openElements()->pushHTMLHeadElement(m_tree.head());
1258             processStartTagForInHead(token);
1259             m_tree.openElements()->removeHTMLHeadElement(m_tree.head());
1260             return;
1261         }
1262         if (token.name() == headTag) {
1263             parseError(token);
1264             return;
1265         }
1266         defaultForAfterHead();
1267         // Fall through
1268     case InBodyMode:
1269         ASSERT(insertionMode() == InBodyMode);
1270         processStartTagForInBody(token);
1271         break;
1272     case InTableMode:
1273         ASSERT(insertionMode() == InTableMode);
1274         processStartTagForInTable(token);
1275         break;
1276     case InCaptionMode:
1277         ASSERT(insertionMode() == InCaptionMode);
1278         if (isCaptionColOrColgroupTag(token.name())
1279             || isTableBodyContextTag(token.name())
1280             || isTableCellContextTag(token.name())
1281             || token.name() == trTag) {
1282             parseError(token);
1283             if (!processCaptionEndTagForInCaption()) {
1284                 ASSERT(isParsingFragment());
1285                 return;
1286             }
1287             reprocessStartTag(token);
1288             return;
1289         }
1290         processStartTagForInBody(token);
1291         break;
1292     case InColumnGroupMode:
1293         ASSERT(insertionMode() == InColumnGroupMode);
1294         if (token.name() == htmlTag) {
1295             m_tree.insertHTMLHtmlStartTagInBody(token);
1296             return;
1297         }
1298         if (token.name() == colTag) {
1299             m_tree.insertSelfClosingHTMLElement(token);
1300             return;
1301         }
1302         if (!processColgroupEndTagForInColumnGroup()) {
1303             ASSERT(isParsingFragment());
1304             return;
1305         }
1306         reprocessStartTag(token);
1307         break;
1308     case InTableBodyMode:
1309         ASSERT(insertionMode() == InTableBodyMode);
1310         if (token.name() == trTag) {
1311             m_tree.openElements()->popUntilTableBodyScopeMarker(); // How is there ever anything to pop?
1312             m_tree.insertHTMLElement(token);
1313             setInsertionMode(InRowMode);
1314             return;
1315         }
1316         if (isTableCellContextTag(token.name())) {
1317             parseError(token);
1318             processFakeStartTag(trTag);
1319             ASSERT(insertionMode() == InRowMode);
1320             reprocessStartTag(token);
1321             return;
1322         }
1323         if (isCaptionColOrColgroupTag(token.name()) || isTableBodyContextTag(token.name())) {
1324             // FIXME: This is slow.
1325             if (!m_tree.openElements()->inTableScope(tbodyTag.localName()) && !m_tree.openElements()->inTableScope(theadTag.localName()) && !m_tree.openElements()->inTableScope(tfootTag.localName())) {
1326                 ASSERT(isParsingFragment());
1327                 parseError(token);
1328                 return;
1329             }
1330             m_tree.openElements()->popUntilTableBodyScopeMarker();
1331             ASSERT(isTableBodyContextTag(m_tree.currentElement()->localName()));
1332             processFakeEndTag(m_tree.currentElement()->tagQName());
1333             reprocessStartTag(token);
1334             return;
1335         }
1336         processStartTagForInTable(token);
1337         break;
1338     case InRowMode:
1339         ASSERT(insertionMode() == InRowMode);
1340         if (isTableCellContextTag(token.name())) {
1341             m_tree.openElements()->popUntilTableRowScopeMarker();
1342             m_tree.insertHTMLElement(token);
1343             setInsertionMode(InCellMode);
1344             m_tree.activeFormattingElements()->appendMarker();
1345             return;
1346         }
1347         if (token.name() == trTag
1348             || isCaptionColOrColgroupTag(token.name())
1349             || isTableBodyContextTag(token.name())) {
1350             if (!processTrEndTagForInRow()) {
1351                 ASSERT(isParsingFragment());
1352                 return;
1353             }
1354             ASSERT(insertionMode() == InTableBodyMode);
1355             reprocessStartTag(token);
1356             return;
1357         }
1358         processStartTagForInTable(token);
1359         break;
1360     case InCellMode:
1361         ASSERT(insertionMode() == InCellMode);
1362         if (isCaptionColOrColgroupTag(token.name())
1363             || isTableCellContextTag(token.name())
1364             || token.name() == trTag
1365             || isTableBodyContextTag(token.name())) {
1366             // FIXME: This could be more efficient.
1367             if (!m_tree.openElements()->inTableScope(tdTag) && !m_tree.openElements()->inTableScope(thTag)) {
1368                 ASSERT(isParsingFragment());
1369                 parseError(token);
1370                 return;
1371             }
1372             closeTheCell();
1373             reprocessStartTag(token);
1374             return;
1375         }
1376         processStartTagForInBody(token);
1377         break;
1378     case AfterBodyMode:
1379     case AfterAfterBodyMode:
1380         ASSERT(insertionMode() == AfterBodyMode || insertionMode() == AfterAfterBodyMode);
1381         if (token.name() == htmlTag) {
1382             m_tree.insertHTMLHtmlStartTagInBody(token);
1383             return;
1384         }
1385         setInsertionMode(InBodyMode);
1386         reprocessStartTag(token);
1387         break;
1388     case InHeadNoscriptMode:
1389         ASSERT(insertionMode() == InHeadNoscriptMode);
1390         if (token.name() == htmlTag) {
1391             m_tree.insertHTMLHtmlStartTagInBody(token);
1392             return;
1393         }
1394         if (token.name() == basefontTag
1395             || token.name() == bgsoundTag
1396             || token.name() == linkTag
1397             || token.name() == metaTag
1398             || token.name() == noframesTag
1399             || token.name() == styleTag) {
1400             bool didProcess = processStartTagForInHead(token);
1401             ASSERT_UNUSED(didProcess, didProcess);
1402             return;
1403         }
1404         if (token.name() == htmlTag || token.name() == noscriptTag) {
1405             parseError(token);
1406             return;
1407         }
1408         defaultForInHeadNoscript();
1409         processToken(token);
1410         break;
1411     case InFramesetMode:
1412         ASSERT(insertionMode() == InFramesetMode);
1413         if (token.name() == htmlTag) {
1414             m_tree.insertHTMLHtmlStartTagInBody(token);
1415             return;
1416         }
1417         if (token.name() == framesetTag) {
1418             m_tree.insertHTMLElement(token);
1419             return;
1420         }
1421         if (token.name() == frameTag) {
1422             m_tree.insertSelfClosingHTMLElement(token);
1423             return;
1424         }
1425         if (token.name() == noframesTag) {
1426             processStartTagForInHead(token);
1427             return;
1428         }
1429         parseError(token);
1430         break;
1431     case AfterFramesetMode:
1432     case AfterAfterFramesetMode:
1433         ASSERT(insertionMode() == AfterFramesetMode || insertionMode() == AfterAfterFramesetMode);
1434         if (token.name() == htmlTag) {
1435             m_tree.insertHTMLHtmlStartTagInBody(token);
1436             return;
1437         }
1438         if (token.name() == noframesTag) {
1439             processStartTagForInHead(token);
1440             return;
1441         }
1442         parseError(token);
1443         break;
1444     case InSelectInTableMode:
1445         ASSERT(insertionMode() == InSelectInTableMode);
1446         if (token.name() == captionTag
1447             || token.name() == tableTag
1448             || isTableBodyContextTag(token.name())
1449             || token.name() == trTag
1450             || isTableCellContextTag(token.name())) {
1451             parseError(token);
1452             AtomicHTMLToken endSelect(HTMLToken::EndTag, selectTag.localName());
1453             processEndTag(endSelect);
1454             reprocessStartTag(token);
1455             return;
1456         }
1457         // Fall through
1458     case InSelectMode:
1459         ASSERT(insertionMode() == InSelectMode || insertionMode() == InSelectInTableMode);
1460         if (token.name() == htmlTag) {
1461             m_tree.insertHTMLHtmlStartTagInBody(token);
1462             return;
1463         }
1464         if (token.name() == optionTag) {
1465             if (m_tree.currentElement()->hasTagName(optionTag)) {
1466                 AtomicHTMLToken endOption(HTMLToken::EndTag, optionTag.localName());
1467                 processEndTag(endOption);
1468             }
1469             m_tree.insertHTMLElement(token);
1470             return;
1471         }
1472         if (token.name() == optgroupTag) {
1473             if (m_tree.currentElement()->hasTagName(optionTag)) {
1474                 AtomicHTMLToken endOption(HTMLToken::EndTag, optionTag.localName());
1475                 processEndTag(endOption);
1476             }
1477             if (m_tree.currentElement()->hasTagName(optgroupTag)) {
1478                 AtomicHTMLToken endOptgroup(HTMLToken::EndTag, optgroupTag.localName());
1479                 processEndTag(endOptgroup);
1480             }
1481             m_tree.insertHTMLElement(token);
1482             return;
1483         }
1484         if (token.name() == selectTag) {
1485             parseError(token);
1486             AtomicHTMLToken endSelect(HTMLToken::EndTag, selectTag.localName());
1487             processEndTag(endSelect);
1488             return;
1489         }
1490         if (token.name() == inputTag
1491             || token.name() == keygenTag
1492             || token.name() == textareaTag) {
1493             parseError(token);
1494             if (!m_tree.openElements()->inSelectScope(selectTag)) {
1495                 ASSERT(isParsingFragment());
1496                 return;
1497             }
1498             AtomicHTMLToken endSelect(HTMLToken::EndTag, selectTag.localName());
1499             processEndTag(endSelect);
1500             reprocessStartTag(token);
1501             return;
1502         }
1503         if (token.name() == scriptTag) {
1504             bool didProcess = processStartTagForInHead(token);
1505             ASSERT_UNUSED(didProcess, didProcess);
1506             return;
1507         }
1508         break;
1509     case InTableTextMode:
1510         defaultForInTableText();
1511         processStartTag(token);
1512         break;
1513     case InForeignContentMode: {
1514         if (shouldProcessForeignContentUsingInBodyInsertionMode(token, m_tree.currentElement())) {
1515             processForeignContentUsingInBodyModeAndResetMode(token);
1516             return;
1517         }
1518         if (token.name() == bTag
1519             || token.name() == bigTag
1520             || token.name() == blockquoteTag
1521             || token.name() == bodyTag
1522             || token.name() == brTag
1523             || token.name() == centerTag
1524             || token.name() == codeTag
1525             || token.name() == ddTag
1526             || token.name() == divTag
1527             || token.name() == dlTag
1528             || token.name() == dtTag
1529             || token.name() == emTag
1530             || token.name() == embedTag
1531             || isNumberedHeaderTag(token.name())
1532             || token.name() == headTag
1533             || token.name() == hrTag
1534             || token.name() == iTag
1535             || token.name() == imgTag
1536             || token.name() == liTag
1537             || token.name() == listingTag
1538             || token.name() == menuTag
1539             || token.name() == metaTag
1540             || token.name() == nobrTag
1541             || token.name() == olTag
1542             || token.name() == pTag
1543             || token.name() == preTag
1544             || token.name() == rubyTag
1545             || token.name() == sTag
1546             || token.name() == smallTag
1547             || token.name() == spanTag
1548             || token.name() == strongTag
1549             || token.name() == strikeTag
1550             || token.name() == subTag
1551             || token.name() == supTag
1552             || token.name() == tableTag
1553             || token.name() == ttTag
1554             || token.name() == uTag
1555             || token.name() == ulTag
1556             || token.name() == varTag
1557             || (token.name() == fontTag && (token.getAttributeItem(colorAttr) || token.getAttributeItem(faceAttr) || token.getAttributeItem(sizeAttr)))) {
1558             parseError(token);
1559             m_tree.openElements()->popUntilForeignContentScopeMarker();
1560             resetInsertionModeAppropriately();
1561             reprocessStartTag(token);
1562             return;
1563         }
1564         const AtomicString& currentNamespace = m_tree.currentElement()->namespaceURI();
1565         if (currentNamespace == MathMLNames::mathmlNamespaceURI)
1566             adjustMathMLAttributes(token);
1567         if (currentNamespace == SVGNames::svgNamespaceURI) {
1568             adjustSVGTagNameCase(token);
1569             adjustSVGAttributes(token);
1570         }
1571         adjustForeignAttributes(token);
1572         m_tree.insertForeignElement(token, currentNamespace);
1573         break;
1574     }
1575     case TextMode:
1576         ASSERT_NOT_REACHED();
1577         break;
1578     }
1579 }
1580
1581 bool HTMLTreeBuilder::processBodyEndTagForInBody(AtomicHTMLToken& token)
1582 {
1583     ASSERT(token.type() == HTMLToken::EndTag);
1584     ASSERT(token.name() == bodyTag);
1585     if (!m_tree.openElements()->inScope(bodyTag.localName())) {
1586         parseError(token);
1587         return false;
1588     }
1589     notImplemented(); // Emit a more specific parse error based on stack contents.
1590     setInsertionMode(AfterBodyMode);
1591     return true;
1592 }
1593
1594 void HTMLTreeBuilder::processAnyOtherEndTagForInBody(AtomicHTMLToken& token)
1595 {
1596     ASSERT(token.type() == HTMLToken::EndTag);
1597     HTMLElementStack::ElementRecord* record = m_tree.openElements()->topRecord();
1598     while (1) {
1599         Element* node = record->element();
1600         if (node->hasLocalName(token.name())) {
1601             m_tree.generateImpliedEndTags();
1602             if (!m_tree.currentElement()->hasLocalName(token.name())) {
1603                 parseError(token);
1604                 // FIXME: This is either a bug in the spec, or a bug in our
1605                 // implementation.  Filed a bug with HTML5:
1606                 // http://www.w3.org/Bugs/Public/show_bug.cgi?id=10080
1607                 // We might have already popped the node for the token in
1608                 // generateImpliedEndTags, just abort.
1609                 if (!m_tree.openElements()->contains(node))
1610                     return;
1611             }
1612             m_tree.openElements()->popUntilPopped(node);
1613             return;
1614         }
1615         if (isSpecialNode(node)) {
1616             parseError(token);
1617             return;
1618         }
1619         record = record->next();
1620     }
1621 }
1622
1623 // FIXME: This probably belongs on HTMLElementStack.
1624 HTMLElementStack::ElementRecord* HTMLTreeBuilder::furthestBlockForFormattingElement(Element* formattingElement)
1625 {
1626     HTMLElementStack::ElementRecord* furthestBlock = 0;
1627     HTMLElementStack::ElementRecord* record = m_tree.openElements()->topRecord();
1628     for (; record; record = record->next()) {
1629         if (record->element() == formattingElement)
1630             return furthestBlock;
1631         if (isSpecialNode(record->element()))
1632             furthestBlock = record;
1633     }
1634     ASSERT_NOT_REACHED();
1635     return 0;
1636 }
1637
1638 // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#parsing-main-inbody
// Runs the "adoption agency" steps for the end tag |token|. The numbered
// comments in the body are the step markers used by the original authors.
//
// Each pass of the outer loop looks up a formatting element for token.name()
// in the list of active formatting elements and then does one of:
//   - reports a parse error and returns (no usable formatting element);
//   - pops / removes the formatting element and returns (when
//     furthestBlockForFormattingElement() yields no record);
//   - re-parents the nodes between the formatting element and the furthest
//     block, replaces the formatting element with a fresh copy, and loops.
// Both the outer and the inner loop are capped at
// adoptionAgencyIterationLimit passes.
1639 void HTMLTreeBuilder::callTheAdoptionAgency(AtomicHTMLToken& token)
1640 {
1641     // The adoption agency algorithm is N^2.  We limit the number of iterations
1642     // to stop from hanging the whole browser.  This limit is copied from the
1643     // legacy tree builder and might need to be tweaked in the future.
1644     static const int adoptionAgencyIterationLimit = 10;
1645
1646     for (int i = 0; i < adoptionAgencyIterationLimit; ++i) {
1647         // 1. Look up the formatting element for this end tag. Give up with a
              //    parse error if there is none, or if it is on the stack of open
              //    elements but not in scope.
1648         Element* formattingElement = m_tree.activeFormattingElements()->closestElementInScopeWithName(token.name());
1649         if (!formattingElement || ((m_tree.openElements()->contains(formattingElement)) && !m_tree.openElements()->inScope(formattingElement))) {
1650             parseError(token);
1651             notImplemented(); // Check the stack of open elements for a more specific parse error.
1652             return;
1653         }
              // The element is in the formatting list but no longer on the stack of
              // open elements: report the error and drop it from the list.
1654         HTMLElementStack::ElementRecord* formattingElementRecord = m_tree.openElements()->find(formattingElement);
1655         if (!formattingElementRecord) {
1656             parseError(token);
1657             m_tree.activeFormattingElements()->remove(formattingElement);
1658             return;
1659         }
              // A formatting element that is not the current element is a parse
              // error, but processing continues.
1660         if (formattingElement != m_tree.currentElement())
1661             parseError(token);
1662         // 2. Find the furthest block (may be 0).
1663         HTMLElementStack::ElementRecord* furthestBlock = furthestBlockForFormattingElement(formattingElement);
1664         // 3. Without a furthest block, simply pop up to and including the
              //    formatting element and remove it from the formatting list.
1665         if (!furthestBlock) {
1666             m_tree.openElements()->popUntilPopped(formattingElement);
1667             m_tree.activeFormattingElements()->remove(formattingElement);
1668             return;
1669         }
1670         // 4. The common ancestor is the element of the record that follows the
              //    formatting element's record (via next()).
1671         ASSERT(furthestBlock->isAbove(formattingElementRecord));
1672         Element* commonAncestor = formattingElementRecord->next()->element();
1673         // 5. Remember where the formatting element sits in the formatting list.
1674         HTMLFormattingElementList::Bookmark bookmark = m_tree.activeFormattingElements()->bookmarkFor(formattingElement);
1675         // 6. Walk from the furthest block towards the formatting element,
              //    re-parenting each visited node under a freshly created copy.
1676         HTMLElementStack::ElementRecord* node = furthestBlock;
1677         HTMLElementStack::ElementRecord* nextNode = node->next();
1678         HTMLElementStack::ElementRecord* lastNode = furthestBlock;
              // NOTE: this 'i' shadows the outer loop counter; the two counters are
              // independent of each other.
1679         for (int i = 0; i < adoptionAgencyIterationLimit; ++i) {
1680             // 6.1
1681             node = nextNode;
1682             ASSERT(node);
1683             nextNode = node->next(); // Save node->next() for the next iteration in case node is deleted in 6.2.
1684             // 6.2 Nodes that are not in the formatting list are removed from
                  //     the stack of open elements and skipped.
1685             if (!m_tree.activeFormattingElements()->contains(node->element())) {
1686                 m_tree.openElements()->remove(node->element());
1687                 node = 0;
1688                 continue;
1689             }
1690             // 6.3 Stop once the formatting element's own record is reached.
1691             if (node == formattingElementRecord)
1692                 break;
1693             // 6.5 Replace node's element with a new element, both in the
                  //     formatting list entry and in the stack record.
1694             RefPtr<Element> newElement = m_tree.createHTMLElementFromElementRecord(node);
1695             HTMLFormattingElementList::Entry* nodeEntry = m_tree.activeFormattingElements()->find(node->element());
1696             nodeEntry->replaceElement(newElement.get());
1697             node->replaceElement(newElement.release());
1698             // 6.4 -- Intentionally out of order to handle the case where node
1699             // was replaced in 6.5.
1700             // http://www.w3.org/Bugs/Public/show_bug.cgi?id=10096
1701             if (lastNode == furthestBlock)
1702                 bookmark.moveToAfter(nodeEntry);
1703             // 6.6 Move lastNode's element under node's (new) element, attaching
                  //     it if the new parent is attached and it is not.
1704             if (Element* parent = lastNode->element()->parentElement())
1705                 parent->parserRemoveChild(lastNode->element());
1706             node->element()->parserAddChild(lastNode->element());
1707             if (lastNode->element()->parentElement()->attached() && !lastNode->element()->attached())
1708                 lastNode->element()->lazyAttach();
1709             // 6.7
1710             lastNode = node;
1711         }
1712         // 7 Detach lastNode's element from its parent, then either foster
              //   parent it (common ancestor is table, tr or a table body context
              //   tag) or append it to the common ancestor.
1713         const AtomicString& commonAncestorTag = commonAncestor->localName();
1714         if (Element* parent = lastNode->element()->parentElement())
1715             parent->parserRemoveChild(lastNode->element());
1716         // FIXME: If this moves to HTMLConstructionSite, this check should use
1717         // causesFosterParenting(tagName) instead.
1718         if (commonAncestorTag == tableTag
1719             || commonAncestorTag == trTag
1720             || isTableBodyContextTag(commonAncestorTag))
1721             m_tree.fosterParent(lastNode->element());
1722         else {
1723             commonAncestor->parserAddChild(lastNode->element());
1724             if (lastNode->element()->parentElement()->attached() && !lastNode->element()->attached())
1725                 lastNode->element()->lazyAttach();
1726         }
1727         // 8 Create a new element from the formatting element's record.
1728         RefPtr<Element> newElement = m_tree.createHTMLElementFromElementRecord(formattingElementRecord);
1729         // 9 Move every child of the furthest block into the new element.
1730         newElement->takeAllChildrenFrom(furthestBlock->element());
1731         // 10 Append the new element to the furthest block.
1732         Element* furthestBlockElement = furthestBlock->element();
1733         // FIXME: All this creation / parserAddChild / attach business should
1734         //        be in HTMLConstructionSite.  My guess is that steps 8--12
1735         //        should all be in some HTMLConstructionSite function.
1736         furthestBlockElement->parserAddChild(newElement);
1737         if (furthestBlockElement->attached() && !newElement->attached()) {
1738             // Notice that newElement might already be attached if, for example, one of the reparented
1739             // children is a style element, which attaches itself automatically.
1740             newElement->attach();
1741         }
1742         // 11 Swap the old formatting element for the new one in the formatting
              //    list, at the bookmarked position.
1743         m_tree.activeFormattingElements()->swapTo(formattingElement, newElement.get(), bookmark);
1744         // 12 Replace the old formatting element on the stack of open elements:
              //    remove it and insert the new element above the furthest block.
1745         m_tree.openElements()->remove(formattingElement);
1746         m_tree.openElements()->insertAbove(newElement, furthestBlock);
1747     }
1748 }
1749
1750 void HTMLTreeBuilder::resetInsertionModeAppropriately()
1751 {
1752     // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#reset-the-insertion-mode-appropriately
1753     bool last = false;
1754     HTMLElementStack::ElementRecord* nodeRecord = m_tree.openElements()->topRecord();
1755     while (1) {
1756         Element* node = nodeRecord->element();
1757         if (node == m_tree.openElements()->bottom()) {
1758             ASSERT(isParsingFragment());
1759             last = true;
1760             node = m_fragmentContext.contextElement();
1761         }
1762         if (node->hasTagName(selectTag)) {
1763             ASSERT(isParsingFragment());
1764             return setInsertionMode(InSelectMode);
1765         }
1766         if (node->hasTagName(tdTag) || node->hasTagName(thTag))
1767             return setInsertionMode(InCellMode);
1768         if (node->hasTagName(trTag))
1769             return setInsertionMode(InRowMode);
1770         if (node->hasTagName(tbodyTag) || node->hasTagName(theadTag) || node->hasTagName(tfootTag))
1771             return setInsertionMode(InTableBodyMode);
1772         if (node->hasTagName(captionTag))
1773             return setInsertionMode(InCaptionMode);
1774         if (node->hasTagName(colgroupTag)) {
1775             ASSERT(isParsingFragment());
1776             return setInsertionMode(InColumnGroupMode);
1777         }
1778         if (node->hasTagName(tableTag))
1779             return setInsertionMode(InTableMode);
1780         if (node->hasTagName(headTag)) {
1781             ASSERT(isParsingFragment());
1782             return setInsertionMode(InBodyMode);
1783         }
1784         if (node->hasTagName(bodyTag))
1785             return setInsertionMode(InBodyMode);
1786         if (node->hasTagName(framesetTag)) {
1787             ASSERT(isParsingFragment());
1788             return setInsertionMode(InFramesetMode);
1789         }
1790         if (node->hasTagName(htmlTag)) {
1791             ASSERT(isParsingFragment());
1792             return setInsertionMode(BeforeHeadMode);
1793         }
1794         if (node->namespaceURI() == SVGNames::svgNamespaceURI
1795             || node->namespaceURI() == MathMLNames::mathmlNamespaceURI)
1796             return setInsertionMode(InForeignContentMode);
1797         if (last) {
1798             ASSERT(isParsingFragment());
1799             return setInsertionMode(InBodyMode);
1800         }
1801         nodeRecord = nodeRecord->next();
1802     }
1803 }
1804
1805 void HTMLTreeBuilder::processEndTagForInTableBody(AtomicHTMLToken& token)
1806 {
1807     ASSERT(token.type() == HTMLToken::EndTag);
1808     if (isTableBodyContextTag(token.name())) {
1809         if (!m_tree.openElements()->inTableScope(token.name())) {
1810             parseError(token);
1811             return;
1812         }
1813         m_tree.openElements()->popUntilTableBodyScopeMarker();
1814         m_tree.openElements()->pop();
1815         setInsertionMode(InTableMode);
1816         return;
1817     }
1818     if (token.name() == tableTag) {
1819         // FIXME: This is slow.
1820         if (!m_tree.openElements()->inTableScope(tbodyTag.localName()) && !m_tree.openElements()->inTableScope(theadTag.localName()) && !m_tree.openElements()->inTableScope(tfootTag.localName())) {
1821             ASSERT(isParsingFragment());
1822             parseError(token);
1823             return;
1824         }
1825         m_tree.openElements()->popUntilTableBodyScopeMarker();
1826         ASSERT(isTableBodyContextTag(m_tree.currentElement()->localName()));
1827         processFakeEndTag(m_tree.currentElement()->tagQName());
1828         reprocessEndTag(token);
1829         return;
1830     }
1831     if (token.name() == bodyTag
1832         || isCaptionColOrColgroupTag(token.name())
1833         || token.name() == htmlTag
1834         || isTableCellContextTag(token.name())
1835         || token.name() == trTag) {
1836         parseError(token);
1837         return;
1838     }
1839     processEndTagForInTable(token);
1840 }
1841
1842 void HTMLTreeBuilder::processEndTagForInRow(AtomicHTMLToken& token)
1843 {
1844     ASSERT(token.type() == HTMLToken::EndTag);
1845     if (token.name() == trTag) {
1846         processTrEndTagForInRow();
1847         return;
1848     }
1849     if (token.name() == tableTag) {
1850         if (!processTrEndTagForInRow()) {
1851             ASSERT(isParsingFragment());
1852             return;
1853         }
1854         ASSERT(insertionMode() == InTableBodyMode);
1855         reprocessEndTag(token);
1856         return;
1857     }
1858     if (isTableBodyContextTag(token.name())) {
1859         if (!m_tree.openElements()->inTableScope(token.name())) {
1860             parseError(token);
1861             return;
1862         }
1863         processFakeEndTag(trTag);
1864         ASSERT(insertionMode() == InTableBodyMode);
1865         reprocessEndTag(token);
1866         return;
1867     }
1868     if (token.name() == bodyTag
1869         || isCaptionColOrColgroupTag(token.name())
1870         || token.name() == htmlTag
1871         || isTableCellContextTag(token.name())) {
1872         parseError(token);
1873         return;
1874     }
1875     processEndTagForInTable(token);
1876 }
1877
1878 void HTMLTreeBuilder::processEndTagForInCell(AtomicHTMLToken& token)
1879 {
1880     ASSERT(token.type() == HTMLToken::EndTag);
1881     if (isTableCellContextTag(token.name())) {
1882         if (!m_tree.openElements()->inTableScope(token.name())) {
1883             parseError(token);
1884             return;
1885         }
1886         m_tree.generateImpliedEndTags();
1887         if (!m_tree.currentElement()->hasLocalName(token.name()))
1888             parseError(token);
1889         m_tree.openElements()->popUntilPopped(token.name());
1890         m_tree.activeFormattingElements()->clearToLastMarker();
1891         setInsertionMode(InRowMode);
1892         return;
1893     }
1894     if (token.name() == bodyTag
1895         || isCaptionColOrColgroupTag(token.name())
1896         || token.name() == htmlTag) {
1897         parseError(token);
1898         return;
1899     }
1900     if (token.name() == tableTag
1901         || token.name() == trTag
1902         || isTableBodyContextTag(token.name())) {
1903         if (!m_tree.openElements()->inTableScope(token.name())) {
1904             ASSERT(isTableBodyContextTag(token.name()) || isParsingFragment());
1905             parseError(token);
1906             return;
1907         }
1908         closeTheCell();
1909         reprocessEndTag(token);
1910         return;
1911     }
1912     processEndTagForInBody(token);
1913 }
1914
1915 void HTMLTreeBuilder::processEndTagForInBody(AtomicHTMLToken& token)
1916 {
1917     ASSERT(token.type() == HTMLToken::EndTag);
1918     if (token.name() == bodyTag) {
1919         processBodyEndTagForInBody(token);
1920         return;
1921     }
1922     if (token.name() == htmlTag) {
1923         AtomicHTMLToken endBody(HTMLToken::EndTag, bodyTag.localName());
1924         if (processBodyEndTagForInBody(endBody))
1925             reprocessEndTag(token);
1926         return;
1927     }
1928     if (token.name() == addressTag
1929         || token.name() == articleTag
1930         || token.name() == asideTag
1931         || token.name() == blockquoteTag
1932         || token.name() == buttonTag
1933         || token.name() == centerTag
1934         || token.name() == detailsTag
1935         || token.name() == dirTag
1936         || token.name() == divTag
1937         || token.name() == dlTag
1938         || token.name() == fieldsetTag
1939         || token.name() == figcaptionTag
1940         || token.name() == figureTag
1941         || token.name() == footerTag
1942         || token.name() == headerTag
1943         || token.name() == hgroupTag
1944         || token.name() == listingTag
1945         || token.name() == menuTag
1946         || token.name() == navTag
1947         || token.name() == olTag
1948         || token.name() == preTag
1949         || token.name() == sectionTag
1950         || token.name() == summaryTag
1951         || token.name() == ulTag) {
1952         if (!m_tree.openElements()->inScope(token.name())) {
1953             parseError(token);
1954             return;
1955         }
1956         m_tree.generateImpliedEndTags();
1957         if (!m_tree.currentElement()->hasLocalName(token.name()))
1958             parseError(token);
1959         m_tree.openElements()->popUntilPopped(token.name());
1960         return;
1961     }
1962     if (token.name() == formTag) {
1963         RefPtr<Element> node = m_tree.takeForm();
1964         if (!node || !m_tree.openElements()->inScope(node.get())) {
1965             parseError(token);
1966             return;
1967         }
1968         m_tree.generateImpliedEndTags();
1969         if (m_tree.currentElement() != node.get())
1970             parseError(token);
1971         m_tree.openElements()->remove(node.get());
1972     }
1973     if (token.name() == pTag) {
1974         if (!m_tree.openElements()->inButtonScope(token.name())) {
1975             parseError(token);
1976             processFakeStartTag(pTag);
1977             ASSERT(m_tree.openElements()->inScope(token.name()));
1978             reprocessEndTag(token);
1979             return;
1980         }
1981         m_tree.generateImpliedEndTagsWithExclusion(token.name());
1982         if (!m_tree.currentElement()->hasLocalName(token.name()))
1983             parseError(token);
1984         m_tree.openElements()->popUntilPopped(token.name());
1985         return;
1986     }
1987     if (token.name() == liTag) {
1988         if (!m_tree.openElements()->inListItemScope(token.name())) {
1989             parseError(token);
1990             return;
1991         }
1992         m_tree.generateImpliedEndTagsWithExclusion(token.name());
1993         if (!m_tree.currentElement()->hasLocalName(token.name()))
1994             parseError(token);
1995         m_tree.openElements()->popUntilPopped(token.name());
1996         return;
1997     }
1998     if (token.name() == ddTag
1999         || token.name() == dtTag) {
2000         if (!m_tree.openElements()->inScope(token.name())) {
2001             parseError(token);
2002             return;
2003         }
2004         m_tree.generateImpliedEndTagsWithExclusion(token.name());
2005         if (!m_tree.currentElement()->hasLocalName(token.name()))
2006             parseError(token);
2007         m_tree.openElements()->popUntilPopped(token.name());
2008         return;
2009     }
2010     if (isNumberedHeaderTag(token.name())) {
2011         if (!m_tree.openElements()->hasNumberedHeaderElementInScope()) {
2012             parseError(token);
2013             return;
2014         }
2015         m_tree.generateImpliedEndTags();
2016         if (!m_tree.currentElement()->hasLocalName(token.name()))
2017             parseError(token);
2018         m_tree.openElements()->popUntilNumberedHeaderElementPopped();
2019         return;
2020     }
2021     if (isFormattingTag(token.name())) {
2022         callTheAdoptionAgency(token);
2023         return;
2024     }
2025     if (token.name() == appletTag
2026         || token.name() == marqueeTag
2027         || token.name() == objectTag) {
2028         if (!m_tree.openElements()->inScope(token.name())) {
2029             parseError(token);
2030             return;
2031         }
2032         m_tree.generateImpliedEndTags();
2033         if (!m_tree.currentElement()->hasLocalName(token.name()))
2034             parseError(token);
2035         m_tree.openElements()->popUntilPopped(token.name());
2036         m_tree.activeFormattingElements()->clearToLastMarker();
2037         return;
2038     }
2039     if (token.name() == brTag) {
2040         parseError(token);
2041         processFakeStartTag(brTag);
2042         return;
2043     }
2044     processAnyOtherEndTagForInBody(token);
2045 }
2046
2047 bool HTMLTreeBuilder::processCaptionEndTagForInCaption()
2048 {
2049     if (!m_tree.openElements()->inTableScope(captionTag.localName())) {
2050         ASSERT(isParsingFragment());
2051         // FIXME: parse error
2052         return false;
2053     }
2054     m_tree.generateImpliedEndTags();
2055     // FIXME: parse error if (!m_tree.currentElement()->hasTagName(captionTag))
2056     m_tree.openElements()->popUntilPopped(captionTag.localName());
2057     m_tree.activeFormattingElements()->clearToLastMarker();
2058     setInsertionMode(InTableMode);
2059     return true;
2060 }
2061
2062 bool HTMLTreeBuilder::processTrEndTagForInRow()
2063 {
2064     if (!m_tree.openElements()->inTableScope(trTag.localName())) {
2065         ASSERT(isParsingFragment());
2066         // FIXME: parse error
2067         return false;
2068     }
2069     m_tree.openElements()->popUntilTableRowScopeMarker();
2070     ASSERT(m_tree.currentElement()->hasTagName(trTag));
2071     m_tree.openElements()->pop();
2072     setInsertionMode(InTableBodyMode);
2073     return true;
2074 }
2075
2076 bool HTMLTreeBuilder::processTableEndTagForInTable()
2077 {
2078     if (!m_tree.openElements()->inTableScope(tableTag)) {
2079         ASSERT(isParsingFragment());
2080         // FIXME: parse error.
2081         return false;
2082     }
2083     m_tree.openElements()->popUntilPopped(tableTag.localName());
2084     resetInsertionModeAppropriately();
2085     return true;
2086 }
2087
2088 void HTMLTreeBuilder::processEndTagForInTable(AtomicHTMLToken& token)
2089 {
2090     ASSERT(token.type() == HTMLToken::EndTag);
2091     if (token.name() == tableTag) {
2092         processTableEndTagForInTable();
2093         return;
2094     }
2095     if (token.name() == bodyTag
2096         || isCaptionColOrColgroupTag(token.name())
2097         || token.name() == htmlTag
2098         || isTableBodyContextTag(token.name())
2099         || isTableCellContextTag(token.name())
2100         || token.name() == trTag) {
2101         parseError(token);
2102         return;
2103     }
2104     // Is this redirection necessary here?
2105     HTMLConstructionSite::RedirectToFosterParentGuard redirecter(m_tree);
2106     processEndTagForInBody(token);
2107 }
2108
// Entry point for end tag tokens: dispatches |token| on the current insertion
// mode.
//
// The cases from InitialMode to AfterHeadMode deliberately fall through into
// one another. Each of them either rejects the token with a parse error or
// calls its defaultFor*() helper and continues with the next case; the ASSERT
// at the top of every case states the insertion mode the builder is expected
// to be in when that case is entered. Several other cases call
// reprocessEndTag() or processEndTag() recursively after changing state.
2109 void HTMLTreeBuilder::processEndTag(AtomicHTMLToken& token)
2110 {
2111     ASSERT(token.type() == HTMLToken::EndTag);
2112     switch (insertionMode()) {
2113     case InitialMode:
2114         ASSERT(insertionMode() == InitialMode);
2115         defaultForInitial();
2116         // Fall through.
2117     case BeforeHTMLMode:
2118         ASSERT(insertionMode() == BeforeHTMLMode);
              // Only </head>, </body>, </html> and </br> get past this point.
2119         if (token.name() != headTag && token.name() != bodyTag && token.name() != htmlTag && token.name() != brTag) {
2120             parseError(token);
2121             return;
2122         }
2123         defaultForBeforeHTML();
2124         // Fall through.
2125     case BeforeHeadMode:
2126         ASSERT(insertionMode() == BeforeHeadMode);
2127         if (token.name() != headTag && token.name() != bodyTag && token.name() != htmlTag && token.name() != brTag) {
2128             parseError(token);
2129             return;
2130         }
2131         defaultForBeforeHead();
2132         // Fall through.
2133     case InHeadMode:
2134         ASSERT(insertionMode() == InHeadMode);
              // </head> closes the head element and moves on to AfterHeadMode.
2135         if (token.name() == headTag) {
2136             m_tree.openElements()->popHTMLHeadElement();
2137             setInsertionMode(AfterHeadMode);
2138             return;
2139         }
2140         if (token.name() != bodyTag && token.name() != htmlTag && token.name() != brTag) {
2141             parseError(token);
2142             return;
2143         }
2144         defaultForInHead();
2145         // Fall through.
2146     case AfterHeadMode:
2147         ASSERT(insertionMode() == AfterHeadMode);
2148         if (token.name() != bodyTag && token.name() != htmlTag && token.name() != brTag) {
2149             parseError(token);
2150             return;
2151         }
2152         defaultForAfterHead();
2153         // Fall through
2154     case InBodyMode:
2155         ASSERT(insertionMode() == InBodyMode);
2156         processEndTagForInBody(token);
2157         break;
2158     case InTableMode:
2159         ASSERT(insertionMode() == InTableMode);
2160         processEndTagForInTable(token);
2161         break;
2162     case InCaptionMode:
2163         ASSERT(insertionMode() == InCaptionMode);
2164         if (token.name() == captionTag) {
2165             processCaptionEndTagForInCaption();
2166             return;
2167         }
              // </table> first closes the caption, then is reprocessed.
2168         if (token.name() == tableTag) {
2169             parseError(token);
2170             if (!processCaptionEndTagForInCaption()) {
2171                 ASSERT(isParsingFragment());
2172                 return;
2173             }
2174             reprocessEndTag(token);
2175             return;
2176         }
2177         if (token.name() == bodyTag
2178             || token.name() == colTag
2179             || token.name() == colgroupTag
2180             || token.name() == htmlTag
2181             || isTableBodyContextTag(token.name())
2182             || isTableCellContextTag(token.name())
2183             || token.name() == trTag) {
2184             parseError(token);
2185             return;
2186         }
2187         processEndTagForInBody(token);
2188         break;
2189     case InColumnGroupMode:
2190         ASSERT(insertionMode() == InColumnGroupMode);
2191         if (token.name() == colgroupTag) {
2192             processColgroupEndTagForInColumnGroup();
2193             return;
2194         }
2195         if (token.name() == colTag) {
2196             parseError(token);
2197             return;
2198         }
              // Any other end tag: close the colgroup, then reprocess the token.
2199         if (!processColgroupEndTagForInColumnGroup()) {
2200             ASSERT(isParsingFragment());
2201             return;
2202         }
2203         reprocessEndTag(token);
2204         break;
2205     case InRowMode:
2206         ASSERT(insertionMode() == InRowMode);
2207         processEndTagForInRow(token);
2208         break;
2209     case InCellMode:
2210         ASSERT(insertionMode() == InCellMode);
2211         processEndTagForInCell(token);
2212         break;
2213     case InTableBodyMode:
2214         ASSERT(insertionMode() == InTableBodyMode);
2215         processEndTagForInTableBody(token);
2216         break;
2217     case AfterBodyMode:
2218         ASSERT(insertionMode() == AfterBodyMode);
2219         if (token.name() == htmlTag) {
2220             if (isParsingFragment()) {
2221                 parseError(token);
2222                 return;
2223             }
2224             setInsertionMode(AfterAfterBodyMode);
2225             return;
2226         }
2227         prepareToReprocessToken();
2228         // Fall through.
2229     case AfterAfterBodyMode:
2230         ASSERT(insertionMode() == AfterBodyMode || insertionMode() == AfterAfterBodyMode);
              // Parse error: switch back to InBodyMode and handle the token there.
2231         parseError(token);
2232         setInsertionMode(InBodyMode);
2233         reprocessEndTag(token);
2234         break;
2235     case InHeadNoscriptMode:
2236         ASSERT(insertionMode() == InHeadNoscriptMode);
2237         if (token.name() == noscriptTag) {
2238             ASSERT(m_tree.currentElement()->hasTagName(noscriptTag));
2239             m_tree.openElements()->pop();
2240             ASSERT(m_tree.currentElement()->hasTagName(headTag));
2241             setInsertionMode(InHeadMode);
2242             return;
2243         }
              // Only </br> gets past this point.
2244         if (token.name() != brTag) {
2245             parseError(token);
2246             return;
2247         }
2248         defaultForInHeadNoscript();
2249         processToken(token);
2250         break;
2251     case TextMode:
2252         if (token.name() == scriptTag) {
2253             // Pause ourselves so that parsing stops until the script can be processed by the caller.
2254             m_isPaused = true;
2255             ASSERT(m_tree.currentElement()->hasTagName(scriptTag));
2256             m_scriptToProcess = m_tree.currentElement();
2257             m_scriptToProcessStartPosition = WTF::toOneBasedTextPosition(m_lastScriptElementStartPosition);
2258             m_tree.openElements()->pop();
                  // When fragment scripting is not allowed, the script element's
                  // children are removed.
2259             if (isParsingFragment() && m_fragmentContext.scriptingPermission() == FragmentScriptingNotAllowed)
2260                 m_scriptToProcess->removeAllChildren();
2261             setInsertionMode(m_originalInsertionMode);
2262
2263             // This token will not have been created by the tokenizer if a
2264             // self-closing script tag was encountered and pre-HTML5 parser
2265             // quirks are enabled. We must set the tokenizer's state to
2266             // DataState explicitly if the tokenizer didn't have a chance to.
2267             ASSERT(m_parser->tokenizer()->state() == HTMLTokenizer::DataState || m_usePreHTML5ParserQuirks);
2268             m_parser->tokenizer()->setState(HTMLTokenizer::DataState);
2269             return;
2270         }
              // Any other end tag: pop the current element and return to the
              // insertion mode saved in m_originalInsertionMode.
2271         m_tree.openElements()->pop();
2272         setInsertionMode(m_originalInsertionMode);
2273         break;
2274     case InFramesetMode:
2275         ASSERT(insertionMode() == InFramesetMode);
2276         if (token.name() == framesetTag) {
2277             if (m_tree.currentElement() == m_tree.openElements()->htmlElement()) {
2278                 parseError(token);
2279                 return;
2280             }
2281             m_tree.openElements()->pop();
2282             if (!isParsingFragment() && !m_tree.currentElement()->hasTagName(framesetTag))
2283                 setInsertionMode(AfterFramesetMode);
2284             return;
2285         }
              // NOTE(review): other end tags are dropped here without a parseError()
              // call -- confirm that this is intended.
2286         break;
2287     case AfterFramesetMode:
2288         ASSERT(insertionMode() == AfterFramesetMode);
2289         if (token.name() == htmlTag) {
2290             setInsertionMode(AfterAfterFramesetMode);
2291             return;
2292         }
2293         // Fall through.
2294     case AfterAfterFramesetMode:
2295         ASSERT(insertionMode() == AfterFramesetMode || insertionMode() == AfterAfterFramesetMode);
2296         parseError(token);
2297         break;
2298     case InSelectInTableMode:
2299         ASSERT(insertionMode() == InSelectInTableMode);
2300         if (token.name() == captionTag
2301             || token.name() == tableTag
2302             || isTableBodyContextTag(token.name())
2303             || token.name() == trTag
2304             || isTableCellContextTag(token.name())) {
2305             parseError(token);
                  // If the named element is in table scope, close the select with a
                  // synthesized </select> and then reprocess the token.
2306             if (m_tree.openElements()->inTableScope(token.name())) {
2307                 AtomicHTMLToken endSelect(HTMLToken::EndTag, selectTag.localName());
2308                 processEndTag(endSelect);
2309                 reprocessEndTag(token);
2310             }
2311             return;
2312         }
2313         // Fall through.
2314     case InSelectMode:
2315         ASSERT(insertionMode() == InSelectMode || insertionMode() == InSelectInTableMode);
2316         if (token.name() == optgroupTag) {
                  // An option directly inside an optgroup is closed first.
2317             if (m_tree.currentElement()->hasTagName(optionTag) && m_tree.oneBelowTop()->hasTagName(optgroupTag))
2318                 processFakeEndTag(optionTag);
2319             if (m_tree.currentElement()->hasTagName(optgroupTag)) {
2320                 m_tree.openElements()->pop();
2321                 return;
2322             }
2323             parseError(token);
2324             return;
2325         }
2326         if (token.name() == optionTag) {
2327             if (m_tree.currentElement()->hasTagName(optionTag)) {
2328                 m_tree.openElements()->pop();
2329                 return;
2330             }
2331             parseError(token);
2332             return;
2333         }
2334         if (token.name() == selectTag) {
2335             if (!m_tree.openElements()->inSelectScope(token.name())) {
2336                 ASSERT(isParsingFragment());
2337                 parseError(token);
2338                 return;
2339             }
2340             m_tree.openElements()->popUntilPopped(selectTag.localName());
2341             resetInsertionModeAppropriately();
2342             return;
2343         }
              // NOTE(review): other end tags are dropped here without a parseError()
              // call -- confirm that this is intended.
2344         break;
2345     case InTableTextMode:
              // Run the default InTableText handling, then process the token again
              // in whatever insertion mode is current afterwards.
2346         defaultForInTableText();
2347         processEndTag(token);
2348         break;
2349     case InForeignContentMode:
2350         if (token.name() == SVGNames::scriptTag && m_tree.currentElement()->hasTagName(SVGNames::scriptTag)) {
2351             notImplemented();
2352             return;
2353         }
2354         if (m_tree.currentElement()->namespaceURI() != xhtmlNamespaceURI) {
2355             // FIXME: This code just wants an Element* iterator, instead of an ElementRecord*
2356             HTMLElementStack::ElementRecord* nodeRecord = m_tree.openElements()->topRecord();
2357             if (!nodeRecord->element()->hasLocalName(token.name()))
2358                 parseError(token);
                  // Walk the stack until an element with a matching local name is
                  // found (pop through it and reset the mode) or an element in the
                  // XHTML namespace is reached (stop walking).
2359             while (1) {
2360                 if (nodeRecord->element()->hasLocalName(token.name())) {
2361                     m_tree.openElements()->popUntilPopped(nodeRecord->element());
2362                     resetForeignInsertionMode();
2363                     return;
2364                 }
2365                 nodeRecord = nodeRecord->next();
2366                 if (nodeRecord->element()->namespaceURI() == xhtmlNamespaceURI)
2367                     break;
2368             }
2369         }
2370         // Any other end tag (also the last two steps of "An end tag, if the current node is not an element in the HTML namespace."
2371         processForeignContentUsingInBodyModeAndResetMode(token);
2372         break;
2373     }
2374 }
2375
2376 void HTMLTreeBuilder::prepareToReprocessToken()
2377 {
2378     if (m_hasPendingForeignInsertionModeSteps) {
2379         resetForeignInsertionMode();
2380         m_hasPendingForeignInsertionModeSteps = false;
2381     }
2382 }
2383
2384 void HTMLTreeBuilder::reprocessStartTag(AtomicHTMLToken& token)
2385 {
2386     prepareToReprocessToken();
2387     processStartTag(token);
2388 }
2389
2390 void HTMLTreeBuilder::reprocessEndTag(AtomicHTMLToken& token)
2391 {
2392     prepareToReprocessToken();
2393     processEndTag(token);
2394 }
2395
2396 class HTMLTreeBuilder::FakeInsertionMode {
2397     WTF_MAKE_NONCOPYABLE(FakeInsertionMode);
2398 public:
2399     FakeInsertionMode(HTMLTreeBuilder* treeBuilder, InsertionMode mode)
2400         : m_treeBuilder(treeBuilder)
2401         , m_originalMode(treeBuilder->insertionMode())
2402     {
2403         m_treeBuilder->setFakeInsertionMode(mode);
2404     }
2405
2406     ~FakeInsertionMode()
2407     {
2408         if (m_treeBuilder->isFakeInsertionMode())
2409             m_treeBuilder->setInsertionMode(m_originalMode);
2410     }
2411
2412 private:
2413     HTMLTreeBuilder* m_treeBuilder;
2414     InsertionMode m_originalMode;
2415 };
2416
2417 void HTMLTreeBuilder::processForeignContentUsingInBodyModeAndResetMode(AtomicHTMLToken& token)
2418 {
2419     m_hasPendingForeignInsertionModeSteps = true;
2420     {
2421         FakeInsertionMode fakeMode(this, InBodyMode);
2422         processToken(token);
2423     }
2424     if (m_hasPendingForeignInsertionModeSteps)
2425         resetForeignInsertionMode();
2426 }
2427
2428 void HTMLTreeBuilder::resetForeignInsertionMode()
2429 {
2430     if (insertionMode() == InForeignContentMode)
2431         resetInsertionModeAppropriately();
2432 }
2433
2434 void HTMLTreeBuilder::processComment(AtomicHTMLToken& token)
2435 {
2436     ASSERT(token.type() == HTMLToken::Comment);
2437     if (m_insertionMode == InitialMode
2438         || m_insertionMode == BeforeHTMLMode
2439         || m_insertionMode == AfterAfterBodyMode
2440         || m_insertionMode == AfterAfterFramesetMode) {
2441         m_tree.insertCommentOnDocument(token);
2442         return;
2443     }
2444     if (m_insertionMode == AfterBodyMode) {
2445         m_tree.insertCommentOnHTMLHtmlElement(token);
2446         return;
2447     }
2448     if (m_insertionMode == InTableTextMode) {
2449         defaultForInTableText();
2450         processComment(token);
2451         return;
2452     }
2453     m_tree.insertComment(token);
2454 }
2455
2456 void HTMLTreeBuilder::processCharacter(AtomicHTMLToken& token)
2457 {
2458     ASSERT(token.type() == HTMLToken::Character);
2459     ExternalCharacterTokenBuffer buffer(token);
2460     processCharacterBuffer(buffer);
2461 }
2462
2463 void HTMLTreeBuilder::processCharacterBuffer(ExternalCharacterTokenBuffer& buffer)
2464 {
2465 ReprocessBuffer:
2466     switch (insertionMode()) {
2467     case InitialMode: {
2468         ASSERT(insertionMode() == InitialMode);
2469         buffer.skipLeadingWhitespace();
2470         if (buffer.isEmpty())
2471             return;
2472         defaultForInitial();
2473         // Fall through.
2474     }
2475     case BeforeHTMLMode: {
2476         ASSERT(insertionMode() == BeforeHTMLMode);
2477         buffer.skipLeadingWhitespace();
2478         if (buffer.isEmpty())
2479             return;
2480         defaultForBeforeHTML();
2481         // Fall through.
2482     }
2483     case BeforeHeadMode: {
2484         ASSERT(insertionMode() == BeforeHeadMode);
2485         buffer.skipLeadingWhitespace();
2486         if (buffer.isEmpty())
2487             return;
2488         defaultForBeforeHead();
2489         // Fall through.
2490     }
2491     case InHeadMode: {
2492         ASSERT(insertionMode() == InHeadMode);
2493         String leadingWhitespace = buffer.takeLeadingWhitespace();
2494         if (!leadingWhitespace.isEmpty())
2495             m_tree.insertTextNode(leadingWhitespace);
2496         if (buffer.isEmpty())
2497             return;
2498         defaultForInHead();
2499         // Fall through.
2500     }
2501     case AfterHeadMode: {
2502         ASSERT(insertionMode() == AfterHeadMode);
2503         String leadingWhitespace = buffer.takeLeadingWhitespace();
2504         if (!leadingWhitespace.isEmpty())
2505             m_tree.insertTextNode(leadingWhitespace);
2506         if (buffer.isEmpty())
2507             return;
2508         defaultForAfterHead();
2509         // Fall through.
2510     }
2511     case InBodyMode:
2512     case InCaptionMode:
2513     case InCellMode: {
2514         ASSERT(insertionMode() == InBodyMode || insertionMode() == InCaptionMode || insertionMode() == InCellMode);
2515         m_tree.reconstructTheActiveFormattingElements();
2516         String characters = buffer.takeRemaining();
2517         m_tree.insertTextNode(characters);
2518         if (m_framesetOk && !isAllWhitespaceOrReplacementCharacters(characters))
2519             m_framesetOk = false;
2520         break;
2521     }
2522     case InTableMode:
2523     case InTableBodyMode:
2524     case InRowMode: {
2525         ASSERT(insertionMode() == InTableMode || insertionMode() == InTableBodyMode || insertionMode() == InRowMode);
2526         ASSERT(m_pendingTableCharacters.isEmpty());
2527         m_originalInsertionMode = m_insertionMode;
2528         setInsertionMode(InTableTextMode);
2529         prepareToReprocessToken();
2530         // Fall through.
2531     }
2532     case InTableTextMode: {
2533         buffer.giveRemainingTo(m_pendingTableCharacters);
2534         break;
2535     }
2536     case InColumnGroupMode: {
2537         ASSERT(insertionMode() == InColumnGroupMode);
2538         String leadingWhitespace = buffer.takeLeadingWhitespace();
2539         if (!leadingWhitespace.isEmpty())
2540             m_tree.insertTextNode(leadingWhitespace);
2541         if (buffer.isEmpty())
2542             return;
2543         if (!processColgroupEndTagForInColumnGroup()) {
2544             ASSERT(isParsingFragment());
2545             // The spec tells us to drop these characters on the floor.
2546             buffer.takeLeadingNonWhitespace();
2547             if (buffer.isEmpty())
2548                 return;
2549         }
2550         prepareToReprocessToken();
2551         goto ReprocessBuffer;
2552     }
2553     case AfterBodyMode:
2554     case AfterAfterBodyMode: {
2555         ASSERT(insertionMode() == AfterBodyMode || insertionMode() == AfterAfterBodyMode);
2556         // FIXME: parse error
2557         setInsertionMode(InBodyMode);
2558         prepareToReprocessToken();
2559         goto ReprocessBuffer;
2560         break;
2561     }
2562     case TextMode: {
2563         ASSERT(insertionMode() == TextMode);
2564         m_tree.insertTextNode(buffer.takeRemaining());
2565         break;
2566     }
2567     case InHeadNoscriptMode: {
2568         ASSERT(insertionMode() == InHeadNoscriptMode);
2569         String leadingWhitespace = buffer.takeLeadingWhitespace();
2570         if (!leadingWhitespace.isEmpty())
2571             m_tree.insertTextNode(leadingWhitespace);
2572         if (buffer.isEmpty())
2573             return;
2574         defaultForInHeadNoscript();
2575         goto ReprocessBuffer;
2576         break;
2577     }
2578     case InFramesetMode:
2579     case AfterFramesetMode: {
2580         ASSERT(insertionMode() == InFramesetMode || insertionMode() == AfterFramesetMode || insertionMode() == AfterAfterFramesetMode);
2581         String leadingWhitespace = buffer.takeRemainingWhitespace();
2582         if (!leadingWhitespace.isEmpty())
2583             m_tree.insertTextNode(leadingWhitespace);
2584         // FIXME: We should generate a parse error if we skipped over any
2585         // non-whitespace characters.
2586         break;
2587     }
2588     case InSelectInTableMode:
2589     case InSelectMode: {
2590         ASSERT(insertionMode() == InSelectMode || insertionMode() == InSelectInTableMode);
2591         m_tree.insertTextNode(buffer.takeRemaining());
2592         break;
2593     }
2594     case InForeignContentMode: {
2595         ASSERT(insertionMode() == InForeignContentMode);
2596         String characters = buffer.takeRemaining();
2597         m_tree.insertTextNode(characters);
2598         if (m_framesetOk && !isAllWhitespace(characters))
2599             m_framesetOk = false;
2600         break;
2601     }
2602     case AfterAfterFramesetMode: {
2603         String leadingWhitespace = buffer.takeRemainingWhitespace();
2604         if (!leadingWhitespace.isEmpty()) {
2605             m_tree.reconstructTheActiveFormattingElements();
2606             m_tree.insertTextNode(leadingWhitespace);
2607         }
2608         // FIXME: We should generate a parse error if we skipped over any
2609         // non-whitespace characters.
2610         break;
2611     }
2612     }
2613 }
2614
2615 void HTMLTreeBuilder::processEndOfFile(AtomicHTMLToken& token)
2616 {
2617     ASSERT(token.type() == HTMLToken::EndOfFile);
2618     switch (insertionMode()) {
2619     case InitialMode:
2620         ASSERT(insertionMode() == InitialMode);
2621         defaultForInitial();
2622         // Fall through.
2623     case BeforeHTMLMode:
2624         ASSERT(insertionMode() == BeforeHTMLMode);
2625         defaultForBeforeHTML();
2626         // Fall through.
2627     case BeforeHeadMode:
2628         ASSERT(insertionMode() == BeforeHeadMode);
2629         defaultForBeforeHead();
2630         // Fall through.
2631     case InHeadMode:
2632         ASSERT(insertionMode() == InHeadMode);
2633         defaultForInHead();
2634         // Fall through.
2635     case AfterHeadMode:
2636         ASSERT(insertionMode() == AfterHeadMode);
2637         defaultForAfterHead();
2638         // Fall through
2639     case InBodyMode:
2640     case InCellMode:
2641     case InCaptionMode:
2642     case InRowMode:
2643         ASSERT(insertionMode() == InBodyMode || insertionMode() == InCellMode || insertionMode() == InCaptionMode || insertionMode() == InRowMode);
2644         notImplemented(); // Emit parse error based on what elements are still open.
2645         break;
2646     case AfterBodyMode:
2647     case AfterAfterBodyMode:
2648         ASSERT(insertionMode() == AfterBodyMode || insertionMode() == AfterAfterBodyMode);
2649         break;
2650     case InHeadNoscriptMode:
2651         ASSERT(insertionMode() == InHeadNoscriptMode);
2652         defaultForInHeadNoscript();
2653         processEndOfFile(token);
2654         return;
2655     case AfterFramesetMode:
2656     case AfterAfterFramesetMode:
2657         ASSERT(insertionMode() == AfterFramesetMode || insertionMode() == AfterAfterFramesetMode);
2658         break;
2659     case InFramesetMode:
2660     case InTableMode:
2661     case InTableBodyMode:
2662     case InSelectInTableMode:
2663     case InSelectMode:
2664         ASSERT(insertionMode() == InSelectMode || insertionMode() == InSelectInTableMode || insertionMode() == InTableMode || insertionMode() == InFramesetMode || insertionMode() == InTableBodyMode);
2665         if (m_tree.currentElement() != m_tree.openElements()->htmlElement())
2666             parseError(token);
2667         break;
2668     case InColumnGroupMode:
2669         if (m_tree.currentElement() == m_tree.openElements()->htmlElement()) {
2670             ASSERT(isParsingFragment());
2671             return; // FIXME: Should we break here instead of returning?
2672         }
2673         if (!processColgroupEndTagForInColumnGroup()) {
2674             ASSERT(isParsingFragment());
2675             return; // FIXME: Should we break here instead of returning?
2676         }
2677         prepareToReprocessToken();
2678         processEndOfFile(token);
2679         return;
2680     case InForeignContentMode:
2681         setInsertionMode(InBodyMode);
2682         processEndOfFile(token);
2683         return;
2684     case InTableTextMode:
2685         defaultForInTableText();
2686         processEndOfFile(token);
2687         return;
2688     case TextMode:
2689         parseError(token);
2690         if (m_tree.currentElement()->hasTagName(scriptTag))
2691             notImplemented(); // mark the script element as "already started".
2692         m_tree.openElements()->pop();
2693         setInsertionMode(m_originalInsertionMode);
2694         prepareToReprocessToken();
2695         processEndOfFile(token);
2696         return;
2697     }
2698     ASSERT(m_tree.openElements()->top());
2699     m_tree.openElements()->popAll();
2700 }
2701
2702 void HTMLTreeBuilder::defaultForInitial()
2703 {
2704     notImplemented();
2705     if (!m_fragmentContext.fragment())
2706         m_document->setCompatibilityMode(Document::QuirksMode);
2707     // FIXME: parse error
2708     setInsertionMode(BeforeHTMLMode);
2709     prepareToReprocessToken();
2710 }
2711
2712 void HTMLTreeBuilder::defaultForBeforeHTML()
2713 {
2714     AtomicHTMLToken startHTML(HTMLToken::StartTag, htmlTag.localName());
2715     m_tree.insertHTMLHtmlStartTagBeforeHTML(startHTML);
2716     setInsertionMode(BeforeHeadMode);
2717     prepareToReprocessToken();
2718 }
2719
2720 void HTMLTreeBuilder::defaultForBeforeHead()
2721 {
2722     AtomicHTMLToken startHead(HTMLToken::StartTag, headTag.localName());
2723     processStartTag(startHead);
2724     prepareToReprocessToken();
2725 }
2726
2727 void HTMLTreeBuilder::defaultForInHead()
2728 {
2729     AtomicHTMLToken endHead(HTMLToken::EndTag, headTag.localName());
2730     processEndTag(endHead);
2731     prepareToReprocessToken();
2732 }
2733
2734 void HTMLTreeBuilder::defaultForInHeadNoscript()
2735 {
2736     AtomicHTMLToken endNoscript(HTMLToken::EndTag, noscriptTag.localName());
2737     processEndTag(endNoscript);
2738     prepareToReprocessToken();
2739 }
2740
2741 void HTMLTreeBuilder::defaultForAfterHead()
2742 {
2743     AtomicHTMLToken startBody(HTMLToken::StartTag, bodyTag.localName());
2744     processStartTag(startBody);
2745     m_framesetOk = true;
2746     prepareToReprocessToken();
2747 }
2748
2749 void HTMLTreeBuilder::defaultForInTableText()
2750 {
2751     String characters = String::adopt(m_pendingTableCharacters);
2752     if (!isAllWhitespace(characters)) {
2753         // FIXME: parse error
2754         HTMLConstructionSite::RedirectToFosterParentGuard redirecter(m_tree);
2755         m_tree.reconstructTheActiveFormattingElements();
2756         m_tree.insertTextNode(characters);
2757         m_framesetOk = false;
2758         setInsertionMode(m_originalInsertionMode);
2759         prepareToReprocessToken();
2760         return;
2761     }
2762     m_tree.insertTextNode(characters);
2763     setInsertionMode(m_originalInsertionMode);
2764     prepareToReprocessToken();
2765 }
2766
2767 bool HTMLTreeBuilder::processStartTagForInHead(AtomicHTMLToken& token)
2768 {
2769     ASSERT(token.type() == HTMLToken::StartTag);
2770     if (token.name() == htmlTag) {
2771         m_tree.insertHTMLHtmlStartTagInBody(token);
2772         return true;
2773     }
2774     if (token.name() == baseTag
2775         || token.name() == basefontTag
2776         || token.name() == bgsoundTag
2777         || token.name() == commandTag
2778         || token.name() == linkTag
2779         || token.name() == metaTag) {
2780         m_tree.insertSelfClosingHTMLElement(token);
2781         // Note: The custom processing for the <meta> tag is done in HTMLMetaElement::process().
2782         return true;
2783     }
2784     if (token.name() == titleTag) {
2785         processGenericRCDATAStartTag(token);
2786         return true;
2787     }
2788     if (token.name() == noscriptTag) {
2789         if (scriptEnabled(m_document->frame())) {
2790             processGenericRawTextStartTag(token);
2791             return true;
2792         }
2793         m_tree.insertHTMLElement(token);
2794         setInsertionMode(InHeadNoscriptMode);
2795         return true;
2796     }
2797     if (token.name() == noframesTag || token.name() == styleTag) {
2798         processGenericRawTextStartTag(token);
2799         return true;
2800     }
2801     if (token.name() == scriptTag) {
2802         processScriptStartTag(token);
2803         if (m_usePreHTML5ParserQuirks && token.selfClosing())
2804             processFakeEndTag(scriptTag);
2805         return true;
2806     }
2807     if (token.name() == headTag) {
2808         parseError(token);
2809         return true;
2810     }
2811     return false;
2812 }
2813
2814 void HTMLTreeBuilder::processGenericRCDATAStartTag(AtomicHTMLToken& token)
2815 {
2816     ASSERT(token.type() == HTMLToken::StartTag);
2817     m_tree.insertHTMLElement(token);
2818     m_parser->tokenizer()->setState(HTMLTokenizer::RCDATAState);
2819     m_originalInsertionMode = m_insertionMode;
2820     setInsertionMode(TextMode);
2821 }
2822
2823 void HTMLTreeBuilder::processGenericRawTextStartTag(AtomicHTMLToken& token)
2824 {
2825     ASSERT(token.type() == HTMLToken::StartTag);
2826     m_tree.insertHTMLElement(token);
2827     m_parser->tokenizer()->setState(HTMLTokenizer::RAWTEXTState);
2828     m_originalInsertionMode = m_insertionMode;
2829     setInsertionMode(TextMode);
2830 }
2831
2832 void HTMLTreeBuilder::processScriptStartTag(AtomicHTMLToken& token)
2833 {
2834     ASSERT(token.type() == HTMLToken::StartTag);
2835     m_tree.insertScriptElement(token);
2836     m_parser->tokenizer()->setState(HTMLTokenizer::ScriptDataState);
2837     m_originalInsertionMode = m_insertionMode;
2838
2839     TextPosition0 position = m_parser->textPosition();
2840
2841     ASSERT(position.m_line.zeroBasedInt() == m_parser->tokenizer()->lineNumber());
2842
2843     m_lastScriptElementStartPosition = position;
2844
2845     setInsertionMode(TextMode);
2846 }
2847
2848 void HTMLTreeBuilder::finished()
2849 {
2850     ASSERT(m_document);
2851     if (isParsingFragment()) {
2852         m_fragmentContext.finished();
2853         return;
2854     }
2855
2856     // Warning, this may detach the parser. Do not do anything else after this.
2857     m_document->finishedParsing();
2858 }
2859
2860 bool HTMLTreeBuilder::scriptEnabled(Frame* frame)
2861 {
2862     if (!frame)
2863         return false;
2864     return frame->script()->canExecuteScripts(NotAboutToExecuteScript);
2865 }
2866
2867 bool HTMLTreeBuilder::pluginsEnabled(Frame* frame)
2868 {
2869     if (!frame)
2870         return false;
2871     return frame->loader()->subframeLoader()->allowPlugins(NotAboutToInstantiatePlugin);
2872 }
2873
2874 }