2011-03-02 Sheriff Bot <webkit.review.bot@gmail.com>
[WebKit-https.git] / Source / WebCore / html / parser / HTMLTreeBuilder.cpp
1 /*
2  * Copyright (C) 2010 Google, Inc. All Rights Reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  * 1. Redistributions of source code must retain the above copyright
8  *    notice, this list of conditions and the following disclaimer.
9  * 2. Redistributions in binary form must reproduce the above copyright
10  *    notice, this list of conditions and the following disclaimer in the
11  *    documentation and/or other materials provided with the distribution.
12  *
13  * THIS SOFTWARE IS PROVIDED BY GOOGLE INC. ``AS IS'' AND ANY
14  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL GOOGLE INC. OR
17  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
20  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
21  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24  */
25
26 #include "config.h"
27 #include "HTMLTreeBuilder.h"
28
29 #include "Comment.h"
30 #include "DOMWindow.h"
31 #include "DocumentFragment.h"
32 #include "DocumentType.h"
33 #include "Frame.h"
34 #include "HTMLDocument.h"
35 #include "HTMLDocumentParser.h"
36 #include "HTMLElementFactory.h"
37 #include "HTMLFormElement.h"
38 #include "HTMLHtmlElement.h"
39 #include "HTMLNames.h"
40 #include "HTMLParserIdioms.h"
41 #include "HTMLScriptElement.h"
42 #include "HTMLToken.h"
43 #include "HTMLTokenizer.h"
44 #include "LocalizedStrings.h"
45 #include "MathMLNames.h"
46 #include "NotImplemented.h"
47 #include "SVGNames.h"
48 #include "ScriptController.h"
49 #include "Text.h"
50 #include "XLinkNames.h"
51 #include "XMLNSNames.h"
52 #include "XMLNames.h"
53 #include <wtf/unicode/CharacterNames.h>
54
55 namespace WebCore {
56
57 using namespace HTMLNames;
58
// Sentinel meaning "no line number has been recorded".
// NOTE(review): nothing in the visible part of this file reads this constant;
// uninitializedPositionValue1() spells out -1 directly. Confirm it is still
// referenced further down before touching it.
static const int uninitializedLineNumberValue = -1;
60
61 static TextPosition1 uninitializedPositionValue1()
62 {
63     return TextPosition1(WTF::OneBasedNumber::fromOneBasedInt(-1), WTF::OneBasedNumber::base());
64 }
65
66 namespace {
67
68 inline bool isHTMLSpaceOrReplacementCharacter(UChar character)
69 {
70     return isHTMLSpace(character) || character == replacementCharacter;
71 }
72
73 inline bool isAllWhitespace(const String& string)
74 {
75     return string.isAllSpecialCharacters<isHTMLSpace>();
76 }
77
78 inline bool isAllWhitespaceOrReplacementCharacters(const String& string)
79 {
80     return string.isAllSpecialCharacters<isHTMLSpaceOrReplacementCharacter>();
81 }
82
83 bool isNumberedHeaderTag(const AtomicString& tagName)
84 {
85     return tagName == h1Tag
86         || tagName == h2Tag
87         || tagName == h3Tag
88         || tagName == h4Tag
89         || tagName == h5Tag
90         || tagName == h6Tag;
91 }
92
93 bool isCaptionColOrColgroupTag(const AtomicString& tagName)
94 {
95     return tagName == captionTag
96         || tagName == colTag
97         || tagName == colgroupTag;
98 }
99
100 bool isTableCellContextTag(const AtomicString& tagName)
101 {
102     return tagName == thTag || tagName == tdTag;
103 }
104
105 bool isTableBodyContextTag(const AtomicString& tagName)
106 {
107     return tagName == tbodyTag
108         || tagName == tfootTag
109         || tagName == theadTag;
110 }
111
112 // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#special
113 bool isSpecialNode(Node* node)
114 {
115     if (node->hasTagName(MathMLNames::miTag)
116         || node->hasTagName(MathMLNames::moTag)
117         || node->hasTagName(MathMLNames::mnTag)
118         || node->hasTagName(MathMLNames::msTag)
119         || node->hasTagName(MathMLNames::mtextTag)
120         || node->hasTagName(MathMLNames::annotation_xmlTag)
121         || node->hasTagName(SVGNames::foreignObjectTag)
122         || node->hasTagName(SVGNames::descTag)
123         || node->hasTagName(SVGNames::titleTag))
124         return true;
125     if (node->namespaceURI() != xhtmlNamespaceURI)
126         return false;
127     const AtomicString& tagName = node->localName();
128     return tagName == addressTag
129         || tagName == appletTag
130         || tagName == areaTag
131         || tagName == articleTag
132         || tagName == asideTag
133         || tagName == baseTag
134         || tagName == basefontTag
135         || tagName == bgsoundTag
136         || tagName == blockquoteTag
137         || tagName == bodyTag
138         || tagName == brTag
139         || tagName == buttonTag
140         || tagName == captionTag
141         || tagName == centerTag
142         || tagName == colTag
143         || tagName == colgroupTag
144         || tagName == commandTag
145         || tagName == ddTag
146         || tagName == detailsTag
147         || tagName == dirTag
148         || tagName == divTag
149         || tagName == dlTag
150         || tagName == dtTag
151         || tagName == embedTag
152         || tagName == fieldsetTag
153         || tagName == figcaptionTag
154         || tagName == figureTag
155         || tagName == footerTag
156         || tagName == formTag
157         || tagName == frameTag
158         || tagName == framesetTag
159         || isNumberedHeaderTag(tagName)
160         || tagName == headTag
161         || tagName == headerTag
162         || tagName == hgroupTag
163         || tagName == hrTag
164         || tagName == htmlTag
165         || tagName == iframeTag
166         || tagName == imgTag
167         || tagName == inputTag
168         || tagName == isindexTag
169         || tagName == liTag
170         || tagName == linkTag
171         || tagName == listingTag
172         || tagName == marqueeTag
173         || tagName == menuTag
174         || tagName == metaTag
175         || tagName == navTag
176         || tagName == noembedTag
177         || tagName == noframesTag
178         || tagName == noscriptTag
179         || tagName == objectTag
180         || tagName == olTag
181         || tagName == pTag
182         || tagName == paramTag
183         || tagName == plaintextTag
184         || tagName == preTag
185         || tagName == scriptTag
186         || tagName == sectionTag
187         || tagName == selectTag
188         || tagName == styleTag
189         || tagName == summaryTag
190         || tagName == tableTag
191         || isTableBodyContextTag(tagName)
192         || tagName == tdTag
193         || tagName == textareaTag
194         || tagName == thTag
195         || tagName == titleTag
196         || tagName == trTag
197         || tagName == ulTag
198         || tagName == wbrTag
199         || tagName == xmpTag;
200 }
201
202 bool isNonAnchorNonNobrFormattingTag(const AtomicString& tagName)
203 {
204     return tagName == bTag
205         || tagName == bigTag
206         || tagName == codeTag
207         || tagName == emTag
208         || tagName == fontTag
209         || tagName == iTag
210         || tagName == sTag
211         || tagName == smallTag
212         || tagName == strikeTag
213         || tagName == strongTag
214         || tagName == ttTag
215         || tagName == uTag;
216 }
217
218 bool isNonAnchorFormattingTag(const AtomicString& tagName)
219 {
220     return tagName == nobrTag
221         || isNonAnchorNonNobrFormattingTag(tagName);
222 }
223
224 // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#formatting
225 bool isFormattingTag(const AtomicString& tagName)
226 {
227     return tagName == aTag || isNonAnchorFormattingTag(tagName);
228 }
229
230 HTMLFormElement* closestFormAncestor(Element* element)
231 {
232     while (element) {
233         if (element->hasTagName(formTag))
234             return static_cast<HTMLFormElement*>(element);
235         ContainerNode* parent = element->parentNode();
236         if (!parent || !parent->isElementNode())
237             return 0;
238         element = static_cast<Element*>(parent);
239     }
240     return 0;
241 }
242
243 } // namespace
244
245 class HTMLTreeBuilder::ExternalCharacterTokenBuffer {
246     WTF_MAKE_NONCOPYABLE(ExternalCharacterTokenBuffer);
247 public:
248     explicit ExternalCharacterTokenBuffer(AtomicHTMLToken& token)
249         : m_current(token.characters().data())
250         , m_end(m_current + token.characters().size())
251     {
252         ASSERT(!isEmpty());
253     }
254
255     explicit ExternalCharacterTokenBuffer(const String& string)
256         : m_current(string.characters())
257         , m_end(m_current + string.length())
258     {
259         ASSERT(!isEmpty());
260     }
261
262     ~ExternalCharacterTokenBuffer()
263     {
264         ASSERT(isEmpty());
265     }
266
267     bool isEmpty() const { return m_current == m_end; }
268
269     void skipLeadingWhitespace()
270     {
271         skipLeading<isHTMLSpace>();
272     }
273
274     String takeLeadingWhitespace()
275     {
276         return takeLeading<isHTMLSpace>();
277     }
278
279     String takeLeadingNonWhitespace()
280     {
281         return takeLeading<isNotHTMLSpace>();
282     }
283
284     String takeRemaining()
285     {
286         ASSERT(!isEmpty());
287         const UChar* start = m_current;
288         m_current = m_end;
289         return String(start, m_current - start);
290     }
291
292     void giveRemainingTo(Vector<UChar>& recipient)
293     {
294         recipient.append(m_current, m_end - m_current);
295         m_current = m_end;
296     }
297
298     String takeRemainingWhitespace()
299     {
300         ASSERT(!isEmpty());
301         Vector<UChar> whitespace;
302         do {
303             UChar cc = *m_current++;
304             if (isHTMLSpace(cc))
305                 whitespace.append(cc);
306         } while (m_current < m_end);
307         // Returning the null string when there aren't any whitespace
308         // characters is slightly cleaner semantically because we don't want
309         // to insert a text node (as opposed to inserting an empty text node).
310         if (whitespace.isEmpty())
311             return String();
312         return String::adopt(whitespace);
313     }
314
315 private:
316     template<bool characterPredicate(UChar)>
317     void skipLeading()
318     {
319         ASSERT(!isEmpty());
320         while (characterPredicate(*m_current)) {
321             if (++m_current == m_end)
322                 return;
323         }
324     }
325
326     template<bool characterPredicate(UChar)>
327     String takeLeading()
328     {
329         ASSERT(!isEmpty());
330         const UChar* start = m_current;
331         skipLeading<characterPredicate>();
332         if (start == m_current)
333             return String();
334         return String(start, m_current - start);
335     }
336
337     const UChar* m_current;
338     const UChar* m_end;
339 };
340
341
// Constructs a tree builder for parsing into |document|. The construction
// site (m_tree) is bound to the document, both the current and the original
// insertion mode start at InitialMode, and m_scriptToProcessStartPosition
// starts at the sentinel from uninitializedPositionValue1(). |reportErrors|
// and |usePreHTML5ParserQuirks| are stored as given.
HTMLTreeBuilder::HTMLTreeBuilder(HTMLDocumentParser* parser, HTMLDocument* document, bool reportErrors, bool usePreHTML5ParserQuirks)
    : m_framesetOk(true)
    , m_document(document)
    , m_tree(document)
    , m_reportErrors(reportErrors)
    , m_isPaused(false)
    , m_insertionMode(InitialMode)
    , m_originalInsertionMode(InitialMode)
    , m_parser(parser)
    , m_scriptToProcessStartPosition(uninitializedPositionValue1())
    , m_lastScriptElementStartPosition(TextPosition0::belowRangePosition())
    , m_usePreHTML5ParserQuirks(usePreHTML5ParserQuirks)
    , m_hasPendingForeignInsertionModeSteps(false)
{
}
357
// FIXME: Member variables should be grouped into self-initializing structs to
// minimize code duplication between these constructors.
//
// Constructs a tree builder for parsing into |fragment|, optionally in the
// context of |contextElement|. The document is taken from the fragment, error
// reporting is always off, and |scriptingPermission| is forwarded both to the
// fragment context and to the construction site. When a context element is
// given, the constructor immediately processes a fake <html> start tag,
// resets the insertion mode, and sets the construction site's form to the
// nearest <form> ancestor of the context element.
HTMLTreeBuilder::HTMLTreeBuilder(HTMLDocumentParser* parser, DocumentFragment* fragment, Element* contextElement, FragmentScriptingPermission scriptingPermission, bool usePreHTML5ParserQuirks)
    : m_framesetOk(true)
    , m_fragmentContext(fragment, contextElement, scriptingPermission)
    , m_document(fragment->document())
    , m_tree(fragment, scriptingPermission)
    , m_reportErrors(false) // FIXME: Why not report errors in fragments?
    , m_isPaused(false)
    , m_insertionMode(InitialMode)
    , m_originalInsertionMode(InitialMode)
    , m_parser(parser)
    , m_scriptToProcessStartPosition(uninitializedPositionValue1())
    , m_lastScriptElementStartPosition(TextPosition0::belowRangePosition())
    , m_usePreHTML5ParserQuirks(usePreHTML5ParserQuirks)
    , m_hasPendingForeignInsertionModeSteps(false)
{
    if (contextElement) {
        // Steps 4.2-4.6 of the HTML5 Fragment Case parsing algorithm:
        // http://www.whatwg.org/specs/web-apps/current-work/multipage/the-end.html#fragment-case
        processFakeStartTag(htmlTag);
        resetInsertionModeAppropriately();
        m_tree.setForm(closestFormAncestor(contextElement));
    }
}
383
// Intentionally empty: the destructor body performs no explicit work.
HTMLTreeBuilder::~HTMLTreeBuilder()
{
}
387
// Severs the builder from its document: clears m_document and detaches the
// construction site.
void HTMLTreeBuilder::detach()
{
    // This call makes little sense in fragment mode, but for consistency
    // DocumentParser expects detach() to always be called before it's destroyed.
    m_document = 0;
    // HTMLConstructionSite might be on the callstack when detach() is called
    // otherwise we'd just call m_tree.clear() here instead.
    m_tree.detach();
}
397
// Default context used when no fragment is being parsed: no fragment, no
// context element, and scripting permission set to FragmentScriptingAllowed.
HTMLTreeBuilder::FragmentParsingContext::FragmentParsingContext()
    : m_fragment(0)
    , m_contextElement(0)
    , m_scriptingPermission(FragmentScriptingAllowed)
{
}
404
// Records the fragment being filled, the (possibly null) context element and
// the scripting permission. |fragment| is dereferenced unconditionally and is
// asserted to have no children yet.
HTMLTreeBuilder::FragmentParsingContext::FragmentParsingContext(DocumentFragment* fragment, Element* contextElement, FragmentScriptingPermission scriptingPermission)
    : m_fragment(fragment)
    , m_contextElement(contextElement)
    , m_scriptingPermission(scriptingPermission)
{
    ASSERT(!fragment->hasChildNodes());
}
412
413 void HTMLTreeBuilder::FragmentParsingContext::finished()
414 {
415     if (!m_contextElement)
416         return;
417     
418     // The HTML5 spec says to return the children of the fragment's document
419     // element when there is a context element (10.4.7).
420     RefPtr<ContainerNode> documentElement = firstElementChild(m_fragment);
421     m_fragment->removeChildren();
422     ASSERT(documentElement);
423     m_fragment->takeAllChildrenFrom(documentElement.get());
424 }
425
// Intentionally empty: the destructor body performs no explicit work.
HTMLTreeBuilder::FragmentParsingContext::~FragmentParsingContext()
{
}
429
430 PassRefPtr<Element> HTMLTreeBuilder::takeScriptToProcess(TextPosition1& scriptStartPosition)
431 {
432     // Unpause ourselves, callers may pause us again when processing the script.
433     // The HTML5 spec is written as though scripts are executed inside the tree
434     // builder.  We pause the parser to exit the tree builder, and then resume
435     // before running scripts.
436     m_isPaused = false;
437     scriptStartPosition = m_scriptToProcessStartPosition;
438     m_scriptToProcessStartPosition = uninitializedPositionValue1();
439     return m_scriptToProcess.release();
440 }
441
442 void HTMLTreeBuilder::constructTreeFromToken(HTMLToken& rawToken)
443 {
444     AtomicHTMLToken token(rawToken);
445     constructTreeFromAtomicToken(token);
446 }
447
448 void HTMLTreeBuilder::constructTreeFromAtomicToken(AtomicHTMLToken& token)
449 {
450     processToken(token);
451
452     // Swallowing U+0000 characters isn't in the HTML5 spec, but turning all
453     // the U+0000 characters into replacement characters has compatibility
454     // problems.
455     m_parser->tokenizer()->setForceNullCharacterReplacement(m_insertionMode == TextMode || m_insertionMode == InForeignContentMode);
456     m_parser->tokenizer()->setShouldAllowCDATA(m_insertionMode == InForeignContentMode && m_tree.currentElement()->namespaceURI() != xhtmlNamespaceURI);
457 }
458
459 void HTMLTreeBuilder::processToken(AtomicHTMLToken& token)
460 {
461     switch (token.type()) {
462     case HTMLToken::Uninitialized:
463         ASSERT_NOT_REACHED();
464         break;
465     case HTMLToken::DOCTYPE:
466         processDoctypeToken(token);
467         break;
468     case HTMLToken::StartTag:
469         processStartTag(token);
470         break;
471     case HTMLToken::EndTag:
472         processEndTag(token);
473         break;
474     case HTMLToken::Comment:
475         processComment(token);
476         return;
477     case HTMLToken::Character:
478         processCharacter(token);
479         break;
480     case HTMLToken::EndOfFile:
481         processEndOfFile(token);
482         break;
483     }
484 }
485
486 void HTMLTreeBuilder::processDoctypeToken(AtomicHTMLToken& token)
487 {
488     ASSERT(token.type() == HTMLToken::DOCTYPE);
489     if (m_insertionMode == InitialMode) {
490         m_tree.insertDoctype(token);
491         setInsertionMode(BeforeHTMLMode);
492         return;
493     }
494     if (m_insertionMode == InTableTextMode) {
495         defaultForInTableText();
496         processDoctypeToken(token);
497         return;
498     }
499     parseError(token);
500 }
501
502 void HTMLTreeBuilder::processFakeStartTag(const QualifiedName& tagName, PassRefPtr<NamedNodeMap> attributes)
503 {
504     // FIXME: We'll need a fancier conversion than just "localName" for SVG/MathML tags.
505     AtomicHTMLToken fakeToken(HTMLToken::StartTag, tagName.localName(), attributes);
506     processStartTag(fakeToken);
507 }
508
509 void HTMLTreeBuilder::processFakeEndTag(const QualifiedName& tagName)
510 {
511     // FIXME: We'll need a fancier conversion than just "localName" for SVG/MathML tags.
512     AtomicHTMLToken fakeToken(HTMLToken::EndTag, tagName.localName());
513     processEndTag(fakeToken);
514 }
515
516 void HTMLTreeBuilder::processFakeCharacters(const String& characters)
517 {
518     ASSERT(!characters.isEmpty());
519     ExternalCharacterTokenBuffer buffer(characters);
520     processCharacterBuffer(buffer);
521 }
522
523 void HTMLTreeBuilder::processFakePEndTagIfPInButtonScope()
524 {
525     if (!m_tree.openElements()->inButtonScope(pTag.localName()))
526         return;
527     AtomicHTMLToken endP(HTMLToken::EndTag, pTag.localName());
528     processEndTag(endP);
529 }
530
531 PassRefPtr<NamedNodeMap> HTMLTreeBuilder::attributesForIsindexInput(AtomicHTMLToken& token)
532 {
533     RefPtr<NamedNodeMap> attributes = token.takeAtributes();
534     if (!attributes)
535         attributes = NamedNodeMap::create();
536     else {
537         attributes->removeAttribute(nameAttr);
538         attributes->removeAttribute(actionAttr);
539         attributes->removeAttribute(promptAttr);
540     }
541
542     RefPtr<Attribute> mappedAttribute = Attribute::createMapped(nameAttr, isindexTag.localName());
543     attributes->insertAttribute(mappedAttribute.release(), false);
544     return attributes.release();
545 }
546
547 void HTMLTreeBuilder::processIsindexStartTagForInBody(AtomicHTMLToken& token)
548 {
549     ASSERT(token.type() == HTMLToken::StartTag);
550     ASSERT(token.name() == isindexTag);
551     parseError(token);
552     if (m_tree.form())
553         return;
554     notImplemented(); // Acknowledge self-closing flag
555     processFakeStartTag(formTag);
556     RefPtr<Attribute> actionAttribute = token.getAttributeItem(actionAttr);
557     if (actionAttribute) {
558         ASSERT(m_tree.currentElement()->hasTagName(formTag));
559         m_tree.currentElement()->setAttribute(actionAttr, actionAttribute->value());
560     }
561     processFakeStartTag(hrTag);
562     processFakeStartTag(labelTag);
563     RefPtr<Attribute> promptAttribute = token.getAttributeItem(promptAttr);
564     if (promptAttribute)
565         processFakeCharacters(promptAttribute->value());
566     else
567         processFakeCharacters(searchableIndexIntroduction());
568     processFakeStartTag(inputTag, attributesForIsindexInput(token));
569     notImplemented(); // This second set of characters may be needed by non-english locales.
570     processFakeEndTag(labelTag);
571     processFakeStartTag(hrTag);
572     processFakeEndTag(formTag);
573 }
574
575 namespace {
576
577 bool isLi(const Element* element)
578 {
579     return element->hasTagName(liTag);
580 }
581
582 bool isDdOrDt(const Element* element)
583 {
584     return element->hasTagName(ddTag)
585         || element->hasTagName(dtTag);
586 }
587
588 }
589
590 template <bool shouldClose(const Element*)>
591 void HTMLTreeBuilder::processCloseWhenNestedTag(AtomicHTMLToken& token)
592 {
593     m_framesetOk = false;
594     HTMLElementStack::ElementRecord* nodeRecord = m_tree.openElements()->topRecord();
595     while (1) {
596         Element* node = nodeRecord->element();
597         if (shouldClose(node)) {
598             processFakeEndTag(node->tagQName());
599             break;
600         }
601         if (isSpecialNode(node) && !node->hasTagName(addressTag) && !node->hasTagName(divTag) && !node->hasTagName(pTag))
602             break;
603         nodeRecord = nodeRecord->next();
604     }
605     processFakePEndTagIfPInButtonScope();
606     m_tree.insertHTMLElement(token);
607 }
608
609 namespace {
610
// Lookup table type shared by the adjust*() helpers in this namespace: maps a
// name string (a lower-cased local name, or a "prefix:local" string) to the
// QualifiedName that should replace it.
typedef HashMap<AtomicString, QualifiedName> PrefixedNameToQualifiedNameMap;
612
613 void mapLoweredLocalNameToName(PrefixedNameToQualifiedNameMap* map, QualifiedName** names, size_t length)
614 {
615     for (size_t i = 0; i < length; ++i) {
616         const QualifiedName& name = *names[i];
617         const AtomicString& localName = name.localName();
618         AtomicString loweredLocalName = localName.lower();
619         if (loweredLocalName != localName)
620             map->add(loweredLocalName, name);
621     }
622 }
623
624 void adjustSVGTagNameCase(AtomicHTMLToken& token)
625 {
626     static PrefixedNameToQualifiedNameMap* caseMap = 0;
627     if (!caseMap) {
628         caseMap = new PrefixedNameToQualifiedNameMap;
629         size_t length = 0;
630         QualifiedName** svgTags = SVGNames::getSVGTags(&length);
631         mapLoweredLocalNameToName(caseMap, svgTags, length);
632     }
633
634     const QualifiedName& casedName = caseMap->get(token.name());
635     if (casedName.localName().isNull())
636         return;
637     token.setName(casedName.localName());
638 }
639
640 template<QualifiedName** getAttrs(size_t* length)>
641 void adjustAttributes(AtomicHTMLToken& token)
642 {
643     static PrefixedNameToQualifiedNameMap* caseMap = 0;
644     if (!caseMap) {
645         caseMap = new PrefixedNameToQualifiedNameMap;
646         size_t length = 0;
647         QualifiedName** attrs = getAttrs(&length);
648         mapLoweredLocalNameToName(caseMap, attrs, length);
649     }
650
651     NamedNodeMap* attributes = token.attributes();
652     if (!attributes)
653         return;
654
655     for (unsigned x = 0; x < attributes->length(); ++x) {
656         Attribute* attribute = attributes->attributeItem(x);
657         const QualifiedName& casedName = caseMap->get(attribute->localName());
658         if (!casedName.localName().isNull())
659             attribute->parserSetName(casedName);
660     }
661 }
662
663 void adjustSVGAttributes(AtomicHTMLToken& token)
664 {
665     adjustAttributes<SVGNames::getSVGAttrs>(token);
666 }
667
668 void adjustMathMLAttributes(AtomicHTMLToken& token)
669 {
670     adjustAttributes<MathMLNames::getMathMLAttrs>(token);
671 }
672
673 void addNamesWithPrefix(PrefixedNameToQualifiedNameMap* map, const AtomicString& prefix, QualifiedName** names, size_t length)
674 {
675     for (size_t i = 0; i < length; ++i) {
676         QualifiedName* name = names[i];
677         const AtomicString& localName = name->localName();
678         AtomicString prefixColonLocalName(prefix + ":" + localName);
679         QualifiedName nameWithPrefix(prefix, localName, name->namespaceURI());
680         map->add(prefixColonLocalName, nameWithPrefix);
681     }
682 }
683
684 void adjustForeignAttributes(AtomicHTMLToken& token)
685 {
686     static PrefixedNameToQualifiedNameMap* map = 0;
687     if (!map) {
688         map = new PrefixedNameToQualifiedNameMap;
689         size_t length = 0;
690         QualifiedName** attrs = XLinkNames::getXLinkAttrs(&length);
691         addNamesWithPrefix(map, "xlink", attrs, length);
692
693         attrs = XMLNames::getXMLAttrs(&length);
694         addNamesWithPrefix(map, "xml", attrs, length);
695
696         map->add("xmlns", XMLNSNames::xmlnsAttr);
697         map->add("xmlns:xlink", QualifiedName("xmlns", "xlink", XMLNSNames::xmlnsNamespaceURI));
698     }
699
700     NamedNodeMap* attributes = token.attributes();
701     if (!attributes)
702         return;
703
704     for (unsigned x = 0; x < attributes->length(); ++x) {
705         Attribute* attribute = attributes->attributeItem(x);
706         const QualifiedName& name = map->get(attribute->localName());
707         if (!name.localName().isNull())
708             attribute->parserSetName(name);
709     }
710 }
711
712 }
713
714 void HTMLTreeBuilder::processStartTagForInBody(AtomicHTMLToken& token)
715 {
716     ASSERT(token.type() == HTMLToken::StartTag);
717     if (token.name() == htmlTag) {
718         m_tree.insertHTMLHtmlStartTagInBody(token);
719         return;
720     }
721     if (token.name() == baseTag
722         || token.name() == basefontTag
723         || token.name() == bgsoundTag
724         || token.name() == commandTag
725         || token.name() == linkTag
726         || token.name() == metaTag
727         || token.name() == noframesTag
728         || token.name() == scriptTag
729         || token.name() == styleTag
730         || token.name() == titleTag) {
731         bool didProcess = processStartTagForInHead(token);
732         ASSERT_UNUSED(didProcess, didProcess);
733         return;
734     }
735     if (token.name() == bodyTag) {
736         if (!m_tree.openElements()->secondElementIsHTMLBodyElement() || m_tree.openElements()->hasOnlyOneElement()) {
737             ASSERT(isParsingFragment());
738             return;
739         }
740         m_tree.insertHTMLBodyStartTagInBody(token);
741         return;
742     }
743     if (token.name() == framesetTag) {
744         parseError(token);
745         if (!m_tree.openElements()->secondElementIsHTMLBodyElement() || m_tree.openElements()->hasOnlyOneElement()) {
746             ASSERT(isParsingFragment());
747             return;
748         }
749         if (!m_framesetOk)
750             return;
751         ExceptionCode ec = 0;
752         m_tree.openElements()->bodyElement()->remove(ec);
753         ASSERT(!ec);
754         m_tree.openElements()->popUntil(m_tree.openElements()->bodyElement());
755         m_tree.openElements()->popHTMLBodyElement();
756         ASSERT(m_tree.openElements()->top() == m_tree.openElements()->htmlElement());
757         m_tree.insertHTMLElement(token);
758         setInsertionMode(InFramesetMode);
759         return;
760     }
761     if (token.name() == addressTag
762         || token.name() == articleTag
763         || token.name() == asideTag
764         || token.name() == blockquoteTag
765         || token.name() == centerTag
766         || token.name() == detailsTag
767         || token.name() == dirTag
768         || token.name() == divTag
769         || token.name() == dlTag
770         || token.name() == fieldsetTag
771         || token.name() == figcaptionTag
772         || token.name() == figureTag
773         || token.name() == footerTag
774         || token.name() == headerTag
775         || token.name() == hgroupTag
776         || token.name() == menuTag
777         || token.name() == navTag
778         || token.name() == olTag
779         || token.name() == pTag
780         || token.name() == sectionTag
781         || token.name() == summaryTag
782         || token.name() == ulTag) {
783         processFakePEndTagIfPInButtonScope();
784         m_tree.insertHTMLElement(token);
785         return;
786     }
787     if (isNumberedHeaderTag(token.name())) {
788         processFakePEndTagIfPInButtonScope();
789         if (isNumberedHeaderTag(m_tree.currentElement()->localName())) {
790             parseError(token);
791             m_tree.openElements()->pop();
792         }
793         m_tree.insertHTMLElement(token);
794         return;
795     }
796     if (token.name() == preTag || token.name() == listingTag) {
797         processFakePEndTagIfPInButtonScope();
798         m_tree.insertHTMLElement(token);
799         m_parser->tokenizer()->setSkipLeadingNewLineForListing(true);
800         m_framesetOk = false;
801         return;
802     }
803     if (token.name() == formTag) {
804         if (m_tree.form()) {
805             parseError(token);
806             return;
807         }
808         processFakePEndTagIfPInButtonScope();
809         m_tree.insertHTMLFormElement(token);
810         return;
811     }
812     if (token.name() == liTag) {
813         processCloseWhenNestedTag<isLi>(token);
814         return;
815     }
816     if (token.name() == ddTag || token.name() == dtTag) {
817         processCloseWhenNestedTag<isDdOrDt>(token);
818         return;
819     }
820     if (token.name() == plaintextTag) {
821         processFakePEndTagIfPInButtonScope();
822         m_tree.insertHTMLElement(token);
823         m_parser->tokenizer()->setState(HTMLTokenizer::PLAINTEXTState);
824         return;
825     }
826     if (token.name() == buttonTag) {
827         if (m_tree.openElements()->inScope(buttonTag)) {
828             parseError(token);
829             processFakeEndTag(buttonTag);
830             reprocessStartTag(token); // FIXME: Could we just fall through here?
831             return;
832         }
833         m_tree.reconstructTheActiveFormattingElements();
834         m_tree.insertHTMLElement(token);
835         m_framesetOk = false;
836         return;
837     }
838     if (token.name() == aTag) {
839         Element* activeATag = m_tree.activeFormattingElements()->closestElementInScopeWithName(aTag.localName());
840         if (activeATag) {
841             parseError(token);
842             processFakeEndTag(aTag);
843             m_tree.activeFormattingElements()->remove(activeATag);
844             if (m_tree.openElements()->contains(activeATag))
845                 m_tree.openElements()->remove(activeATag);
846         }
847         m_tree.reconstructTheActiveFormattingElements();
848         m_tree.insertFormattingElement(token);
849         return;
850     }
851     if (isNonAnchorNonNobrFormattingTag(token.name())) {
852         m_tree.reconstructTheActiveFormattingElements();
853         m_tree.insertFormattingElement(token);
854         return;
855     }
856     if (token.name() == nobrTag) {
857         m_tree.reconstructTheActiveFormattingElements();
858         if (m_tree.openElements()->inScope(nobrTag)) {
859             parseError(token);
860             processFakeEndTag(nobrTag);
861             m_tree.reconstructTheActiveFormattingElements();
862         }
863         m_tree.insertFormattingElement(token);
864         return;
865     }
866     if (token.name() == appletTag
867         || token.name() == marqueeTag
868         || token.name() == objectTag) {
869         m_tree.reconstructTheActiveFormattingElements();
870         m_tree.insertHTMLElement(token);
871         m_tree.activeFormattingElements()->appendMarker();
872         m_framesetOk = false;
873         return;
874     }
875     if (token.name() == tableTag) {
876         if (!m_document->inQuirksMode() && m_tree.openElements()->inButtonScope(pTag))
877             processFakeEndTag(pTag);
878         m_tree.insertHTMLElement(token);
879         m_framesetOk = false;
880         setInsertionMode(InTableMode);
881         return;
882     }
883     if (token.name() == imageTag) {
884         parseError(token);
885         // Apparently we're not supposed to ask.
886         token.setName(imgTag.localName());
887         prepareToReprocessToken();
888         // Note the fall through to the imgTag handling below!
889     }
890     if (token.name() == areaTag
891         || token.name() == brTag
892         || token.name() == embedTag
893         || token.name() == imgTag
894         || token.name() == keygenTag
895         || token.name() == wbrTag) {
896         m_tree.reconstructTheActiveFormattingElements();
897         m_tree.insertSelfClosingHTMLElement(token);
898         m_framesetOk = false;
899         return;
900     }
901     if (token.name() == inputTag) {
902         RefPtr<Attribute> typeAttribute = token.getAttributeItem(typeAttr);
903         m_tree.reconstructTheActiveFormattingElements();
904         m_tree.insertSelfClosingHTMLElement(token);
905         if (!typeAttribute || !equalIgnoringCase(typeAttribute->value(), "hidden"))
906             m_framesetOk = false;
907         return;
908     }
909     if (token.name() == paramTag
910         || token.name() == sourceTag
911         || token.name() == trackTag) {
912         m_tree.insertSelfClosingHTMLElement(token);
913         return;
914     }
915     if (token.name() == hrTag) {
916         processFakePEndTagIfPInButtonScope();
917         m_tree.insertSelfClosingHTMLElement(token);
918         m_framesetOk = false;
919         return;
920     }
921     if (token.name() == isindexTag) {
922         processIsindexStartTagForInBody(token);
923         return;
924     }
925     if (token.name() == textareaTag) {
926         m_tree.insertHTMLElement(token);
927         m_parser->tokenizer()->setSkipLeadingNewLineForListing(true);
928         m_parser->tokenizer()->setState(HTMLTokenizer::RCDATAState);
929         m_originalInsertionMode = m_insertionMode;
930         m_framesetOk = false;
931         setInsertionMode(TextMode);
932         return;
933     }
934     if (token.name() == xmpTag) {
935         processFakePEndTagIfPInButtonScope();
936         m_tree.reconstructTheActiveFormattingElements();
937         m_framesetOk = false;
938         processGenericRawTextStartTag(token);
939         return;
940     }
941     if (token.name() == iframeTag) {
942         m_framesetOk = false;
943         processGenericRawTextStartTag(token);
944         return;
945     }
946     if (token.name() == noembedTag && pluginsEnabled(m_document->frame())) {
947         processGenericRawTextStartTag(token);
948         return;
949     }
950     if (token.name() == noscriptTag && scriptEnabled(m_document->frame())) {
951         processGenericRawTextStartTag(token);
952         return;
953     }
954     if (token.name() == selectTag) {
955         m_tree.reconstructTheActiveFormattingElements();
956         m_tree.insertHTMLElement(token);
957         m_framesetOk = false;
958         if (m_insertionMode == InTableMode
959              || m_insertionMode == InCaptionMode
960              || m_insertionMode == InColumnGroupMode
961              || m_insertionMode == InTableBodyMode
962              || m_insertionMode == InRowMode
963              || m_insertionMode == InCellMode)
964             setInsertionMode(InSelectInTableMode);
965         else
966             setInsertionMode(InSelectMode);
967         return;
968     }
969     if (token.name() == optgroupTag || token.name() == optionTag) {
970         if (m_tree.openElements()->inScope(optionTag.localName())) {
971             AtomicHTMLToken endOption(HTMLToken::EndTag, optionTag.localName());
972             processEndTag(endOption);
973         }
974         m_tree.reconstructTheActiveFormattingElements();
975         m_tree.insertHTMLElement(token);
976         return;
977     }
978     if (token.name() == rpTag || token.name() == rtTag) {
979         if (m_tree.openElements()->inScope(rubyTag.localName())) {
980             m_tree.generateImpliedEndTags();
981             if (!m_tree.currentElement()->hasTagName(rubyTag)) {
982                 parseError(token);
983                 m_tree.openElements()->popUntil(rubyTag.localName());
984             }
985         }
986         m_tree.insertHTMLElement(token);
987         return;
988     }
989     if (token.name() == MathMLNames::mathTag.localName()) {
990         m_tree.reconstructTheActiveFormattingElements();
991         adjustMathMLAttributes(token);
992         adjustForeignAttributes(token);
993         m_tree.insertForeignElement(token, MathMLNames::mathmlNamespaceURI);
994         if (m_insertionMode != InForeignContentMode)
995             setInsertionMode(InForeignContentMode);
996         return;
997     }
998     if (token.name() == SVGNames::svgTag.localName()) {
999         m_tree.reconstructTheActiveFormattingElements();
1000         adjustSVGAttributes(token);
1001         adjustForeignAttributes(token);
1002         m_tree.insertForeignElement(token, SVGNames::svgNamespaceURI);
1003         if (m_insertionMode != InForeignContentMode)
1004             setInsertionMode(InForeignContentMode);
1005         return;
1006     }
1007     if (isCaptionColOrColgroupTag(token.name())
1008         || token.name() == frameTag
1009         || token.name() == headTag
1010         || isTableBodyContextTag(token.name())
1011         || isTableCellContextTag(token.name())
1012         || token.name() == trTag) {
1013         parseError(token);
1014         return;
1015     }
1016     m_tree.reconstructTheActiveFormattingElements();
1017     m_tree.insertHTMLElement(token);
1018 }
1019
1020 bool HTMLTreeBuilder::processColgroupEndTagForInColumnGroup()
1021 {
1022     if (m_tree.currentElement() == m_tree.openElements()->htmlElement()) {
1023         ASSERT(isParsingFragment());
1024         // FIXME: parse error
1025         return false;
1026     }
1027     m_tree.openElements()->pop();
1028     setInsertionMode(InTableMode);
1029     return true;
1030 }
1031
1032 // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#close-the-cell
1033 void HTMLTreeBuilder::closeTheCell()
1034 {
1035     ASSERT(insertionMode() == InCellMode);
1036     if (m_tree.openElements()->inTableScope(tdTag)) {
1037         ASSERT(!m_tree.openElements()->inTableScope(thTag));
1038         processFakeEndTag(tdTag);
1039         return;
1040     }
1041     ASSERT(m_tree.openElements()->inTableScope(thTag));
1042     processFakeEndTag(thTag);
1043     ASSERT(insertionMode() == InRowMode);
1044 }
1045
1046 void HTMLTreeBuilder::processStartTagForInTable(AtomicHTMLToken& token)
1047 {
1048     ASSERT(token.type() == HTMLToken::StartTag);
1049     if (token.name() == captionTag) {
1050         m_tree.openElements()->popUntilTableScopeMarker();
1051         m_tree.activeFormattingElements()->appendMarker();
1052         m_tree.insertHTMLElement(token);
1053         setInsertionMode(InCaptionMode);
1054         return;
1055     }
1056     if (token.name() == colgroupTag) {
1057         m_tree.openElements()->popUntilTableScopeMarker();
1058         m_tree.insertHTMLElement(token);
1059         setInsertionMode(InColumnGroupMode);
1060         return;
1061     }
1062     if (token.name() == colTag) {
1063         processFakeStartTag(colgroupTag);
1064         ASSERT(InColumnGroupMode);
1065         reprocessStartTag(token);
1066         return;
1067     }
1068     if (isTableBodyContextTag(token.name())) {
1069         m_tree.openElements()->popUntilTableScopeMarker();
1070         m_tree.insertHTMLElement(token);
1071         setInsertionMode(InTableBodyMode);
1072         return;
1073     }
1074     if (isTableCellContextTag(token.name())
1075         || token.name() == trTag) {
1076         processFakeStartTag(tbodyTag);
1077         ASSERT(insertionMode() == InTableBodyMode);
1078         reprocessStartTag(token);
1079         return;
1080     }
1081     if (token.name() == tableTag) {
1082         parseError(token);
1083         if (!processTableEndTagForInTable()) {
1084             ASSERT(isParsingFragment());
1085             return;
1086         }
1087         reprocessStartTag(token);
1088         return;
1089     }
1090     if (token.name() == styleTag || token.name() == scriptTag) {
1091         processStartTagForInHead(token);
1092         return;
1093     }
1094     if (token.name() == inputTag) {
1095         Attribute* typeAttribute = token.getAttributeItem(typeAttr);
1096         if (typeAttribute && equalIgnoringCase(typeAttribute->value(), "hidden")) {
1097             parseError(token);
1098             m_tree.insertSelfClosingHTMLElement(token);
1099             return;
1100         }
1101         // Fall through to "anything else" case.
1102     }
1103     if (token.name() == formTag) {
1104         parseError(token);
1105         if (m_tree.form())
1106             return;
1107         m_tree.insertHTMLFormElement(token, true);
1108         m_tree.openElements()->pop();
1109         return;
1110     }
1111     parseError(token);
1112     HTMLConstructionSite::RedirectToFosterParentGuard redirecter(m_tree);
1113     processStartTagForInBody(token);
1114 }
1115
1116 namespace {
1117
1118 bool shouldProcessForeignContentUsingInBodyInsertionMode(AtomicHTMLToken& token, Element* currentElement)
1119 {
1120     ASSERT(token.type() == HTMLToken::StartTag);
1121     if (currentElement->hasTagName(MathMLNames::miTag)
1122         || currentElement->hasTagName(MathMLNames::moTag)
1123         || currentElement->hasTagName(MathMLNames::mnTag)
1124         || currentElement->hasTagName(MathMLNames::msTag)
1125         || currentElement->hasTagName(MathMLNames::mtextTag)) {
1126         return token.name() != MathMLNames::mglyphTag
1127             && token.name() != MathMLNames::malignmarkTag;
1128     }
1129     if (currentElement->hasTagName(MathMLNames::annotation_xmlTag))
1130         return token.name() == SVGNames::svgTag;
1131     if (currentElement->hasTagName(SVGNames::foreignObjectTag)
1132         || currentElement->hasTagName(SVGNames::descTag)
1133         || currentElement->hasTagName(SVGNames::titleTag))
1134         return true;
1135     return currentElement->namespaceURI() == HTMLNames::xhtmlNamespaceURI;
1136 }
1137
1138 }
1139
1140 void HTMLTreeBuilder::processStartTag(AtomicHTMLToken& token)
1141 {
1142     ASSERT(token.type() == HTMLToken::StartTag);
1143     switch (insertionMode()) {
1144     case InitialMode:
1145         ASSERT(insertionMode() == InitialMode);
1146         defaultForInitial();
1147         // Fall through.
1148     case BeforeHTMLMode:
1149         ASSERT(insertionMode() == BeforeHTMLMode);
1150         if (token.name() == htmlTag) {
1151             m_tree.insertHTMLHtmlStartTagBeforeHTML(token);
1152             setInsertionMode(BeforeHeadMode);
1153             return;
1154         }
1155         defaultForBeforeHTML();
1156         // Fall through.
1157     case BeforeHeadMode:
1158         ASSERT(insertionMode() == BeforeHeadMode);
1159         if (token.name() == htmlTag) {
1160             m_tree.insertHTMLHtmlStartTagInBody(token);
1161             return;
1162         }
1163         if (token.name() == headTag) {
1164             m_tree.insertHTMLHeadElement(token);
1165             setInsertionMode(InHeadMode);
1166             return;
1167         }
1168         defaultForBeforeHead();
1169         // Fall through.
1170     case InHeadMode:
1171         ASSERT(insertionMode() == InHeadMode);
1172         if (processStartTagForInHead(token))
1173             return;
1174         defaultForInHead();
1175         // Fall through.
1176     case AfterHeadMode:
1177         ASSERT(insertionMode() == AfterHeadMode);
1178         if (token.name() == htmlTag) {
1179             m_tree.insertHTMLHtmlStartTagInBody(token);
1180             return;
1181         }
1182         if (token.name() == bodyTag) {
1183             m_framesetOk = false;
1184             m_tree.insertHTMLBodyElement(token);
1185             setInsertionMode(InBodyMode);
1186             return;
1187         }
1188         if (token.name() == framesetTag) {
1189             m_tree.insertHTMLElement(token);
1190             setInsertionMode(InFramesetMode);
1191             return;
1192         }
1193         if (token.name() == baseTag
1194             || token.name() == basefontTag
1195             || token.name() == bgsoundTag
1196             || token.name() == linkTag
1197             || token.name() == metaTag
1198             || token.name() == noframesTag
1199             || token.name() == scriptTag
1200             || token.name() == styleTag
1201             || token.name() == titleTag) {
1202             parseError(token);
1203             ASSERT(m_tree.head());
1204             m_tree.openElements()->pushHTMLHeadElement(m_tree.head());
1205             processStartTagForInHead(token);
1206             m_tree.openElements()->removeHTMLHeadElement(m_tree.head());
1207             return;
1208         }
1209         if (token.name() == headTag) {
1210             parseError(token);
1211             return;
1212         }
1213         defaultForAfterHead();
1214         // Fall through
1215     case InBodyMode:
1216         ASSERT(insertionMode() == InBodyMode);
1217         processStartTagForInBody(token);
1218         break;
1219     case InTableMode:
1220         ASSERT(insertionMode() == InTableMode);
1221         processStartTagForInTable(token);
1222         break;
1223     case InCaptionMode:
1224         ASSERT(insertionMode() == InCaptionMode);
1225         if (isCaptionColOrColgroupTag(token.name())
1226             || isTableBodyContextTag(token.name())
1227             || isTableCellContextTag(token.name())
1228             || token.name() == trTag) {
1229             parseError(token);
1230             if (!processCaptionEndTagForInCaption()) {
1231                 ASSERT(isParsingFragment());
1232                 return;
1233             }
1234             reprocessStartTag(token);
1235             return;
1236         }
1237         processStartTagForInBody(token);
1238         break;
1239     case InColumnGroupMode:
1240         ASSERT(insertionMode() == InColumnGroupMode);
1241         if (token.name() == htmlTag) {
1242             m_tree.insertHTMLHtmlStartTagInBody(token);
1243             return;
1244         }
1245         if (token.name() == colTag) {
1246             m_tree.insertSelfClosingHTMLElement(token);
1247             return;
1248         }
1249         if (!processColgroupEndTagForInColumnGroup()) {
1250             ASSERT(isParsingFragment());
1251             return;
1252         }
1253         reprocessStartTag(token);
1254         break;
1255     case InTableBodyMode:
1256         ASSERT(insertionMode() == InTableBodyMode);
1257         if (token.name() == trTag) {
1258             m_tree.openElements()->popUntilTableBodyScopeMarker(); // How is there ever anything to pop?
1259             m_tree.insertHTMLElement(token);
1260             setInsertionMode(InRowMode);
1261             return;
1262         }
1263         if (isTableCellContextTag(token.name())) {
1264             parseError(token);
1265             processFakeStartTag(trTag);
1266             ASSERT(insertionMode() == InRowMode);
1267             reprocessStartTag(token);
1268             return;
1269         }
1270         if (isCaptionColOrColgroupTag(token.name()) || isTableBodyContextTag(token.name())) {
1271             // FIXME: This is slow.
1272             if (!m_tree.openElements()->inTableScope(tbodyTag.localName()) && !m_tree.openElements()->inTableScope(theadTag.localName()) && !m_tree.openElements()->inTableScope(tfootTag.localName())) {
1273                 ASSERT(isParsingFragment());
1274                 parseError(token);
1275                 return;
1276             }
1277             m_tree.openElements()->popUntilTableBodyScopeMarker();
1278             ASSERT(isTableBodyContextTag(m_tree.currentElement()->localName()));
1279             processFakeEndTag(m_tree.currentElement()->tagQName());
1280             reprocessStartTag(token);
1281             return;
1282         }
1283         processStartTagForInTable(token);
1284         break;
1285     case InRowMode:
1286         ASSERT(insertionMode() == InRowMode);
1287         if (isTableCellContextTag(token.name())) {
1288             m_tree.openElements()->popUntilTableRowScopeMarker();
1289             m_tree.insertHTMLElement(token);
1290             setInsertionMode(InCellMode);
1291             m_tree.activeFormattingElements()->appendMarker();
1292             return;
1293         }
1294         if (token.name() == trTag
1295             || isCaptionColOrColgroupTag(token.name())
1296             || isTableBodyContextTag(token.name())) {
1297             if (!processTrEndTagForInRow()) {
1298                 ASSERT(isParsingFragment());
1299                 return;
1300             }
1301             ASSERT(insertionMode() == InTableBodyMode);
1302             reprocessStartTag(token);
1303             return;
1304         }
1305         processStartTagForInTable(token);
1306         break;
1307     case InCellMode:
1308         ASSERT(insertionMode() == InCellMode);
1309         if (isCaptionColOrColgroupTag(token.name())
1310             || isTableCellContextTag(token.name())
1311             || token.name() == trTag
1312             || isTableBodyContextTag(token.name())) {
1313             // FIXME: This could be more efficient.
1314             if (!m_tree.openElements()->inTableScope(tdTag) && !m_tree.openElements()->inTableScope(thTag)) {
1315                 ASSERT(isParsingFragment());
1316                 parseError(token);
1317                 return;
1318             }
1319             closeTheCell();
1320             reprocessStartTag(token);
1321             return;
1322         }
1323         processStartTagForInBody(token);
1324         break;
1325     case AfterBodyMode:
1326     case AfterAfterBodyMode:
1327         ASSERT(insertionMode() == AfterBodyMode || insertionMode() == AfterAfterBodyMode);
1328         if (token.name() == htmlTag) {
1329             m_tree.insertHTMLHtmlStartTagInBody(token);
1330             return;
1331         }
1332         setInsertionMode(InBodyMode);
1333         reprocessStartTag(token);
1334         break;
1335     case InHeadNoscriptMode:
1336         ASSERT(insertionMode() == InHeadNoscriptMode);
1337         if (token.name() == htmlTag) {
1338             m_tree.insertHTMLHtmlStartTagInBody(token);
1339             return;
1340         }
1341         if (token.name() == basefontTag
1342             || token.name() == bgsoundTag
1343             || token.name() == linkTag
1344             || token.name() == metaTag
1345             || token.name() == noframesTag
1346             || token.name() == styleTag) {
1347             bool didProcess = processStartTagForInHead(token);
1348             ASSERT_UNUSED(didProcess, didProcess);
1349             return;
1350         }
1351         if (token.name() == htmlTag || token.name() == noscriptTag) {
1352             parseError(token);
1353             return;
1354         }
1355         defaultForInHeadNoscript();
1356         processToken(token);
1357         break;
1358     case InFramesetMode:
1359         ASSERT(insertionMode() == InFramesetMode);
1360         if (token.name() == htmlTag) {
1361             m_tree.insertHTMLHtmlStartTagInBody(token);
1362             return;
1363         }
1364         if (token.name() == framesetTag) {
1365             m_tree.insertHTMLElement(token);
1366             return;
1367         }
1368         if (token.name() == frameTag) {
1369             m_tree.insertSelfClosingHTMLElement(token);
1370             return;
1371         }
1372         if (token.name() == noframesTag) {
1373             processStartTagForInHead(token);
1374             return;
1375         }
1376         parseError(token);
1377         break;
1378     case AfterFramesetMode:
1379     case AfterAfterFramesetMode:
1380         ASSERT(insertionMode() == AfterFramesetMode || insertionMode() == AfterAfterFramesetMode);
1381         if (token.name() == htmlTag) {
1382             m_tree.insertHTMLHtmlStartTagInBody(token);
1383             return;
1384         }
1385         if (token.name() == noframesTag) {
1386             processStartTagForInHead(token);
1387             return;
1388         }
1389         parseError(token);
1390         break;
1391     case InSelectInTableMode:
1392         ASSERT(insertionMode() == InSelectInTableMode);
1393         if (token.name() == captionTag
1394             || token.name() == tableTag
1395             || isTableBodyContextTag(token.name())
1396             || token.name() == trTag
1397             || isTableCellContextTag(token.name())) {
1398             parseError(token);
1399             AtomicHTMLToken endSelect(HTMLToken::EndTag, selectTag.localName());
1400             processEndTag(endSelect);
1401             reprocessStartTag(token);
1402             return;
1403         }
1404         // Fall through
1405     case InSelectMode:
1406         ASSERT(insertionMode() == InSelectMode || insertionMode() == InSelectInTableMode);
1407         if (token.name() == htmlTag) {
1408             m_tree.insertHTMLHtmlStartTagInBody(token);
1409             return;
1410         }
1411         if (token.name() == optionTag) {
1412             if (m_tree.currentElement()->hasTagName(optionTag)) {
1413                 AtomicHTMLToken endOption(HTMLToken::EndTag, optionTag.localName());
1414                 processEndTag(endOption);
1415             }
1416             m_tree.insertHTMLElement(token);
1417             return;
1418         }
1419         if (token.name() == optgroupTag) {
1420             if (m_tree.currentElement()->hasTagName(optionTag)) {
1421                 AtomicHTMLToken endOption(HTMLToken::EndTag, optionTag.localName());
1422                 processEndTag(endOption);
1423             }
1424             if (m_tree.currentElement()->hasTagName(optgroupTag)) {
1425                 AtomicHTMLToken endOptgroup(HTMLToken::EndTag, optgroupTag.localName());
1426                 processEndTag(endOptgroup);
1427             }
1428             m_tree.insertHTMLElement(token);
1429             return;
1430         }
1431         if (token.name() == selectTag) {
1432             parseError(token);
1433             AtomicHTMLToken endSelect(HTMLToken::EndTag, selectTag.localName());
1434             processEndTag(endSelect);
1435             return;
1436         }
1437         if (token.name() == inputTag
1438             || token.name() == keygenTag
1439             || token.name() == textareaTag) {
1440             parseError(token);
1441             if (!m_tree.openElements()->inSelectScope(selectTag)) {
1442                 ASSERT(isParsingFragment());
1443                 return;
1444             }
1445             AtomicHTMLToken endSelect(HTMLToken::EndTag, selectTag.localName());
1446             processEndTag(endSelect);
1447             reprocessStartTag(token);
1448             return;
1449         }
1450         if (token.name() == scriptTag) {
1451             bool didProcess = processStartTagForInHead(token);
1452             ASSERT_UNUSED(didProcess, didProcess);
1453             return;
1454         }
1455         break;
1456     case InTableTextMode:
1457         defaultForInTableText();
1458         processStartTag(token);
1459         break;
1460     case InForeignContentMode: {
1461         if (shouldProcessForeignContentUsingInBodyInsertionMode(token, m_tree.currentElement())) {
1462             processForeignContentUsingInBodyModeAndResetMode(token);
1463             return;
1464         }
1465         if (token.name() == bTag
1466             || token.name() == bigTag
1467             || token.name() == blockquoteTag
1468             || token.name() == bodyTag
1469             || token.name() == brTag
1470             || token.name() == centerTag
1471             || token.name() == codeTag
1472             || token.name() == ddTag
1473             || token.name() == divTag
1474             || token.name() == dlTag
1475             || token.name() == dtTag
1476             || token.name() == emTag
1477             || token.name() == embedTag
1478             || isNumberedHeaderTag(token.name())
1479             || token.name() == headTag
1480             || token.name() == hrTag
1481             || token.name() == iTag
1482             || token.name() == imgTag
1483             || token.name() == liTag
1484             || token.name() == listingTag
1485             || token.name() == menuTag
1486             || token.name() == metaTag
1487             || token.name() == nobrTag
1488             || token.name() == olTag
1489             || token.name() == pTag
1490             || token.name() == preTag
1491             || token.name() == rubyTag
1492             || token.name() == sTag
1493             || token.name() == smallTag
1494             || token.name() == spanTag
1495             || token.name() == strongTag
1496             || token.name() == strikeTag
1497             || token.name() == subTag
1498             || token.name() == supTag
1499             || token.name() == tableTag
1500             || token.name() == ttTag
1501             || token.name() == uTag
1502             || token.name() == ulTag
1503             || token.name() == varTag
1504             || (token.name() == fontTag && (token.getAttributeItem(colorAttr) || token.getAttributeItem(faceAttr) || token.getAttributeItem(sizeAttr)))) {
1505             parseError(token);
1506             m_tree.openElements()->popUntilForeignContentScopeMarker();
1507             resetInsertionModeAppropriately();
1508             reprocessStartTag(token);
1509             return;
1510         }
1511         const AtomicString& currentNamespace = m_tree.currentElement()->namespaceURI();
1512         if (currentNamespace == MathMLNames::mathmlNamespaceURI)
1513             adjustMathMLAttributes(token);
1514         if (currentNamespace == SVGNames::svgNamespaceURI) {
1515             adjustSVGTagNameCase(token);
1516             adjustSVGAttributes(token);
1517         }
1518         adjustForeignAttributes(token);
1519         m_tree.insertForeignElement(token, currentNamespace);
1520         break;
1521     }
1522     case TextMode:
1523         ASSERT_NOT_REACHED();
1524         break;
1525     }
1526 }
1527
1528 bool HTMLTreeBuilder::processBodyEndTagForInBody(AtomicHTMLToken& token)
1529 {
1530     ASSERT(token.type() == HTMLToken::EndTag);
1531     ASSERT(token.name() == bodyTag);
1532     if (!m_tree.openElements()->inScope(bodyTag.localName())) {
1533         parseError(token);
1534         return false;
1535     }
1536     notImplemented(); // Emit a more specific parse error based on stack contents.
1537     setInsertionMode(AfterBodyMode);
1538     return true;
1539 }
1540
1541 void HTMLTreeBuilder::processAnyOtherEndTagForInBody(AtomicHTMLToken& token)
1542 {
1543     ASSERT(token.type() == HTMLToken::EndTag);
1544     HTMLElementStack::ElementRecord* record = m_tree.openElements()->topRecord();
1545     while (1) {
1546         Element* node = record->element();
1547         if (node->hasLocalName(token.name())) {
1548             m_tree.generateImpliedEndTags();
1549             if (!m_tree.currentElement()->hasLocalName(token.name())) {
1550                 parseError(token);
1551                 // FIXME: This is either a bug in the spec, or a bug in our
1552                 // implementation.  Filed a bug with HTML5:
1553                 // http://www.w3.org/Bugs/Public/show_bug.cgi?id=10080
1554                 // We might have already popped the node for the token in
1555                 // generateImpliedEndTags, just abort.
1556                 if (!m_tree.openElements()->contains(node))
1557                     return;
1558             }
1559             m_tree.openElements()->popUntilPopped(node);
1560             return;
1561         }
1562         if (isSpecialNode(node)) {
1563             parseError(token);
1564             return;
1565         }
1566         record = record->next();
1567     }
1568 }
1569
1570 // FIXME: This probably belongs on HTMLElementStack.
1571 HTMLElementStack::ElementRecord* HTMLTreeBuilder::furthestBlockForFormattingElement(Element* formattingElement)
1572 {
1573     HTMLElementStack::ElementRecord* furthestBlock = 0;
1574     HTMLElementStack::ElementRecord* record = m_tree.openElements()->topRecord();
1575     for (; record; record = record->next()) {
1576         if (record->element() == formattingElement)
1577             return furthestBlock;
1578         if (isSpecialNode(record->element()))
1579             furthestBlock = record;
1580     }
1581     ASSERT_NOT_REACHED();
1582     return 0;
1583 }
1584
1585 // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#parsing-main-inbody
1586 void HTMLTreeBuilder::callTheAdoptionAgency(AtomicHTMLToken& token)
1587 {
1588     // The adoption agency algorithm is N^2.  We limit the number of iterations
1589     // to stop from hanging the whole browser.  This limit is copied from the
1590     // legacy tree builder and might need to be tweaked in the future.
1591     static const int adoptionAgencyIterationLimit = 10;
1592
1593     for (int i = 0; i < adoptionAgencyIterationLimit; ++i) {
1594         // 1.
1595         Element* formattingElement = m_tree.activeFormattingElements()->closestElementInScopeWithName(token.name());
1596         if (!formattingElement || ((m_tree.openElements()->contains(formattingElement)) && !m_tree.openElements()->inScope(formattingElement))) {
1597             parseError(token);
1598             notImplemented(); // Check the stack of open elements for a more specific parse error.
1599             return;
1600         }
1601         HTMLElementStack::ElementRecord* formattingElementRecord = m_tree.openElements()->find(formattingElement);
1602         if (!formattingElementRecord) {
1603             parseError(token);
1604             m_tree.activeFormattingElements()->remove(formattingElement);
1605             return;
1606         }
1607         if (formattingElement != m_tree.currentElement())
1608             parseError(token);
1609         // 2.
1610         HTMLElementStack::ElementRecord* furthestBlock = furthestBlockForFormattingElement(formattingElement);
1611         // 3.
1612         if (!furthestBlock) {
1613             m_tree.openElements()->popUntilPopped(formattingElement);
1614             m_tree.activeFormattingElements()->remove(formattingElement);
1615             return;
1616         }
1617         // 4.
1618         ASSERT(furthestBlock->isAbove(formattingElementRecord));
1619         Element* commonAncestor = formattingElementRecord->next()->element();
1620         // 5.
1621         HTMLFormattingElementList::Bookmark bookmark = m_tree.activeFormattingElements()->bookmarkFor(formattingElement);
1622         // 6.
1623         HTMLElementStack::ElementRecord* node = furthestBlock;
1624         HTMLElementStack::ElementRecord* nextNode = node->next();
1625         HTMLElementStack::ElementRecord* lastNode = furthestBlock;
1626         for (int i = 0; i < adoptionAgencyIterationLimit; ++i) {
1627             // 6.1
1628             node = nextNode;
1629             ASSERT(node);
1630             nextNode = node->next(); // Save node->next() for the next iteration in case node is deleted in 6.2.
1631             // 6.2
1632             if (!m_tree.activeFormattingElements()->contains(node->element())) {
1633                 m_tree.openElements()->remove(node->element());
1634                 node = 0;
1635                 continue;
1636             }
1637             // 6.3
1638             if (node == formattingElementRecord)
1639                 break;
1640             // 6.5
1641             RefPtr<Element> newElement = m_tree.createHTMLElementFromElementRecord(node);
1642             HTMLFormattingElementList::Entry* nodeEntry = m_tree.activeFormattingElements()->find(node->element());
1643             nodeEntry->replaceElement(newElement.get());
1644             node->replaceElement(newElement.release());
1645             // 6.4 -- Intentionally out of order to handle the case where node
1646             // was replaced in 6.5.
1647             // http://www.w3.org/Bugs/Public/show_bug.cgi?id=10096
1648             if (lastNode == furthestBlock)
1649                 bookmark.moveToAfter(nodeEntry);
1650             // 6.6
1651             if (Element* parent = lastNode->element()->parentElement())
1652                 parent->parserRemoveChild(lastNode->element());
1653             node->element()->parserAddChild(lastNode->element());
1654             if (lastNode->element()->parentElement()->attached() && !lastNode->element()->attached())
1655                 lastNode->element()->lazyAttach();
1656             // 6.7
1657             lastNode = node;
1658         }
1659         // 7
1660         const AtomicString& commonAncestorTag = commonAncestor->localName();
1661         if (Element* parent = lastNode->element()->parentElement())
1662             parent->parserRemoveChild(lastNode->element());
1663         // FIXME: If this moves to HTMLConstructionSite, this check should use
1664         // causesFosterParenting(tagName) instead.
1665         if (commonAncestorTag == tableTag
1666             || commonAncestorTag == trTag
1667             || isTableBodyContextTag(commonAncestorTag))
1668             m_tree.fosterParent(lastNode->element());
1669         else {
1670             commonAncestor->parserAddChild(lastNode->element());
1671             if (lastNode->element()->parentElement()->attached() && !lastNode->element()->attached())
1672                 lastNode->element()->lazyAttach();
1673         }
1674         // 8
1675         RefPtr<Element> newElement = m_tree.createHTMLElementFromElementRecord(formattingElementRecord);
1676         // 9
1677         newElement->takeAllChildrenFrom(furthestBlock->element());
1678         // 10
1679         Element* furthestBlockElement = furthestBlock->element();
1680         // FIXME: All this creation / parserAddChild / attach business should
1681         //        be in HTMLConstructionSite.  My guess is that steps 8--12
1682         //        should all be in some HTMLConstructionSite function.
1683         furthestBlockElement->parserAddChild(newElement);
1684         if (furthestBlockElement->attached() && !newElement->attached()) {
1685             // Notice that newElement might already be attached if, for example, one of the reparented
1686             // children is a style element, which attaches itself automatically.
1687             newElement->attach();
1688         }
1689         // 11
1690         m_tree.activeFormattingElements()->swapTo(formattingElement, newElement.get(), bookmark);
1691         // 12
1692         m_tree.openElements()->remove(formattingElement);
1693         m_tree.openElements()->insertAbove(newElement, furthestBlock);
1694     }
1695 }
1696
1697 void HTMLTreeBuilder::resetInsertionModeAppropriately()
1698 {
1699     // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#reset-the-insertion-mode-appropriately
1700     bool last = false;
1701     HTMLElementStack::ElementRecord* nodeRecord = m_tree.openElements()->topRecord();
1702     while (1) {
1703         Element* node = nodeRecord->element();
1704         if (node == m_tree.openElements()->bottom()) {
1705             ASSERT(isParsingFragment());
1706             last = true;
1707             node = m_fragmentContext.contextElement();
1708         }
1709         if (node->hasTagName(selectTag)) {
1710             ASSERT(isParsingFragment());
1711             return setInsertionMode(InSelectMode);
1712         }
1713         if (node->hasTagName(tdTag) || node->hasTagName(thTag))
1714             return setInsertionMode(InCellMode);
1715         if (node->hasTagName(trTag))
1716             return setInsertionMode(InRowMode);
1717         if (node->hasTagName(tbodyTag) || node->hasTagName(theadTag) || node->hasTagName(tfootTag))
1718             return setInsertionMode(InTableBodyMode);
1719         if (node->hasTagName(captionTag))
1720             return setInsertionMode(InCaptionMode);
1721         if (node->hasTagName(colgroupTag)) {
1722             ASSERT(isParsingFragment());
1723             return setInsertionMode(InColumnGroupMode);
1724         }
1725         if (node->hasTagName(tableTag))
1726             return setInsertionMode(InTableMode);
1727         if (node->hasTagName(headTag)) {
1728             ASSERT(isParsingFragment());
1729             return setInsertionMode(InBodyMode);
1730         }
1731         if (node->hasTagName(bodyTag))
1732             return setInsertionMode(InBodyMode);
1733         if (node->hasTagName(framesetTag)) {
1734             ASSERT(isParsingFragment());
1735             return setInsertionMode(InFramesetMode);
1736         }
1737         if (node->hasTagName(htmlTag)) {
1738             ASSERT(isParsingFragment());
1739             return setInsertionMode(BeforeHeadMode);
1740         }
1741         if (node->namespaceURI() == SVGNames::svgNamespaceURI
1742             || node->namespaceURI() == MathMLNames::mathmlNamespaceURI)
1743             return setInsertionMode(InForeignContentMode);
1744         if (last) {
1745             ASSERT(isParsingFragment());
1746             return setInsertionMode(InBodyMode);
1747         }
1748         nodeRecord = nodeRecord->next();
1749     }
1750 }
1751
1752 void HTMLTreeBuilder::processEndTagForInTableBody(AtomicHTMLToken& token)
1753 {
1754     ASSERT(token.type() == HTMLToken::EndTag);
1755     if (isTableBodyContextTag(token.name())) {
1756         if (!m_tree.openElements()->inTableScope(token.name())) {
1757             parseError(token);
1758             return;
1759         }
1760         m_tree.openElements()->popUntilTableBodyScopeMarker();
1761         m_tree.openElements()->pop();
1762         setInsertionMode(InTableMode);
1763         return;
1764     }
1765     if (token.name() == tableTag) {
1766         // FIXME: This is slow.
1767         if (!m_tree.openElements()->inTableScope(tbodyTag.localName()) && !m_tree.openElements()->inTableScope(theadTag.localName()) && !m_tree.openElements()->inTableScope(tfootTag.localName())) {
1768             ASSERT(isParsingFragment());
1769             parseError(token);
1770             return;
1771         }
1772         m_tree.openElements()->popUntilTableBodyScopeMarker();
1773         ASSERT(isTableBodyContextTag(m_tree.currentElement()->localName()));
1774         processFakeEndTag(m_tree.currentElement()->tagQName());
1775         reprocessEndTag(token);
1776         return;
1777     }
1778     if (token.name() == bodyTag
1779         || isCaptionColOrColgroupTag(token.name())
1780         || token.name() == htmlTag
1781         || isTableCellContextTag(token.name())
1782         || token.name() == trTag) {
1783         parseError(token);
1784         return;
1785     }
1786     processEndTagForInTable(token);
1787 }
1788
1789 void HTMLTreeBuilder::processEndTagForInRow(AtomicHTMLToken& token)
1790 {
1791     ASSERT(token.type() == HTMLToken::EndTag);
1792     if (token.name() == trTag) {
1793         processTrEndTagForInRow();
1794         return;
1795     }
1796     if (token.name() == tableTag) {
1797         if (!processTrEndTagForInRow()) {
1798             ASSERT(isParsingFragment());
1799             return;
1800         }
1801         ASSERT(insertionMode() == InTableBodyMode);
1802         reprocessEndTag(token);
1803         return;
1804     }
1805     if (isTableBodyContextTag(token.name())) {
1806         if (!m_tree.openElements()->inTableScope(token.name())) {
1807             parseError(token);
1808             return;
1809         }
1810         processFakeEndTag(trTag);
1811         ASSERT(insertionMode() == InTableBodyMode);
1812         reprocessEndTag(token);
1813         return;
1814     }
1815     if (token.name() == bodyTag
1816         || isCaptionColOrColgroupTag(token.name())
1817         || token.name() == htmlTag
1818         || isTableCellContextTag(token.name())) {
1819         parseError(token);
1820         return;
1821     }
1822     processEndTagForInTable(token);
1823 }
1824
1825 void HTMLTreeBuilder::processEndTagForInCell(AtomicHTMLToken& token)
1826 {
1827     ASSERT(token.type() == HTMLToken::EndTag);
1828     if (isTableCellContextTag(token.name())) {
1829         if (!m_tree.openElements()->inTableScope(token.name())) {
1830             parseError(token);
1831             return;
1832         }
1833         m_tree.generateImpliedEndTags();
1834         if (!m_tree.currentElement()->hasLocalName(token.name()))
1835             parseError(token);
1836         m_tree.openElements()->popUntilPopped(token.name());
1837         m_tree.activeFormattingElements()->clearToLastMarker();
1838         setInsertionMode(InRowMode);
1839         return;
1840     }
1841     if (token.name() == bodyTag
1842         || isCaptionColOrColgroupTag(token.name())
1843         || token.name() == htmlTag) {
1844         parseError(token);
1845         return;
1846     }
1847     if (token.name() == tableTag
1848         || token.name() == trTag
1849         || isTableBodyContextTag(token.name())) {
1850         if (!m_tree.openElements()->inTableScope(token.name())) {
1851             ASSERT(isTableBodyContextTag(token.name()) || isParsingFragment());
1852             parseError(token);
1853             return;
1854         }
1855         closeTheCell();
1856         reprocessEndTag(token);
1857         return;
1858     }
1859     processEndTagForInBody(token);
1860 }
1861
1862 void HTMLTreeBuilder::processEndTagForInBody(AtomicHTMLToken& token)
1863 {
1864     ASSERT(token.type() == HTMLToken::EndTag);
1865     if (token.name() == bodyTag) {
1866         processBodyEndTagForInBody(token);
1867         return;
1868     }
1869     if (token.name() == htmlTag) {
1870         AtomicHTMLToken endBody(HTMLToken::EndTag, bodyTag.localName());
1871         if (processBodyEndTagForInBody(endBody))
1872             reprocessEndTag(token);
1873         return;
1874     }
1875     if (token.name() == addressTag
1876         || token.name() == articleTag
1877         || token.name() == asideTag
1878         || token.name() == blockquoteTag
1879         || token.name() == buttonTag
1880         || token.name() == centerTag
1881         || token.name() == detailsTag
1882         || token.name() == dirTag
1883         || token.name() == divTag
1884         || token.name() == dlTag
1885         || token.name() == fieldsetTag
1886         || token.name() == figcaptionTag
1887         || token.name() == figureTag
1888         || token.name() == footerTag
1889         || token.name() == headerTag
1890         || token.name() == hgroupTag
1891         || token.name() == listingTag
1892         || token.name() == menuTag
1893         || token.name() == navTag
1894         || token.name() == olTag
1895         || token.name() == preTag
1896         || token.name() == sectionTag
1897         || token.name() == summaryTag
1898         || token.name() == ulTag) {
1899         if (!m_tree.openElements()->inScope(token.name())) {
1900             parseError(token);
1901             return;
1902         }
1903         m_tree.generateImpliedEndTags();
1904         if (!m_tree.currentElement()->hasLocalName(token.name()))
1905             parseError(token);
1906         m_tree.openElements()->popUntilPopped(token.name());
1907         return;
1908     }
1909     if (token.name() == formTag) {
1910         RefPtr<Element> node = m_tree.takeForm();
1911         if (!node || !m_tree.openElements()->inScope(node.get())) {
1912             parseError(token);
1913             return;
1914         }
1915         m_tree.generateImpliedEndTags();
1916         if (m_tree.currentElement() != node.get())
1917             parseError(token);
1918         m_tree.openElements()->remove(node.get());
1919     }
1920     if (token.name() == pTag) {
1921         if (!m_tree.openElements()->inButtonScope(token.name())) {
1922             parseError(token);
1923             processFakeStartTag(pTag);
1924             ASSERT(m_tree.openElements()->inScope(token.name()));
1925             reprocessEndTag(token);
1926             return;
1927         }
1928         m_tree.generateImpliedEndTagsWithExclusion(token.name());
1929         if (!m_tree.currentElement()->hasLocalName(token.name()))
1930             parseError(token);
1931         m_tree.openElements()->popUntilPopped(token.name());
1932         return;
1933     }
1934     if (token.name() == liTag) {
1935         if (!m_tree.openElements()->inListItemScope(token.name())) {
1936             parseError(token);
1937             return;
1938         }
1939         m_tree.generateImpliedEndTagsWithExclusion(token.name());
1940         if (!m_tree.currentElement()->hasLocalName(token.name()))
1941             parseError(token);
1942         m_tree.openElements()->popUntilPopped(token.name());
1943         return;
1944     }
1945     if (token.name() == ddTag
1946         || token.name() == dtTag) {
1947         if (!m_tree.openElements()->inScope(token.name())) {
1948             parseError(token);
1949             return;
1950         }
1951         m_tree.generateImpliedEndTagsWithExclusion(token.name());
1952         if (!m_tree.currentElement()->hasLocalName(token.name()))
1953             parseError(token);
1954         m_tree.openElements()->popUntilPopped(token.name());
1955         return;
1956     }
1957     if (isNumberedHeaderTag(token.name())) {
1958         if (!m_tree.openElements()->hasNumberedHeaderElementInScope()) {
1959             parseError(token);
1960             return;
1961         }
1962         m_tree.generateImpliedEndTags();
1963         if (!m_tree.currentElement()->hasLocalName(token.name()))
1964             parseError(token);
1965         m_tree.openElements()->popUntilNumberedHeaderElementPopped();
1966         return;
1967     }
1968     if (isFormattingTag(token.name())) {
1969         callTheAdoptionAgency(token);
1970         return;
1971     }
1972     if (token.name() == appletTag
1973         || token.name() == marqueeTag
1974         || token.name() == objectTag) {
1975         if (!m_tree.openElements()->inScope(token.name())) {
1976             parseError(token);
1977             return;
1978         }
1979         m_tree.generateImpliedEndTags();
1980         if (!m_tree.currentElement()->hasLocalName(token.name()))
1981             parseError(token);
1982         m_tree.openElements()->popUntilPopped(token.name());
1983         m_tree.activeFormattingElements()->clearToLastMarker();
1984         return;
1985     }
1986     if (token.name() == brTag) {
1987         parseError(token);
1988         processFakeStartTag(brTag);
1989         return;
1990     }
1991     processAnyOtherEndTagForInBody(token);
1992 }
1993
1994 bool HTMLTreeBuilder::processCaptionEndTagForInCaption()
1995 {
1996     if (!m_tree.openElements()->inTableScope(captionTag.localName())) {
1997         ASSERT(isParsingFragment());
1998         // FIXME: parse error
1999         return false;
2000     }
2001     m_tree.generateImpliedEndTags();
2002     // FIXME: parse error if (!m_tree.currentElement()->hasTagName(captionTag))
2003     m_tree.openElements()->popUntilPopped(captionTag.localName());
2004     m_tree.activeFormattingElements()->clearToLastMarker();
2005     setInsertionMode(InTableMode);
2006     return true;
2007 }
2008
2009 bool HTMLTreeBuilder::processTrEndTagForInRow()
2010 {
2011     if (!m_tree.openElements()->inTableScope(trTag.localName())) {
2012         ASSERT(isParsingFragment());
2013         // FIXME: parse error
2014         return false;
2015     }
2016     m_tree.openElements()->popUntilTableRowScopeMarker();
2017     ASSERT(m_tree.currentElement()->hasTagName(trTag));
2018     m_tree.openElements()->pop();
2019     setInsertionMode(InTableBodyMode);
2020     return true;
2021 }
2022
2023 bool HTMLTreeBuilder::processTableEndTagForInTable()
2024 {
2025     if (!m_tree.openElements()->inTableScope(tableTag)) {
2026         ASSERT(isParsingFragment());
2027         // FIXME: parse error.
2028         return false;
2029     }
2030     m_tree.openElements()->popUntilPopped(tableTag.localName());
2031     resetInsertionModeAppropriately();
2032     return true;
2033 }
2034
2035 void HTMLTreeBuilder::processEndTagForInTable(AtomicHTMLToken& token)
2036 {
2037     ASSERT(token.type() == HTMLToken::EndTag);
2038     if (token.name() == tableTag) {
2039         processTableEndTagForInTable();
2040         return;
2041     }
2042     if (token.name() == bodyTag
2043         || isCaptionColOrColgroupTag(token.name())
2044         || token.name() == htmlTag
2045         || isTableBodyContextTag(token.name())
2046         || isTableCellContextTag(token.name())
2047         || token.name() == trTag) {
2048         parseError(token);
2049         return;
2050     }
2051     // Is this redirection necessary here?
2052     HTMLConstructionSite::RedirectToFosterParentGuard redirecter(m_tree);
2053     processEndTagForInBody(token);
2054 }
2055
// Handles an end-tag token according to the current insertion mode.
//
// Modes with larger rule sets delegate to the processEndTagForIn*() helpers;
// the remaining modes are handled inline. Several cases fall through on
// purpose: after calling a defaultFor*() helper the same token is handled by
// the next case. The ASSERT at the top of each case states the insertion
// mode expected at that point.
2056 void HTMLTreeBuilder::processEndTag(AtomicHTMLToken& token)
2057 {
2058     ASSERT(token.type() == HTMLToken::EndTag);
2059     switch (insertionMode()) {
2060     case InitialMode:
2061         ASSERT(insertionMode() == InitialMode);
2062         defaultForInitial();
2063         // Fall through.
2064     case BeforeHTMLMode:
2065         ASSERT(insertionMode() == BeforeHTMLMode);
         // Only </head>, </body>, </html> and </br> continue; any other end
         // tag is a parse error and is dropped.
2066         if (token.name() != headTag && token.name() != bodyTag && token.name() != htmlTag && token.name() != brTag) {
2067             parseError(token);
2068             return;
2069         }
2070         defaultForBeforeHTML();
2071         // Fall through.
2072     case BeforeHeadMode:
2073         ASSERT(insertionMode() == BeforeHeadMode);
2074         if (token.name() != headTag && token.name() != bodyTag && token.name() != htmlTag && token.name() != brTag) {
2075             parseError(token);
2076             return;
2077         }
2078         defaultForBeforeHead();
2079         // Fall through.
2080     case InHeadMode:
2081         ASSERT(insertionMode() == InHeadMode);
2082         if (token.name() == headTag) {
2083             m_tree.openElements()->popHTMLHeadElement();
2084             setInsertionMode(AfterHeadMode);
2085             return;
2086         }
2087         if (token.name() != bodyTag && token.name() != htmlTag && token.name() != brTag) {
2088             parseError(token);
2089             return;
2090         }
2091         defaultForInHead();
2092         // Fall through.
2093     case AfterHeadMode:
2094         ASSERT(insertionMode() == AfterHeadMode);
2095         if (token.name() != bodyTag && token.name() != htmlTag && token.name() != brTag) {
2096             parseError(token);
2097             return;
2098         }
2099         defaultForAfterHead();
2100         // Fall through
2101     case InBodyMode:
2102         ASSERT(insertionMode() == InBodyMode);
2103         processEndTagForInBody(token);
2104         break;
2105     case InTableMode:
2106         ASSERT(insertionMode() == InTableMode);
2107         processEndTagForInTable(token);
2108         break;
2109     case InCaptionMode:
2110         ASSERT(insertionMode() == InCaptionMode);
2111         if (token.name() == captionTag) {
2112             processCaptionEndTagForInCaption();
2113             return;
2114         }
2115         if (token.name() == tableTag) {
             // </table> inside a caption: close the caption first, then
             // reprocess the token if a caption was actually closed.
2116             parseError(token);
2117             if (!processCaptionEndTagForInCaption()) {
2118                 ASSERT(isParsingFragment());
2119                 return;
2120             }
2121             reprocessEndTag(token);
2122             return;
2123         }
2124         if (token.name() == bodyTag
2125             || token.name() == colTag
2126             || token.name() == colgroupTag
2127             || token.name() == htmlTag
2128             || isTableBodyContextTag(token.name())
2129             || isTableCellContextTag(token.name())
2130             || token.name() == trTag) {
2131             parseError(token);
2132             return;
2133         }
2134         processEndTagForInBody(token);
2135         break;
2136     case InColumnGroupMode:
2137         ASSERT(insertionMode() == InColumnGroupMode);
2138         if (token.name() == colgroupTag) {
2139             processColgroupEndTagForInColumnGroup();
2140             return;
2141         }
2142         if (token.name() == colTag) {
2143             parseError(token);
2144             return;
2145         }
         // Any other end tag: act as if </colgroup> had been seen and, if that
         // succeeded, reprocess the token.
2146         if (!processColgroupEndTagForInColumnGroup()) {
2147             ASSERT(isParsingFragment());
2148             return;
2149         }
2150         reprocessEndTag(token);
2151         break;
2152     case InRowMode:
2153         ASSERT(insertionMode() == InRowMode);
2154         processEndTagForInRow(token);
2155         break;
2156     case InCellMode:
2157         ASSERT(insertionMode() == InCellMode);
2158         processEndTagForInCell(token);
2159         break;
2160     case InTableBodyMode:
2161         ASSERT(insertionMode() == InTableBodyMode);
2162         processEndTagForInTableBody(token);
2163         break;
2164     case AfterBodyMode:
2165         ASSERT(insertionMode() == AfterBodyMode);
2166         if (token.name() == htmlTag) {
2167             if (isParsingFragment()) {
2168                 parseError(token);
2169                 return;
2170             }
2171             setInsertionMode(AfterAfterBodyMode);
2172             return;
2173         }
2174         prepareToReprocessToken();
2175         // Fall through.
2176     case AfterAfterBodyMode:
2177         ASSERT(insertionMode() == AfterBodyMode || insertionMode() == AfterAfterBodyMode);
         // Any end tag here is a parse error; switch back to InBodyMode and
         // reprocess the token there.
2178         parseError(token);
2179         setInsertionMode(InBodyMode);
2180         reprocessEndTag(token);
2181         break;
2182     case InHeadNoscriptMode:
2183         ASSERT(insertionMode() == InHeadNoscriptMode);
2184         if (token.name() == noscriptTag) {
2185             ASSERT(m_tree.currentElement()->hasTagName(noscriptTag));
2186             m_tree.openElements()->pop();
2187             ASSERT(m_tree.currentElement()->hasTagName(headTag));
2188             setInsertionMode(InHeadMode);
2189             return;
2190         }
2191         if (token.name() != brTag) {
2192             parseError(token);
2193             return;
2194         }
2195         defaultForInHeadNoscript();
2196         processToken(token);
2197         break;
2198     case TextMode:
2199         if (token.name() == scriptTag) {
2200             // Pause ourselves so that parsing stops until the script can be processed by the caller.
2201             m_isPaused = true;
2202             ASSERT(m_tree.currentElement()->hasTagName(scriptTag));
2203             m_scriptToProcess = m_tree.currentElement();
2204             m_scriptToProcessStartPosition = WTF::toOneBasedTextPosition(m_lastScriptElementStartPosition);
2205             m_tree.openElements()->pop();
                 // When the fragment's scripting permission is
                 // FragmentScriptingNotAllowed, the script's children are removed.
2206             if (isParsingFragment() && m_fragmentContext.scriptingPermission() == FragmentScriptingNotAllowed)
2207                 m_scriptToProcess->removeAllChildren();
2208             setInsertionMode(m_originalInsertionMode);
2209
2210             // This token will not have been created by the tokenizer if a
2211             // self-closing script tag was encountered and pre-HTML5 parser
2212             // quirks are enabled. We must set the tokenizer's state to
2213             // DataState explicitly if the tokenizer didn't have a chance to.
2214             ASSERT(m_parser->tokenizer()->state() == HTMLTokenizer::DataState || m_usePreHTML5ParserQuirks);
2215             m_parser->tokenizer()->setState(HTMLTokenizer::DataState);
2216             return;
2217         }
         // Any other end tag in TextMode: pop the current element and return
         // to the insertion mode saved in m_originalInsertionMode.
2218         m_tree.openElements()->pop();
2219         setInsertionMode(m_originalInsertionMode);
2220         break;
2221     case InFramesetMode:
2222         ASSERT(insertionMode() == InFramesetMode);
2223         if (token.name() == framesetTag) {
2224             if (m_tree.currentElement() == m_tree.openElements()->htmlElement()) {
2225                 parseError(token);
2226                 return;
2227             }
2228             m_tree.openElements()->pop();
2229             if (!isParsingFragment() && !m_tree.currentElement()->hasTagName(framesetTag))
2230                 setInsertionMode(AfterFramesetMode);
2231             return;
2232         }
2233         break;
2234     case AfterFramesetMode:
2235         ASSERT(insertionMode() == AfterFramesetMode);
2236         if (token.name() == htmlTag) {
2237             setInsertionMode(AfterAfterFramesetMode);
2238             return;
2239         }
2240         // Fall through.
2241     case AfterAfterFramesetMode:
2242         ASSERT(insertionMode() == AfterFramesetMode || insertionMode() == AfterAfterFramesetMode);
2243         parseError(token);
2244         break;
2245     case InSelectInTableMode:
2246         ASSERT(insertionMode() == InSelectInTableMode);
2247         if (token.name() == captionTag
2248             || token.name() == tableTag
2249             || isTableBodyContextTag(token.name())
2250             || token.name() == trTag
2251             || isTableCellContextTag(token.name())) {
             // Table-related end tags are always a parse error here. When the
             // named element is in table scope, a synthesized </select> is
             // processed first and the original token is then reprocessed.
2252             parseError(token);
2253             if (m_tree.openElements()->inTableScope(token.name())) {
2254                 AtomicHTMLToken endSelect(HTMLToken::EndTag, selectTag.localName());
2255                 processEndTag(endSelect);
2256                 reprocessEndTag(token);
2257             }
2258             return;
2259         }
2260         // Fall through.
2261     case InSelectMode:
2262         ASSERT(insertionMode() == InSelectMode || insertionMode() == InSelectInTableMode);
2263         if (token.name() == optgroupTag) {
             // An open <option> directly inside an <optgroup> is closed first.
2264             if (m_tree.currentElement()->hasTagName(optionTag) && m_tree.oneBelowTop()->hasTagName(optgroupTag))
2265                 processFakeEndTag(optionTag);
2266             if (m_tree.currentElement()->hasTagName(optgroupTag)) {
2267                 m_tree.openElements()->pop();
2268                 return;
2269             }
2270             parseError(token);
2271             return;
2272         }
2273         if (token.name() == optionTag) {
2274             if (m_tree.currentElement()->hasTagName(optionTag)) {
2275                 m_tree.openElements()->pop();
2276                 return;
2277             }
2278             parseError(token);
2279             return;
2280         }
2281         if (token.name() == selectTag) {
2282             if (!m_tree.openElements()->inSelectScope(token.name())) {
2283                 ASSERT(isParsingFragment());
2284                 parseError(token);
2285                 return;
2286             }
2287             m_tree.openElements()->popUntilPopped(selectTag.localName());
2288             resetInsertionModeAppropriately();
2289             return;
2290         }
2291         break;
2292     case InTableTextMode:
2293         defaultForInTableText();
2294         processEndTag(token);
2295         break;
2296     case InForeignContentMode:
2297         if (token.name() == SVGNames::scriptTag && m_tree.currentElement()->hasTagName(SVGNames::scriptTag)) {
2298             notImplemented();
2299             return;
2300         }
2301         if (m_tree.currentElement()->namespaceURI() != xhtmlNamespaceURI) {
2302             // FIXME: This code just wants an Element* iterator, instead of an ElementRecord*
2303             HTMLElementStack::ElementRecord* nodeRecord = m_tree.openElements()->topRecord();
2304             if (!nodeRecord->element()->hasLocalName(token.name()))
2305                 parseError(token);
                 // Walk the stack from the top looking for an element whose local
                 // name matches the token; stop at the first element that is in
                 // the XHTML namespace.
2306             while (1) {
2307                 if (nodeRecord->element()->hasLocalName(token.name())) {
2308                     m_tree.openElements()->popUntilPopped(nodeRecord->element());
2309                     resetForeignInsertionMode();
2310                     return;
2311                 }
2312                 nodeRecord = nodeRecord->next();
2313                 if (nodeRecord->element()->namespaceURI() == xhtmlNamespaceURI)
2314                     break;
2315             }
2316         }
2317         // Any other end tag (also the last two steps of "An end tag, if the current node is not an element in the HTML namespace.")
2318         processForeignContentUsingInBodyModeAndResetMode(token);
2319         break;
2320     }
2321 }
2322
2323 void HTMLTreeBuilder::prepareToReprocessToken()
2324 {
2325     if (m_hasPendingForeignInsertionModeSteps) {
2326         resetForeignInsertionMode();
2327         m_hasPendingForeignInsertionModeSteps = false;
2328     }
2329 }
2330
// Processes a start-tag token again: first runs prepareToReprocessToken(),
// then dispatches the token through processStartTag().
2331 void HTMLTreeBuilder::reprocessStartTag(AtomicHTMLToken& token)
2332 {
2333     prepareToReprocessToken();
2334     processStartTag(token);
2335 }
2336
// Processes an end-tag token again: first runs prepareToReprocessToken(),
// then dispatches the token through processEndTag().
2337 void HTMLTreeBuilder::reprocessEndTag(AtomicHTMLToken& token)
2338 {
2339     prepareToReprocessToken();
2340     processEndTag(token);
2341 }
2342
2343 class HTMLTreeBuilder::FakeInsertionMode {
2344     WTF_MAKE_NONCOPYABLE(FakeInsertionMode);
2345 public:
2346     FakeInsertionMode(HTMLTreeBuilder* treeBuilder, InsertionMode mode)
2347         : m_treeBuilder(treeBuilder)
2348         , m_originalMode(treeBuilder->insertionMode())
2349     {
2350         m_treeBuilder->setFakeInsertionMode(mode);
2351     }
2352
2353     ~FakeInsertionMode()
2354     {
2355         if (m_treeBuilder->isFakeInsertionMode())
2356             m_treeBuilder->setInsertionMode(m_originalMode);
2357     }
2358
2359 private:
2360     HTMLTreeBuilder* m_treeBuilder;
2361     InsertionMode m_originalMode;
2362 };
2363
2364 void HTMLTreeBuilder::processForeignContentUsingInBodyModeAndResetMode(AtomicHTMLToken& token)
2365 {
2366     m_hasPendingForeignInsertionModeSteps = true;
2367     {
2368         FakeInsertionMode fakeMode(this, InBodyMode);
2369         processToken(token);
2370     }
2371     if (m_hasPendingForeignInsertionModeSteps)
2372         resetForeignInsertionMode();
2373 }
2374
2375 void HTMLTreeBuilder::resetForeignInsertionMode()
2376 {
2377     if (insertionMode() == InForeignContentMode)
2378         resetInsertionModeAppropriately();
2379 }
2380
2381 void HTMLTreeBuilder::processComment(AtomicHTMLToken& token)
2382 {
2383     ASSERT(token.type() == HTMLToken::Comment);
2384     if (m_insertionMode == InitialMode
2385         || m_insertionMode == BeforeHTMLMode
2386         || m_insertionMode == AfterAfterBodyMode
2387         || m_insertionMode == AfterAfterFramesetMode) {
2388         m_tree.insertCommentOnDocument(token);
2389         return;
2390     }
2391     if (m_insertionMode == AfterBodyMode) {
2392         m_tree.insertCommentOnHTMLHtmlElement(token);
2393         return;
2394     }
2395     if (m_insertionMode == InTableTextMode) {
2396         defaultForInTableText();
2397         processComment(token);
2398         return;
2399     }
2400     m_tree.insertComment(token);
2401 }
2402
// Handles a character token by wrapping it in an ExternalCharacterTokenBuffer
// and handing that buffer to processCharacterBuffer().
2403 void HTMLTreeBuilder::processCharacter(AtomicHTMLToken& token)
2404 {
2405     ASSERT(token.type() == HTMLToken::Character);
2406     ExternalCharacterTokenBuffer buffer(token);
2407     processCharacterBuffer(buffer);
2408 }
2409
2410 void HTMLTreeBuilder::processCharacterBuffer(ExternalCharacterTokenBuffer& buffer)
2411 {
2412 ReprocessBuffer:
2413     switch (insertionMode()) {
2414     case InitialMode: {
2415         ASSERT(insertionMode() == InitialMode);
2416         buffer.skipLeadingWhitespace();
2417         if (buffer.isEmpty())
2418             return;
2419         defaultForInitial();
2420         // Fall through.
2421     }
2422     case BeforeHTMLMode: {
2423         ASSERT(insertionMode() == BeforeHTMLMode);
2424         buffer.skipLeadingWhitespace();
2425         if (buffer.isEmpty())
2426             return;
2427         defaultForBeforeHTML();
2428         // Fall through.
2429     }
2430     case BeforeHeadMode: {
2431         ASSERT(insertionMode() == BeforeHeadMode);
2432         buffer.skipLeadingWhitespace();
2433         if (buffer.isEmpty())
2434             return;
2435         defaultForBeforeHead();
2436         // Fall through.
2437     }
2438     case InHeadMode: {
2439         ASSERT(insertionMode() == InHeadMode);
2440         String leadingWhitespace = buffer.takeLeadingWhitespace();
2441         if (!leadingWhitespace.isEmpty())
2442             m_tree.insertTextNode(leadingWhitespace);
2443         if (buffer.isEmpty())
2444             return;
2445         defaultForInHead();
2446         // Fall through.
2447     }
2448     case AfterHeadMode: {
2449         ASSERT(insertionMode() == AfterHeadMode);
2450         String leadingWhitespace = buffer.takeLeadingWhitespace();
2451         if (!leadingWhitespace.isEmpty())
2452             m_tree.insertTextNode(leadingWhitespace);
2453         if (buffer.isEmpty())
2454             return;
2455         defaultForAfterHead();
2456         // Fall through.
2457     }
2458     case InBodyMode:
2459     case InCaptionMode:
2460     case InCellMode: {
2461         ASSERT(insertionMode() == InBodyMode || insertionMode() == InCaptionMode || insertionMode() == InCellMode);
2462         m_tree.reconstructTheActiveFormattingElements();
2463         String characters = buffer.takeRemaining();
2464         m_tree.insertTextNode(characters);
2465         if (m_framesetOk && !isAllWhitespaceOrReplacementCharacters(characters))
2466             m_framesetOk = false;
2467         break;
2468     }
2469     case InTableMode:
2470     case InTableBodyMode:
2471     case InRowMode: {
2472         ASSERT(insertionMode() == InTableMode || insertionMode() == InTableBodyMode || insertionMode() == InRowMode);
2473         ASSERT(m_pendingTableCharacters.isEmpty());
2474         m_originalInsertionMode = m_insertionMode;
2475         setInsertionMode(InTableTextMode);
2476         prepareToReprocessToken();
2477         // Fall through.
2478     }
2479     case InTableTextMode: {
2480         buffer.giveRemainingTo(m_pendingTableCharacters);
2481         break;
2482     }
2483     case InColumnGroupMode: {
2484         ASSERT(insertionMode() == InColumnGroupMode);
2485         String leadingWhitespace = buffer.takeLeadingWhitespace();
2486         if (!leadingWhitespace.isEmpty())
2487             m_tree.insertTextNode(leadingWhitespace);
2488         if (buffer.isEmpty())
2489             return;
2490         if (!processColgroupEndTagForInColumnGroup()) {
2491             ASSERT(isParsingFragment());
2492             // The spec tells us to drop these characters on the floor.
2493             buffer.takeLeadingNonWhitespace();
2494             if (buffer.isEmpty())
2495                 return;
2496         }
2497         prepareToReprocessToken();
2498         goto ReprocessBuffer;
2499     }
2500     case AfterBodyMode:
2501     case AfterAfterBodyMode: {
2502         ASSERT(insertionMode() == AfterBodyMode || insertionMode() == AfterAfterBodyMode);
2503         // FIXME: parse error
2504         setInsertionMode(InBodyMode);
2505         prepareToReprocessToken();
2506         goto ReprocessBuffer;
2507         break;
2508     }
2509     case TextMode: {
2510         ASSERT(insertionMode() == TextMode);
2511         m_tree.insertTextNode(buffer.takeRemaining());
2512         break;
2513     }
2514     case InHeadNoscriptMode: {
2515         ASSERT(insertionMode() == InHeadNoscriptMode);
2516         String leadingWhitespace = buffer.takeLeadingWhitespace();
2517         if (!leadingWhitespace.isEmpty())
2518             m_tree.insertTextNode(leadingWhitespace);
2519         if (buffer.isEmpty())
2520             return;
2521         defaultForInHeadNoscript();
2522         goto ReprocessBuffer;
2523         break;
2524     }
2525     case InFramesetMode:
2526     case AfterFramesetMode: {
2527         ASSERT(insertionMode() == InFramesetMode || insertionMode() == AfterFramesetMode || insertionMode() == AfterAfterFramesetMode);
2528         String leadingWhitespace = buffer.takeRemainingWhitespace();
2529         if (!leadingWhitespace.isEmpty())
2530             m_tree.insertTextNode(leadingWhitespace);
2531         // FIXME: We should generate a parse error if we skipped over any
2532         // non-whitespace characters.
2533         break;
2534     }
2535     case InSelectInTableMode:
2536     case InSelectMode: {
2537         ASSERT(insertionMode() == InSelectMode || insertionMode() == InSelectInTableMode);
2538         m_tree.insertTextNode(buffer.takeRemaining());
2539         break;
2540     }
2541     case InForeignContentMode: {
2542         ASSERT(insertionMode() == InForeignContentMode);
2543         String characters = buffer.takeRemaining();
2544         m_tree.insertTextNode(characters);
2545         if (m_framesetOk && !isAllWhitespace(characters))
2546             m_framesetOk = false;
2547         break;
2548     }
2549     case AfterAfterFramesetMode: {
2550         String leadingWhitespace = buffer.takeRemainingWhitespace();
2551         if (!leadingWhitespace.isEmpty()) {
2552             m_tree.reconstructTheActiveFormattingElements();
2553             m_tree.insertTextNode(leadingWhitespace);
2554         }
2555         // FIXME: We should generate a parse error if we skipped over any
2556         // non-whitespace characters.
2557         break;
2558     }
2559     }
2560 }
2561
// Handles the end-of-file token for the current insertion mode.
//
// Cases that end in |break| reach the tail of the function, which pops every
// element off the stack of open elements. Cases that first have to change the
// insertion mode do so, call processEndOfFile() again so the token is handled
// under the new mode, and then |return| so the tail is not run a second time.
2562 void HTMLTreeBuilder::processEndOfFile(AtomicHTMLToken& token)
2563 {
2564     ASSERT(token.type() == HTMLToken::EndOfFile);
2565     switch (insertionMode()) {
         // The modes up to AfterHeadMode each run their default action and then
         // fall through to the next case; the ASSERT at the top of each case
         // checks the mode the previous default action is expected to leave.
2566     case InitialMode:
2567         ASSERT(insertionMode() == InitialMode);
2568         defaultForInitial();
2569         // Fall through.
2570     case BeforeHTMLMode:
2571         ASSERT(insertionMode() == BeforeHTMLMode);
2572         defaultForBeforeHTML();
2573         // Fall through.
2574     case BeforeHeadMode:
2575         ASSERT(insertionMode() == BeforeHeadMode);
2576         defaultForBeforeHead();
2577         // Fall through.
2578     case InHeadMode:
2579         ASSERT(insertionMode() == InHeadMode);
2580         defaultForInHead();
2581         // Fall through.
2582     case AfterHeadMode:
2583         ASSERT(insertionMode() == AfterHeadMode);
2584         defaultForAfterHead();
2585         // Fall through.
2586     case InBodyMode:
2587     case InCellMode:
2588     case InCaptionMode:
2589     case InRowMode:
2590         ASSERT(insertionMode() == InBodyMode || insertionMode() == InCellMode || insertionMode() == InCaptionMode || insertionMode() == InRowMode);
2591         notImplemented(); // Emit parse error based on what elements are still open.
2592         break;
2593     case AfterBodyMode:
2594     case AfterAfterBodyMode:
2595         ASSERT(insertionMode() == AfterBodyMode || insertionMode() == AfterAfterBodyMode);
2596         break;
2597     case InHeadNoscriptMode:
2598         ASSERT(insertionMode() == InHeadNoscriptMode);
2599         defaultForInHeadNoscript();
2600         processEndOfFile(token);
2601         return;
2602     case AfterFramesetMode:
2603     case AfterAfterFramesetMode:
2604         ASSERT(insertionMode() == AfterFramesetMode || insertionMode() == AfterAfterFramesetMode);
2605         break;
2606     case InFramesetMode:
2607     case InTableMode:
2608     case InTableBodyMode:
2609     case InSelectInTableMode:
2610     case InSelectMode:
2611         ASSERT(insertionMode() == InSelectMode || insertionMode() == InSelectInTableMode || insertionMode() == InTableMode || insertionMode() == InFramesetMode || insertionMode() == InTableBodyMode);
             // Report a parse error unless the current node is the <html> element.
2612         if (m_tree.currentElement() != m_tree.openElements()->htmlElement())
2613             parseError(token);
2614         break;
2615     case InColumnGroupMode:
             // Both early returns below are expected only while parsing a
             // fragment, and both skip the popAll() at the end of the function.
2616         if (m_tree.currentElement() == m_tree.openElements()->htmlElement()) {
2617             ASSERT(isParsingFragment());
2618             return; // FIXME: Should we break here instead of returning?
2619         }
2620         if (!processColgroupEndTagForInColumnGroup()) {
2621             ASSERT(isParsingFragment());
2622             return; // FIXME: Should we break here instead of returning?
2623         }
2624         prepareToReprocessToken();
2625         processEndOfFile(token);
2626         return;
2627     case InForeignContentMode:
2628         setInsertionMode(InBodyMode);
2629         processEndOfFile(token);
2630         return;
2631     case InTableTextMode:
2632         defaultForInTableText();
2633         processEndOfFile(token);
2634         return;
2635     case TextMode:
2636         parseError(token);
2637         if (m_tree.currentElement()->hasTagName(scriptTag))
2638             notImplemented(); // mark the script element as "already started".
             // Pop the current element and go back to the mode that was active
             // before TextMode was entered, then handle the EOF there.
2639         m_tree.openElements()->pop();
2640         setInsertionMode(m_originalInsertionMode);
2641         prepareToReprocessToken();
2642         processEndOfFile(token);
2643         return;
2644     }
         // Reached only via |break|: empty the stack of open elements.
2645     ASSERT(m_tree.openElements()->top());
2646     m_tree.openElements()->popAll();
2647 }
2648
2649 void HTMLTreeBuilder::defaultForInitial()
2650 {
2651     notImplemented();
2652     if (!m_fragmentContext.fragment())
2653         m_document->setCompatibilityMode(Document::QuirksMode);
2654     // FIXME: parse error
2655     setInsertionMode(BeforeHTMLMode);
2656     prepareToReprocessToken();
2657 }
2658
2659 void HTMLTreeBuilder::defaultForBeforeHTML()
2660 {
2661     AtomicHTMLToken startHTML(HTMLToken::StartTag, htmlTag.localName());
2662     m_tree.insertHTMLHtmlStartTagBeforeHTML(startHTML);
2663     setInsertionMode(BeforeHeadMode);
2664     prepareToReprocessToken();
2665 }
2666
2667 void HTMLTreeBuilder::defaultForBeforeHead()
2668 {
2669     AtomicHTMLToken startHead(HTMLToken::StartTag, headTag.localName());
2670     processStartTag(startHead);
2671     prepareToReprocessToken();
2672 }
2673
2674 void HTMLTreeBuilder::defaultForInHead()
2675 {
2676     AtomicHTMLToken endHead(HTMLToken::EndTag, headTag.localName());
2677     processEndTag(endHead);
2678     prepareToReprocessToken();
2679 }
2680
2681 void HTMLTreeBuilder::defaultForInHeadNoscript()
2682 {
2683     AtomicHTMLToken endNoscript(HTMLToken::EndTag, noscriptTag.localName());
2684     processEndTag(endNoscript);
2685     prepareToReprocessToken();
2686 }
2687
2688 void HTMLTreeBuilder::defaultForAfterHead()
2689 {
2690     AtomicHTMLToken startBody(HTMLToken::StartTag, bodyTag.localName());
2691     processStartTag(startBody);
2692     m_framesetOk = true;
2693     prepareToReprocessToken();
2694 }
2695
2696 void HTMLTreeBuilder::defaultForInTableText()
2697 {
2698     String characters = String::adopt(m_pendingTableCharacters);
2699     if (!isAllWhitespace(characters)) {
2700         // FIXME: parse error
2701         HTMLConstructionSite::RedirectToFosterParentGuard redirecter(m_tree);
2702         m_tree.reconstructTheActiveFormattingElements();
2703         m_tree.insertTextNode(characters);
2704         m_framesetOk = false;
2705         setInsertionMode(m_originalInsertionMode);
2706         prepareToReprocessToken();
2707         return;
2708     }
2709     m_tree.insertTextNode(characters);
2710     setInsertionMode(m_originalInsertionMode);
2711     prepareToReprocessToken();
2712 }
2713
2714 bool HTMLTreeBuilder::processStartTagForInHead(AtomicHTMLToken& token)
2715 {
2716     ASSERT(token.type() == HTMLToken::StartTag);
2717     if (token.name() == htmlTag) {
2718         m_tree.insertHTMLHtmlStartTagInBody(token);
2719         return true;
2720     }
2721     if (token.name() == baseTag
2722         || token.name() == basefontTag
2723         || token.name() == bgsoundTag
2724         || token.name() == commandTag
2725         || token.name() == linkTag
2726         || token.name() == metaTag) {
2727         m_tree.insertSelfClosingHTMLElement(token);
2728         // Note: The custom processing for the <meta> tag is done in HTMLMetaElement::process().
2729         return true;
2730     }
2731     if (token.name() == titleTag) {
2732         processGenericRCDATAStartTag(token);
2733         return true;
2734     }
2735     if (token.name() == noscriptTag) {
2736         if (scriptEnabled(m_document->frame())) {
2737             processGenericRawTextStartTag(token);
2738             return true;
2739         }
2740         m_tree.insertHTMLElement(token);
2741         setInsertionMode(InHeadNoscriptMode);
2742         return true;
2743     }
2744     if (token.name() == noframesTag || token.name() == styleTag) {
2745         processGenericRawTextStartTag(token);
2746         return true;
2747     }
2748     if (token.name() == scriptTag) {
2749         processScriptStartTag(token);
2750         if (m_usePreHTML5ParserQuirks && token.selfClosing())
2751             processFakeEndTag(scriptTag);
2752         return true;
2753     }
2754     if (token.name() == headTag) {
2755         parseError(token);
2756         return true;
2757     }
2758     return false;
2759 }
2760
2761 void HTMLTreeBuilder::processGenericRCDATAStartTag(AtomicHTMLToken& token)
2762 {
2763     ASSERT(token.type() == HTMLToken::StartTag);
2764     m_tree.insertHTMLElement(token);
2765     m_parser->tokenizer()->setState(HTMLTokenizer::RCDATAState);
2766     m_originalInsertionMode = m_insertionMode;
2767     setInsertionMode(TextMode);
2768 }
2769
2770 void HTMLTreeBuilder::processGenericRawTextStartTag(AtomicHTMLToken& token)
2771 {
2772     ASSERT(token.type() == HTMLToken::StartTag);
2773     m_tree.insertHTMLElement(token);
2774     m_parser->tokenizer()->setState(HTMLTokenizer::RAWTEXTState);
2775     m_originalInsertionMode = m_insertionMode;
2776     setInsertionMode(TextMode);
2777 }
2778
2779 void HTMLTreeBuilder::processScriptStartTag(AtomicHTMLToken& token)
2780 {
2781     ASSERT(token.type() == HTMLToken::StartTag);
2782     m_tree.insertScriptElement(token);
2783     m_parser->tokenizer()->setState(HTMLTokenizer::ScriptDataState);
2784     m_originalInsertionMode = m_insertionMode;
2785
2786     TextPosition0 position = m_parser->textPosition();
2787
2788     ASSERT(position.m_line.zeroBasedInt() == m_parser->tokenizer()->lineNumber());
2789
2790     m_lastScriptElementStartPosition = position;
2791
2792     setInsertionMode(TextMode);
2793 }
2794
2795 void HTMLTreeBuilder::finished()
2796 {
2797     ASSERT(m_document);
2798     if (isParsingFragment()) {
2799         m_fragmentContext.finished();
2800         return;
2801     }
2802
2803     // Warning, this may detach the parser. Do not do anything else after this.
2804     m_document->finishedParsing();
2805 }
2806
// Hook called at the points where the tree builder detects a parse error.
// The body is empty and the token argument is unnamed, so a call currently
// has no effect.
2807 void HTMLTreeBuilder::parseError(AtomicHTMLToken&)
2808 {
2809 }
2810
2811 bool HTMLTreeBuilder::scriptEnabled(Frame* frame)
2812 {
2813     if (!frame)
2814         return false;
2815     return frame->script()->canExecuteScripts(NotAboutToExecuteScript);
2816 }
2817
2818 bool HTMLTreeBuilder::pluginsEnabled(Frame* frame)
2819 {
2820     if (!frame)
2821         return false;
2822     return frame->loader()->subframeLoader()->allowPlugins(NotAboutToInstantiatePlugin);
2823 }
2824
2825 }