2010-09-13 Adam Barth <abarth@webkit.org>
[WebKit-https.git] / WebCore / html / parser / HTMLTreeBuilder.cpp
1 /*
2  * Copyright (C) 2010 Google, Inc. All Rights Reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  * 1. Redistributions of source code must retain the above copyright
8  *    notice, this list of conditions and the following disclaimer.
9  * 2. Redistributions in binary form must reproduce the above copyright
10  *    notice, this list of conditions and the following disclaimer in the
11  *    documentation and/or other materials provided with the distribution.
12  *
13  * THIS SOFTWARE IS PROVIDED BY GOOGLE INC. ``AS IS'' AND ANY
14  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL GOOGLE INC. OR
17  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
20  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
21  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24  */
25
26 #include "config.h"
27 #include "HTMLTreeBuilder.h"
28
29 #include "Comment.h"
30 #include "DocumentFragment.h"
31 #include "DocumentType.h"
32 #include "Element.h"
33 #include "Frame.h"
34 #include "HTMLDocument.h"
35 #include "HTMLElementFactory.h"
36 #include "HTMLFormElement.h"
37 #include "HTMLHtmlElement.h"
38 #include "HTMLNames.h"
39 #include "HTMLScriptElement.h"
40 #include "HTMLToken.h"
41 #include "HTMLTokenizer.h"
42 #include "LocalizedStrings.h"
43 #include "MathMLNames.h"
44 #include "NotImplemented.h"
45 #include "SVGNames.h"
46 #include "ScriptController.h"
47 #include "Settings.h"
48 #include "Text.h"
49 #include "XLinkNames.h"
50 #include "XMLNSNames.h"
51 #include "XMLNames.h"
52 // FIXME: Remove this include once we find a home for the free functions that
53 // are using it.
54 #include <wtf/dtoa.h>
55 #include <wtf/UnusedParam.h>
56
57 namespace WebCore {
58
59 using namespace HTMLNames;
60
61 static const int uninitializedLineNumberValue = -1;
62
63 namespace {
64
65 inline bool isTreeBuilderWhitepace(UChar c)
66 {
67     // FIXME: Consider branch permutations.
68     return c == '\t' || c == '\x0A' || c == '\x0C' || c == '\x0D' || c == ' ';
69 }
70
71 inline bool isNotTreeBuilderWhitepace(UChar c)
72 {
73     return !isTreeBuilderWhitepace(c);
74 }
75
76 inline bool isTreeBuilderWhitepaceOrReplacementCharacter(UChar c)
77 {
78     return isTreeBuilderWhitepace(c) || c == 0xFFFD;
79 }
80
81 template<bool isSpecialCharacter(UChar c)>
82 inline bool isAllSpecialCharacters(const String& string)
83 {
84     const UChar* characters = string.characters();
85     const unsigned length = string.length();
86     for (unsigned i = 0; i < length; ++i) {
87         if (!isSpecialCharacter(characters[i]))
88             return false;
89     }
90     return true;
91 }
92
93 inline bool isAllWhitespace(const String& string)
94 {
95     return isAllSpecialCharacters<isTreeBuilderWhitepace>(string);
96 }
97
98 inline bool isAllWhitespaceOrReplacementCharacters(const String& string)
99 {
100     return isAllSpecialCharacters<isTreeBuilderWhitepaceOrReplacementCharacter>(string);
101 }
102
103 bool isNumberedHeaderTag(const AtomicString& tagName)
104 {
105     return tagName == h1Tag
106         || tagName == h2Tag
107         || tagName == h3Tag
108         || tagName == h4Tag
109         || tagName == h5Tag
110         || tagName == h6Tag;
111 }
112
113 bool isCaptionColOrColgroupTag(const AtomicString& tagName)
114 {
115     return tagName == captionTag
116         || tagName == colTag
117         || tagName == colgroupTag;
118 }
119
120 bool isTableCellContextTag(const AtomicString& tagName)
121 {
122     return tagName == thTag || tagName == tdTag;
123 }
124
125 bool isTableBodyContextTag(const AtomicString& tagName)
126 {
127     return tagName == tbodyTag
128         || tagName == tfootTag
129         || tagName == theadTag;
130 }
131
132 // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#special
133 bool isSpecialNode(Node* node)
134 {
135     if (node->hasTagName(SVGNames::foreignObjectTag))
136         return true;
137     if (node->namespaceURI() != xhtmlNamespaceURI)
138         return false;
139     const AtomicString& tagName = node->localName();
140     return tagName == addressTag
141         || tagName == appletTag
142         || tagName == areaTag
143         || tagName == articleTag
144         || tagName == asideTag
145         || tagName == baseTag
146         || tagName == basefontTag
147         || tagName == bgsoundTag
148         || tagName == blockquoteTag
149         || tagName == bodyTag
150         || tagName == brTag
151         || tagName == buttonTag
152         || tagName == captionTag
153         || tagName == centerTag
154         || tagName == colTag
155         || tagName == colgroupTag
156         || tagName == commandTag
157         || tagName == ddTag
158         || tagName == detailsTag
159         || tagName == dirTag
160         || tagName == divTag
161         || tagName == dlTag
162         || tagName == dtTag
163         || tagName == embedTag
164         || tagName == fieldsetTag
165         || tagName == figcaptionTag
166         || tagName == figureTag
167         || tagName == footerTag
168         || tagName == formTag
169         || tagName == frameTag
170         || tagName == framesetTag
171         || isNumberedHeaderTag(tagName)
172         || tagName == headTag
173         || tagName == headerTag
174         || tagName == hgroupTag
175         || tagName == hrTag
176         || tagName == htmlTag
177         || tagName == iframeTag
178         || tagName == imgTag
179         || tagName == inputTag
180         || tagName == isindexTag
181         || tagName == liTag
182         || tagName == linkTag
183         || tagName == listingTag
184         || tagName == marqueeTag
185         || tagName == menuTag
186         || tagName == metaTag
187         || tagName == navTag
188         || tagName == noembedTag
189         || tagName == noframesTag
190         || tagName == noscriptTag
191         || tagName == objectTag
192         || tagName == olTag
193         || tagName == pTag
194         || tagName == paramTag
195         || tagName == plaintextTag
196         || tagName == preTag
197         || tagName == scriptTag
198         || tagName == sectionTag
199         || tagName == selectTag
200         || tagName == styleTag
201         || tagName == summaryTag
202         || tagName == tableTag
203         || isTableBodyContextTag(tagName)
204         || tagName == tdTag
205         || tagName == textareaTag
206         || tagName == thTag
207         || tagName == titleTag
208         || tagName == trTag
209         || tagName == ulTag
210         || tagName == wbrTag
211         || tagName == xmpTag;
212 }
213
214 bool isNonAnchorNonNobrFormattingTag(const AtomicString& tagName)
215 {
216     return tagName == bTag
217         || tagName == bigTag
218         || tagName == codeTag
219         || tagName == emTag
220         || tagName == fontTag
221         || tagName == iTag
222         || tagName == sTag
223         || tagName == smallTag
224         || tagName == strikeTag
225         || tagName == strongTag
226         || tagName == ttTag
227         || tagName == uTag;
228 }
229
230 bool isNonAnchorFormattingTag(const AtomicString& tagName)
231 {
232     return tagName == nobrTag
233         || isNonAnchorNonNobrFormattingTag(tagName);
234 }
235
236 // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#formatting
237 bool isFormattingTag(const AtomicString& tagName)
238 {
239     return tagName == aTag || isNonAnchorFormattingTag(tagName);
240 }
241
242 HTMLFormElement* closestFormAncestor(Element* element)
243 {
244     while (element) {
245         if (element->hasTagName(formTag))
246             return static_cast<HTMLFormElement*>(element);
247         Node* parent = element->parent();
248         if (!parent || !parent->isElementNode())
249             return 0;
250         element = static_cast<Element*>(parent);
251     }
252     return 0;
253 }
254
255 } // namespace
256
257 class HTMLTreeBuilder::ExternalCharacterTokenBuffer : public Noncopyable {
258 public:
259     explicit ExternalCharacterTokenBuffer(AtomicHTMLToken& token)
260         : m_current(token.characters().data())
261         , m_end(m_current + token.characters().size())
262     {
263         ASSERT(!isEmpty());
264     }
265
266     explicit ExternalCharacterTokenBuffer(const String& string)
267         : m_current(string.characters())
268         , m_end(m_current + string.length())
269     {
270         ASSERT(!isEmpty());
271     }
272
273     ~ExternalCharacterTokenBuffer()
274     {
275         ASSERT(isEmpty());
276     }
277
278     bool isEmpty() const { return m_current == m_end; }
279
280     void skipLeadingWhitespace()
281     {
282         skipLeading<isTreeBuilderWhitepace>();
283     }
284
285     String takeLeadingWhitespace()
286     {
287         return takeLeading<isTreeBuilderWhitepace>();
288     }
289
290     String takeLeadingNonWhitespace()
291     {
292         return takeLeading<isNotTreeBuilderWhitepace>();
293     }
294
295     String takeRemaining()
296     {
297         ASSERT(!isEmpty());
298         const UChar* start = m_current;
299         m_current = m_end;
300         return String(start, m_current - start);
301     }
302
303     void giveRemainingTo(Vector<UChar>& recipient)
304     {
305         recipient.append(m_current, m_end - m_current);
306         m_current = m_end;
307     }
308
309     String takeRemainingWhitespace()
310     {
311         ASSERT(!isEmpty());
312         Vector<UChar> whitespace;
313         do {
314             UChar cc = *m_current++;
315             if (isTreeBuilderWhitepace(cc))
316                 whitespace.append(cc);
317         } while (m_current < m_end);
318         // Returning the null string when there aren't any whitespace
319         // characters is slightly cleaner semantically because we don't want
320         // to insert a text node (as opposed to inserting an empty text node).
321         if (whitespace.isEmpty())
322             return String();
323         return String::adopt(whitespace);
324     }
325
326 private:
327     template<bool characterPredicate(UChar)>
328     void skipLeading()
329     {
330         ASSERT(!isEmpty());
331         while (characterPredicate(*m_current)) {
332             if (++m_current == m_end)
333                 return;
334         }
335     }
336
337     template<bool characterPredicate(UChar)>
338     String takeLeading()
339     {
340         ASSERT(!isEmpty());
341         const UChar* start = m_current;
342         skipLeading<characterPredicate>();
343         if (start == m_current)
344             return String();
345         return String(start, m_current - start);
346     }
347
348     const UChar* m_current;
349     const UChar* m_end;
350 };
351
352
// Constructs a tree builder that builds into |document|, reading state from
// and writing state to |tokenizer|. The construction site (m_tree) is created
// with FragmentScriptingAllowed and |false| as its last argument (the
// fragment constructor passes |true| there). All three insertion-mode members
// start at InitialMode and both script line numbers start at
// uninitializedLineNumberValue. m_fragmentContext is not named here, so it is
// default-constructed.
HTMLTreeBuilder::HTMLTreeBuilder(HTMLTokenizer* tokenizer, HTMLDocument* document, bool reportErrors)
    : m_framesetOk(true)
    , m_document(document)
    , m_tree(document, FragmentScriptingAllowed, false)
    , m_reportErrors(reportErrors)
    , m_isPaused(false)
    , m_insertionMode(InitialMode)
    , m_originalInsertionMode(InitialMode)
    , m_secondaryInsertionMode(InitialMode)
    , m_tokenizer(tokenizer)
    , m_scriptToProcessStartLine(uninitializedLineNumberValue)
    , m_lastScriptElementStartLine(uninitializedLineNumberValue)
{
}
367
368 // FIXME: Member variables should be grouped into self-initializing structs to
369 // minimize code duplication between these constructors.
370 HTMLTreeBuilder::HTMLTreeBuilder(HTMLTokenizer* tokenizer, DocumentFragment* fragment, Element* contextElement, FragmentScriptingPermission scriptingPermission)
371     : m_framesetOk(true)
372     , m_fragmentContext(fragment, contextElement, scriptingPermission)
373     , m_document(m_fragmentContext.document())
374     , m_tree(m_document, scriptingPermission, true)
375     , m_reportErrors(false) // FIXME: Why not report errors in fragments?
376     , m_isPaused(false)
377     , m_insertionMode(InitialMode)
378     , m_originalInsertionMode(InitialMode)
379     , m_secondaryInsertionMode(InitialMode)
380     , m_tokenizer(tokenizer)
381     , m_scriptToProcessStartLine(uninitializedLineNumberValue)
382     , m_lastScriptElementStartLine(uninitializedLineNumberValue)
383 {
384     if (contextElement) {
385         // Steps 4.2-4.6 of the HTML5 Fragment Case parsing algorithm:
386         // http://www.whatwg.org/specs/web-apps/current-work/multipage/the-end.html#fragment-case
387         m_document->setCompatibilityMode(contextElement->document()->compatibilityMode());
388         processFakeStartTag(htmlTag);
389         resetInsertionModeAppropriately();
390         m_tree.setForm(closestFormAncestor(contextElement));
391     }
392 }
393
// Nothing to do explicitly; members are destroyed by their own destructors.
HTMLTreeBuilder::~HTMLTreeBuilder()
{
}
397
// Drops the builder's reference to the document and detaches the
// construction site.
void HTMLTreeBuilder::detach()
{
    // This call makes little sense in fragment mode, but for consistency
    // DocumentParser expects detach() to always be called before it's destroyed.
    m_document = 0;
    // HTMLConstructionSite might be on the callstack when detach() is called
    // otherwise we'd just call m_tree.clear() here instead.
    m_tree.detach();
}
407
// Constructs an empty context: no fragment, no context element, and
// scripting allowed. m_dummyDocumentForFragmentParsing is not named here and
// is therefore default-constructed.
HTMLTreeBuilder::FragmentParsingContext::FragmentParsingContext()
    : m_fragment(0)
    , m_contextElement(0)
    , m_scriptingPermission(FragmentScriptingAllowed)
{
}
414
// Constructs the context used while parsing into |fragment|. A separate
// HTMLDocument is created to parse into; it is given the base URI of the
// fragment's document and then the same compatibility mode as that document.
// |fragment| must be non-null: it is dereferenced unconditionally.
HTMLTreeBuilder::FragmentParsingContext::FragmentParsingContext(DocumentFragment* fragment, Element* contextElement, FragmentScriptingPermission scriptingPermission)
    : m_dummyDocumentForFragmentParsing(HTMLDocument::create(0, KURL(), fragment->document()->baseURI()))
    , m_fragment(fragment)
    , m_contextElement(contextElement)
    , m_scriptingPermission(scriptingPermission)
{
    m_dummyDocumentForFragmentParsing->setCompatibilityMode(fragment->document()->compatibilityMode());
}
423
424 Document* HTMLTreeBuilder::FragmentParsingContext::document() const
425 {
426     ASSERT(m_fragment);
427     return m_dummyDocumentForFragmentParsing.get();
428 }
429
430 void HTMLTreeBuilder::FragmentParsingContext::finished()
431 {
432     // Populate the DocumentFragment with the parsed content now that we're done.
433     ContainerNode* root = m_dummyDocumentForFragmentParsing.get();
434     if (m_contextElement)
435         root = m_dummyDocumentForFragmentParsing->documentElement();
436     m_fragment->takeAllChildrenFrom(root);
437 }
438
// Nothing to do explicitly; members are destroyed by their own destructors.
HTMLTreeBuilder::FragmentParsingContext::~FragmentParsingContext()
{
}
442
443 PassRefPtr<Element> HTMLTreeBuilder::takeScriptToProcess(int& scriptStartLine)
444 {
445     // Unpause ourselves, callers may pause us again when processing the script.
446     // The HTML5 spec is written as though scripts are executed inside the tree
447     // builder.  We pause the parser to exit the tree builder, and then resume
448     // before running scripts.
449     m_isPaused = false;
450     scriptStartLine = m_scriptToProcessStartLine;
451     m_scriptToProcessStartLine = uninitializedLineNumberValue;
452     return m_scriptToProcess.release();
453 }
454
455 HTMLTokenizer::State HTMLTreeBuilder::adjustedLexerState(HTMLTokenizer::State state, const AtomicString& tagName, Frame* frame)
456 {
457     if (tagName == textareaTag || tagName == titleTag)
458         return HTMLTokenizer::RCDATAState;
459
460     if (tagName == styleTag
461         || tagName == iframeTag
462         || tagName == xmpTag
463         || (tagName == noembedTag && pluginsEnabled(frame))
464         || tagName == noframesTag
465         || (tagName == noscriptTag && scriptEnabled(frame)))
466         return HTMLTokenizer::RAWTEXTState;
467
468     if (tagName == plaintextTag)
469         return HTMLTokenizer::PLAINTEXTState;
470
471     return state;
472 }
473
474 void HTMLTreeBuilder::constructTreeFromToken(HTMLToken& rawToken)
475 {
476     AtomicHTMLToken token(rawToken);
477     constructTreeFromAtomicToken(token);
478 }
479
480 void HTMLTreeBuilder::constructTreeFromAtomicToken(AtomicHTMLToken& token)
481 {
482     processToken(token);
483
484     // Swallowing U+0000 characters isn't in the HTML5 spec, but turning all
485     // the U+0000 characters into replacement characters has compatibility
486     // problems.
487     m_tokenizer->setForceNullCharacterReplacement(m_insertionMode == TextMode || m_insertionMode == InForeignContentMode);
488     m_tokenizer->setShouldAllowCDATA(m_insertionMode == InForeignContentMode && m_tree.currentElement()->namespaceURI() != xhtmlNamespaceURI);
489 }
490
491 void HTMLTreeBuilder::processToken(AtomicHTMLToken& token)
492 {
493     switch (token.type()) {
494     case HTMLToken::Uninitialized:
495         ASSERT_NOT_REACHED();
496         break;
497     case HTMLToken::DOCTYPE:
498         processDoctypeToken(token);
499         break;
500     case HTMLToken::StartTag:
501         processStartTag(token);
502         break;
503     case HTMLToken::EndTag:
504         processEndTag(token);
505         break;
506     case HTMLToken::Comment:
507         processComment(token);
508         return;
509     case HTMLToken::Character:
510         processCharacter(token);
511         break;
512     case HTMLToken::EndOfFile:
513         processEndOfFile(token);
514         break;
515     }
516 }
517
518 void HTMLTreeBuilder::processDoctypeToken(AtomicHTMLToken& token)
519 {
520     ASSERT(token.type() == HTMLToken::DOCTYPE);
521     if (m_insertionMode == InitialMode) {
522         m_tree.insertDoctype(token);
523         setInsertionMode(BeforeHTMLMode);
524         return;
525     }
526     if (m_insertionMode == InTableTextMode) {
527         defaultForInTableText();
528         processDoctypeToken(token);
529         return;
530     }
531     parseError(token);
532 }
533
534 void HTMLTreeBuilder::processFakeStartTag(const QualifiedName& tagName, PassRefPtr<NamedNodeMap> attributes)
535 {
536     // FIXME: We'll need a fancier conversion than just "localName" for SVG/MathML tags.
537     AtomicHTMLToken fakeToken(HTMLToken::StartTag, tagName.localName(), attributes);
538     processStartTag(fakeToken);
539 }
540
541 void HTMLTreeBuilder::processFakeEndTag(const QualifiedName& tagName)
542 {
543     // FIXME: We'll need a fancier conversion than just "localName" for SVG/MathML tags.
544     AtomicHTMLToken fakeToken(HTMLToken::EndTag, tagName.localName());
545     processEndTag(fakeToken);
546 }
547
548 void HTMLTreeBuilder::processFakeCharacters(const String& characters)
549 {
550     ASSERT(!characters.isEmpty());
551     ExternalCharacterTokenBuffer buffer(characters);
552     processCharacterBuffer(buffer);
553 }
554
555 void HTMLTreeBuilder::processFakePEndTagIfPInButtonScope()
556 {
557     if (!m_tree.openElements()->inButtonScope(pTag.localName()))
558         return;
559     AtomicHTMLToken endP(HTMLToken::EndTag, pTag.localName());
560     processEndTag(endP);
561 }
562
563 PassRefPtr<NamedNodeMap> HTMLTreeBuilder::attributesForIsindexInput(AtomicHTMLToken& token)
564 {
565     RefPtr<NamedNodeMap> attributes = token.takeAtributes();
566     if (!attributes)
567         attributes = NamedNodeMap::create();
568     else {
569         attributes->removeAttribute(nameAttr);
570         attributes->removeAttribute(actionAttr);
571         attributes->removeAttribute(promptAttr);
572     }
573
574     RefPtr<Attribute> mappedAttribute = Attribute::createMapped(nameAttr, isindexTag.localName());
575     attributes->insertAttribute(mappedAttribute.release(), false);
576     return attributes.release();
577 }
578
579 void HTMLTreeBuilder::processIsindexStartTagForInBody(AtomicHTMLToken& token)
580 {
581     ASSERT(token.type() == HTMLToken::StartTag);
582     ASSERT(token.name() == isindexTag);
583     parseError(token);
584     if (m_tree.form())
585         return;
586     notImplemented(); // Acknowledge self-closing flag
587     processFakeStartTag(formTag);
588     Attribute* actionAttribute = token.getAttributeItem(actionAttr);
589     if (actionAttribute) {
590         ASSERT(m_tree.currentElement()->hasTagName(formTag));
591         m_tree.currentElement()->setAttribute(actionAttr, actionAttribute->value());
592     }
593     processFakeStartTag(hrTag);
594     processFakeStartTag(labelTag);
595     Attribute* promptAttribute = token.getAttributeItem(promptAttr);
596     if (promptAttribute)
597         processFakeCharacters(promptAttribute->value());
598     else
599         processFakeCharacters(searchableIndexIntroduction());
600     processFakeStartTag(inputTag, attributesForIsindexInput(token));
601     notImplemented(); // This second set of characters may be needed by non-english locales.
602     processFakeEndTag(labelTag);
603     processFakeStartTag(hrTag);
604     processFakeEndTag(formTag);
605 }
606
607 namespace {
608
609 bool isLi(const Element* element)
610 {
611     return element->hasTagName(liTag);
612 }
613
614 bool isDdOrDt(const Element* element)
615 {
616     return element->hasTagName(ddTag)
617         || element->hasTagName(dtTag);
618 }
619
620 }
621
622 template <bool shouldClose(const Element*)>
623 void HTMLTreeBuilder::processCloseWhenNestedTag(AtomicHTMLToken& token)
624 {
625     m_framesetOk = false;
626     HTMLElementStack::ElementRecord* nodeRecord = m_tree.openElements()->topRecord();
627     while (1) {
628         Element* node = nodeRecord->element();
629         if (shouldClose(node)) {
630             processFakeEndTag(node->tagQName());
631             break;
632         }
633         if (isSpecialNode(node) && !node->hasTagName(addressTag) && !node->hasTagName(divTag) && !node->hasTagName(pTag))
634             break;
635         nodeRecord = nodeRecord->next();
636     }
637     processFakePEndTagIfPInButtonScope();
638     m_tree.insertHTMLElement(token);
639 }
640
641 namespace {
642
// Lookup table from an attribute or tag name as it appears in a token to the
// QualifiedName it should be adjusted to.
typedef HashMap<AtomicString, QualifiedName> PrefixedNameToQualifiedNameMap;
644
645 void mapLoweredLocalNameToName(PrefixedNameToQualifiedNameMap* map, QualifiedName** names, size_t length)
646 {
647     for (size_t i = 0; i < length; ++i) {
648         const QualifiedName& name = *names[i];
649         const AtomicString& localName = name.localName();
650         AtomicString loweredLocalName = localName.lower();
651         if (loweredLocalName != localName)
652             map->add(loweredLocalName, name);
653     }
654 }
655
656 void adjustSVGTagNameCase(AtomicHTMLToken& token)
657 {
658     static PrefixedNameToQualifiedNameMap* caseMap = 0;
659     if (!caseMap) {
660         caseMap = new PrefixedNameToQualifiedNameMap;
661         size_t length = 0;
662         QualifiedName** svgTags = SVGNames::getSVGTags(&length);
663         mapLoweredLocalNameToName(caseMap, svgTags, length);
664     }
665
666     const QualifiedName& casedName = caseMap->get(token.name());
667     if (casedName.localName().isNull())
668         return;
669     token.setName(casedName.localName());
670 }
671
672 template<QualifiedName** getAttrs(size_t* length)>
673 void adjustAttributes(AtomicHTMLToken& token)
674 {
675     static PrefixedNameToQualifiedNameMap* caseMap = 0;
676     if (!caseMap) {
677         caseMap = new PrefixedNameToQualifiedNameMap;
678         size_t length = 0;
679         QualifiedName** attrs = getAttrs(&length);
680         mapLoweredLocalNameToName(caseMap, attrs, length);
681     }
682
683     NamedNodeMap* attributes = token.attributes();
684     if (!attributes)
685         return;
686
687     for (unsigned x = 0; x < attributes->length(); ++x) {
688         Attribute* attribute = attributes->attributeItem(x);
689         const QualifiedName& casedName = caseMap->get(attribute->localName());
690         if (!casedName.localName().isNull())
691             attribute->parserSetName(casedName);
692     }
693 }
694
// Applies adjustAttributes() to |token| using the SVG attribute names
// returned by SVGNames::getSVGAttrs.
void adjustSVGAttributes(AtomicHTMLToken& token)
{
    adjustAttributes<SVGNames::getSVGAttrs>(token);
}
699
// Applies adjustAttributes() to |token| using the MathML attribute names
// returned by MathMLNames::getMathMLAttrs.
void adjustMathMLAttributes(AtomicHTMLToken& token)
{
    adjustAttributes<MathMLNames::getMathMLAttrs>(token);
}
704
705 void addNamesWithPrefix(PrefixedNameToQualifiedNameMap* map, const AtomicString& prefix, QualifiedName** names, size_t length)
706 {
707     for (size_t i = 0; i < length; ++i) {
708         QualifiedName* name = names[i];
709         const AtomicString& localName = name->localName();
710         AtomicString prefixColonLocalName(prefix + ":" + localName);
711         QualifiedName nameWithPrefix(prefix, localName, name->namespaceURI());
712         map->add(prefixColonLocalName, nameWithPrefix);
713     }
714 }
715
716 void adjustForeignAttributes(AtomicHTMLToken& token)
717 {
718     static PrefixedNameToQualifiedNameMap* map = 0;
719     if (!map) {
720         map = new PrefixedNameToQualifiedNameMap;
721         size_t length = 0;
722         QualifiedName** attrs = XLinkNames::getXLinkAttrs(&length);
723         addNamesWithPrefix(map, "xlink", attrs, length);
724
725         attrs = XMLNames::getXMLAttrs(&length);
726         addNamesWithPrefix(map, "xml", attrs, length);
727
728         map->add("xmlns", XMLNSNames::xmlnsAttr);
729         map->add("xmlns:xlink", QualifiedName("xmlns", "xlink", XMLNSNames::xmlnsNamespaceURI));
730     }
731
732     NamedNodeMap* attributes = token.attributes();
733     if (!attributes)
734         return;
735
736     for (unsigned x = 0; x < attributes->length(); ++x) {
737         Attribute* attribute = attributes->attributeItem(x);
738         const QualifiedName& name = map->get(attribute->localName());
739         if (!name.localName().isNull())
740             attribute->parserSetName(name);
741     }
742 }
743
744 }
745
746 void HTMLTreeBuilder::processStartTagForInBody(AtomicHTMLToken& token)
747 {
748     ASSERT(token.type() == HTMLToken::StartTag);
749     if (token.name() == htmlTag) {
750         m_tree.insertHTMLHtmlStartTagInBody(token);
751         return;
752     }
753     if (token.name() == baseTag
754         || token.name() == basefontTag
755         || token.name() == bgsoundTag
756         || token.name() == commandTag
757         || token.name() == linkTag
758         || token.name() == metaTag
759         || token.name() == noframesTag
760         || token.name() == scriptTag
761         || token.name() == styleTag
762         || token.name() == titleTag) {
763         bool didProcess = processStartTagForInHead(token);
764         ASSERT_UNUSED(didProcess, didProcess);
765         return;
766     }
767     if (token.name() == bodyTag) {
768         if (!m_tree.openElements()->secondElementIsHTMLBodyElement() || m_tree.openElements()->hasOnlyOneElement()) {
769             ASSERT(isParsingFragment());
770             return;
771         }
772         m_tree.insertHTMLBodyStartTagInBody(token);
773         return;
774     }
775     if (token.name() == framesetTag) {
776         parseError(token);
777         if (!m_tree.openElements()->secondElementIsHTMLBodyElement() || m_tree.openElements()->hasOnlyOneElement()) {
778             ASSERT(isParsingFragment());
779             return;
780         }
781         if (!m_framesetOk)
782             return;
783         ExceptionCode ec = 0;
784         m_tree.openElements()->bodyElement()->remove(ec);
785         ASSERT(!ec);
786         m_tree.openElements()->popUntil(m_tree.openElements()->bodyElement());
787         m_tree.openElements()->popHTMLBodyElement();
788         ASSERT(m_tree.openElements()->top() == m_tree.openElements()->htmlElement());
789         m_tree.insertHTMLElement(token);
790         setInsertionMode(InFramesetMode);
791         return;
792     }
793     if (token.name() == addressTag
794         || token.name() == articleTag
795         || token.name() == asideTag
796         || token.name() == blockquoteTag
797         || token.name() == centerTag
798         || token.name() == detailsTag
799         || token.name() == dirTag
800         || token.name() == divTag
801         || token.name() == dlTag
802         || token.name() == fieldsetTag
803         || token.name() == figcaptionTag
804         || token.name() == figureTag
805         || token.name() == footerTag
806         || token.name() == headerTag
807         || token.name() == hgroupTag
808         || token.name() == menuTag
809         || token.name() == navTag
810         || token.name() == olTag
811         || token.name() == pTag
812         || token.name() == sectionTag
813         || token.name() == summaryTag
814         || token.name() == ulTag) {
815         processFakePEndTagIfPInButtonScope();
816         m_tree.insertHTMLElement(token);
817         return;
818     }
819     if (isNumberedHeaderTag(token.name())) {
820         processFakePEndTagIfPInButtonScope();
821         if (isNumberedHeaderTag(m_tree.currentElement()->localName())) {
822             parseError(token);
823             m_tree.openElements()->pop();
824         }
825         m_tree.insertHTMLElement(token);
826         return;
827     }
828     if (token.name() == preTag || token.name() == listingTag) {
829         processFakePEndTagIfPInButtonScope();
830         m_tree.insertHTMLElement(token);
831         m_tokenizer->setSkipLeadingNewLineForListing(true);
832         m_framesetOk = false;
833         return;
834     }
835     if (token.name() == formTag) {
836         if (m_tree.form()) {
837             parseError(token);
838             return;
839         }
840         processFakePEndTagIfPInButtonScope();
841         m_tree.insertHTMLFormElement(token);
842         return;
843     }
844     if (token.name() == liTag) {
845         processCloseWhenNestedTag<isLi>(token);
846         return;
847     }
848     if (token.name() == ddTag || token.name() == dtTag) {
849         processCloseWhenNestedTag<isDdOrDt>(token);
850         return;
851     }
852     if (token.name() == plaintextTag) {
853         processFakePEndTagIfPInButtonScope();
854         m_tree.insertHTMLElement(token);
855         m_tokenizer->setState(HTMLTokenizer::PLAINTEXTState);
856         return;
857     }
858     if (token.name() == buttonTag) {
859         if (m_tree.openElements()->inScope(buttonTag)) {
860             parseError(token);
861             processFakeEndTag(buttonTag);
862             processStartTag(token); // FIXME: Could we just fall through here?
863             return;
864         }
865         m_tree.reconstructTheActiveFormattingElements();
866         m_tree.insertHTMLElement(token);
867         m_framesetOk = false;
868         return;
869     }
870     if (token.name() == aTag) {
871         Element* activeATag = m_tree.activeFormattingElements()->closestElementInScopeWithName(aTag.localName());
872         if (activeATag) {
873             parseError(token);
874             processFakeEndTag(aTag);
875             m_tree.activeFormattingElements()->remove(activeATag);
876             if (m_tree.openElements()->contains(activeATag))
877                 m_tree.openElements()->remove(activeATag);
878         }
879         m_tree.reconstructTheActiveFormattingElements();
880         m_tree.insertFormattingElement(token);
881         return;
882     }
883     if (isNonAnchorNonNobrFormattingTag(token.name())) {
884         m_tree.reconstructTheActiveFormattingElements();
885         m_tree.insertFormattingElement(token);
886         return;
887     }
888     if (token.name() == nobrTag) {
889         m_tree.reconstructTheActiveFormattingElements();
890         if (m_tree.openElements()->inScope(nobrTag)) {
891             parseError(token);
892             processFakeEndTag(nobrTag);
893             m_tree.reconstructTheActiveFormattingElements();
894         }
895         m_tree.insertFormattingElement(token);
896         return;
897     }
898     if (token.name() == appletTag
899         || token.name() == marqueeTag
900         || token.name() == objectTag) {
901         m_tree.reconstructTheActiveFormattingElements();
902         m_tree.insertHTMLElement(token);
903         m_tree.activeFormattingElements()->appendMarker();
904         m_framesetOk = false;
905         return;
906     }
907     if (token.name() == tableTag) {
908         if (!m_document->inQuirksMode() && m_tree.openElements()->inButtonScope(pTag))
909             processFakeEndTag(pTag);
910         m_tree.insertHTMLElement(token);
911         m_framesetOk = false;
912         setInsertionMode(InTableMode);
913         return;
914     }
915     if (token.name() == imageTag) {
916         parseError(token);
917         // Apparently we're not supposed to ask.
918         token.setName(imgTag.localName());
919         // Note the fall through to the imgTag handling below!
920     }
921     if (token.name() == areaTag
922         || token.name() == brTag
923         || token.name() == embedTag
924         || token.name() == imgTag
925         || token.name() == inputTag
926         || token.name() == keygenTag
927         || token.name() == wbrTag) {
928         m_tree.reconstructTheActiveFormattingElements();
929         m_tree.insertSelfClosingHTMLElement(token);
930         m_framesetOk = false;
931         return;
932     }
933     if (token.name() == paramTag
934         || token.name() == sourceTag
935         || token.name() == trackTag) {
936         m_tree.insertSelfClosingHTMLElement(token);
937         return;
938     }
939     if (token.name() == hrTag) {
940         processFakePEndTagIfPInButtonScope();
941         m_tree.insertSelfClosingHTMLElement(token);
942         m_framesetOk = false;
943         return;
944     }
945     if (token.name() == isindexTag) {
946         processIsindexStartTagForInBody(token);
947         return;
948     }
949     if (token.name() == textareaTag) {
950         m_tree.insertHTMLElement(token);
951         m_tokenizer->setSkipLeadingNewLineForListing(true);
952         m_tokenizer->setState(HTMLTokenizer::RCDATAState);
953         m_originalInsertionMode = m_insertionMode;
954         m_framesetOk = false;
955         setInsertionMode(TextMode);
956         return;
957     }
958     if (token.name() == xmpTag) {
959         processFakePEndTagIfPInButtonScope();
960         m_tree.reconstructTheActiveFormattingElements();
961         m_framesetOk = false;
962         processGenericRawTextStartTag(token);
963         return;
964     }
965     if (token.name() == iframeTag) {
966         m_framesetOk = false;
967         processGenericRawTextStartTag(token);
968         return;
969     }
970     if (token.name() == noembedTag && pluginsEnabled(m_document->frame())) {
971         processGenericRawTextStartTag(token);
972         return;
973     }
974     if (token.name() == noscriptTag && scriptEnabled(m_document->frame())) {
975         processGenericRawTextStartTag(token);
976         return;
977     }
978     if (token.name() == selectTag) {
979         m_tree.reconstructTheActiveFormattingElements();
980         m_tree.insertHTMLElement(token);
981         m_framesetOk = false;
982         if (m_insertionMode == InTableMode
983              || m_insertionMode == InCaptionMode
984              || m_insertionMode == InColumnGroupMode
985              || m_insertionMode == InTableBodyMode
986              || m_insertionMode == InRowMode
987              || m_insertionMode == InCellMode)
988             setInsertionMode(InSelectInTableMode);
989         else
990             setInsertionMode(InSelectMode);
991         return;
992     }
993     if (token.name() == optgroupTag || token.name() == optionTag) {
994         if (m_tree.openElements()->inScope(optionTag.localName())) {
995             AtomicHTMLToken endOption(HTMLToken::EndTag, optionTag.localName());
996             processEndTag(endOption);
997         }
998         m_tree.reconstructTheActiveFormattingElements();
999         m_tree.insertHTMLElement(token);
1000         return;
1001     }
1002     if (token.name() == rpTag || token.name() == rtTag) {
1003         if (m_tree.openElements()->inScope(rubyTag.localName())) {
1004             m_tree.generateImpliedEndTags();
1005             if (!m_tree.currentElement()->hasTagName(rubyTag)) {
1006                 parseError(token);
1007                 m_tree.openElements()->popUntil(rubyTag.localName());
1008             }
1009         }
1010         m_tree.insertHTMLElement(token);
1011         return;
1012     }
1013     if (token.name() == MathMLNames::mathTag.localName()) {
1014         m_tree.reconstructTheActiveFormattingElements();
1015         adjustMathMLAttributes(token);
1016         adjustForeignAttributes(token);
1017         m_tree.insertForeignElement(token, MathMLNames::mathmlNamespaceURI);
1018         if (m_insertionMode != InForeignContentMode) {
1019             setSecondaryInsertionMode(m_insertionMode);
1020             setInsertionMode(InForeignContentMode);
1021         }
1022         return;
1023     }
1024     if (token.name() == SVGNames::svgTag.localName()) {
1025         m_tree.reconstructTheActiveFormattingElements();
1026         adjustSVGAttributes(token);
1027         adjustForeignAttributes(token);
1028         m_tree.insertForeignElement(token, SVGNames::svgNamespaceURI);
1029         if (m_insertionMode != InForeignContentMode) {
1030             setSecondaryInsertionMode(m_insertionMode);
1031             setInsertionMode(InForeignContentMode);
1032         }
1033         return;
1034     }
1035     if (isCaptionColOrColgroupTag(token.name())
1036         || token.name() == frameTag
1037         || token.name() == headTag
1038         || isTableBodyContextTag(token.name())
1039         || isTableCellContextTag(token.name())
1040         || token.name() == trTag) {
1041         parseError(token);
1042         return;
1043     }
1044     m_tree.reconstructTheActiveFormattingElements();
1045     m_tree.insertHTMLElement(token);
1046 }
1047
1048 bool HTMLTreeBuilder::processColgroupEndTagForInColumnGroup()
1049 {
1050     if (m_tree.currentElement() == m_tree.openElements()->htmlElement()) {
1051         ASSERT(isParsingFragment());
1052         // FIXME: parse error
1053         return false;
1054     }
1055     m_tree.openElements()->pop();
1056     setInsertionMode(InTableMode);
1057     return true;
1058 }
1059
1060 // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#close-the-cell
1061 void HTMLTreeBuilder::closeTheCell()
1062 {
1063     ASSERT(insertionMode() == InCellMode);
1064     if (m_tree.openElements()->inTableScope(tdTag)) {
1065         ASSERT(!m_tree.openElements()->inTableScope(thTag));
1066         processFakeEndTag(tdTag);
1067         return;
1068     }
1069     ASSERT(m_tree.openElements()->inTableScope(thTag));
1070     processFakeEndTag(thTag);
1071     ASSERT(insertionMode() == InRowMode);
1072 }
1073
1074 void HTMLTreeBuilder::processStartTagForInTable(AtomicHTMLToken& token)
1075 {
1076     ASSERT(token.type() == HTMLToken::StartTag);
1077     if (token.name() == captionTag) {
1078         m_tree.openElements()->popUntilTableScopeMarker();
1079         m_tree.activeFormattingElements()->appendMarker();
1080         m_tree.insertHTMLElement(token);
1081         setInsertionMode(InCaptionMode);
1082         return;
1083     }
1084     if (token.name() == colgroupTag) {
1085         m_tree.openElements()->popUntilTableScopeMarker();
1086         m_tree.insertHTMLElement(token);
1087         setInsertionMode(InColumnGroupMode);
1088         return;
1089     }
1090     if (token.name() == colTag) {
1091         processFakeStartTag(colgroupTag);
1092         ASSERT(InColumnGroupMode);
1093         processStartTag(token);
1094         return;
1095     }
1096     if (isTableBodyContextTag(token.name())) {
1097         m_tree.openElements()->popUntilTableScopeMarker();
1098         m_tree.insertHTMLElement(token);
1099         setInsertionMode(InTableBodyMode);
1100         return;
1101     }
1102     if (isTableCellContextTag(token.name())
1103         || token.name() == trTag) {
1104         processFakeStartTag(tbodyTag);
1105         ASSERT(insertionMode() == InTableBodyMode);
1106         processStartTag(token);
1107         return;
1108     }
1109     if (token.name() == tableTag) {
1110         parseError(token);
1111         if (!processTableEndTagForInTable()) {
1112             ASSERT(isParsingFragment());
1113             return;
1114         }
1115         processStartTag(token);
1116         return;
1117     }
1118     if (token.name() == styleTag || token.name() == scriptTag) {
1119         processStartTagForInHead(token);
1120         return;
1121     }
1122     if (token.name() == inputTag) {
1123         Attribute* typeAttribute = token.getAttributeItem(typeAttr);
1124         if (typeAttribute && equalIgnoringCase(typeAttribute->value(), "hidden")) {
1125             parseError(token);
1126             m_tree.insertSelfClosingHTMLElement(token);
1127             return;
1128         }
1129         // Fall through to "anything else" case.
1130     }
1131     if (token.name() == formTag) {
1132         parseError(token);
1133         if (m_tree.form())
1134             return;
1135         m_tree.insertHTMLFormElement(token, true);
1136         m_tree.openElements()->pop();
1137         return;
1138     }
1139     parseError(token);
1140     HTMLConstructionSite::RedirectToFosterParentGuard redirecter(m_tree);
1141     processStartTagForInBody(token);
1142 }
1143
1144 namespace {
1145
1146 bool shouldProcessUsingSecondaryInsertionMode(AtomicHTMLToken& token, Element* currentElement)
1147 {
1148     ASSERT(token.type() == HTMLToken::StartTag);
1149     if (currentElement->hasTagName(MathMLNames::miTag)
1150         || currentElement->hasTagName(MathMLNames::moTag)
1151         || currentElement->hasTagName(MathMLNames::mnTag)
1152         || currentElement->hasTagName(MathMLNames::msTag)
1153         || currentElement->hasTagName(MathMLNames::mtextTag)) {
1154         return token.name() != MathMLNames::mglyphTag
1155             && token.name() != MathMLNames::malignmarkTag;
1156     }
1157     if (currentElement->hasTagName(MathMLNames::annotation_xmlTag))
1158         return token.name() == SVGNames::svgTag;
1159     if (currentElement->hasTagName(SVGNames::foreignObjectTag)
1160         || currentElement->hasTagName(SVGNames::descTag)
1161         || currentElement->hasTagName(SVGNames::titleTag))
1162         return true;
1163     return currentElement->namespaceURI() == HTMLNames::xhtmlNamespaceURI;
1164 }
1165
1166 }
1167
1168 void HTMLTreeBuilder::processStartTag(AtomicHTMLToken& token)
1169 {
1170     ASSERT(token.type() == HTMLToken::StartTag);
1171     switch (insertionMode()) {
1172     case InitialMode:
1173         ASSERT(insertionMode() == InitialMode);
1174         defaultForInitial();
1175         // Fall through.
1176     case BeforeHTMLMode:
1177         ASSERT(insertionMode() == BeforeHTMLMode);
1178         if (token.name() == htmlTag) {
1179             m_tree.insertHTMLHtmlStartTagBeforeHTML(token);
1180             setInsertionMode(BeforeHeadMode);
1181             return;
1182         }
1183         defaultForBeforeHTML();
1184         // Fall through.
1185     case BeforeHeadMode:
1186         ASSERT(insertionMode() == BeforeHeadMode);
1187         if (token.name() == htmlTag) {
1188             m_tree.insertHTMLHtmlStartTagInBody(token);
1189             return;
1190         }
1191         if (token.name() == headTag) {
1192             m_tree.insertHTMLHeadElement(token);
1193             setInsertionMode(InHeadMode);
1194             return;
1195         }
1196         defaultForBeforeHead();
1197         // Fall through.
1198     case InHeadMode:
1199         ASSERT(insertionMode() == InHeadMode);
1200         if (processStartTagForInHead(token))
1201             return;
1202         defaultForInHead();
1203         // Fall through.
1204     case AfterHeadMode:
1205         ASSERT(insertionMode() == AfterHeadMode);
1206         if (token.name() == htmlTag) {
1207             m_tree.insertHTMLHtmlStartTagInBody(token);
1208             return;
1209         }
1210         if (token.name() == bodyTag) {
1211             m_framesetOk = false;
1212             m_tree.insertHTMLBodyElement(token);
1213             setInsertionMode(InBodyMode);
1214             return;
1215         }
1216         if (token.name() == framesetTag) {
1217             m_tree.insertHTMLElement(token);
1218             setInsertionMode(InFramesetMode);
1219             return;
1220         }
1221         if (token.name() == baseTag
1222             || token.name() == basefontTag
1223             || token.name() == bgsoundTag
1224             || token.name() == linkTag
1225             || token.name() == metaTag
1226             || token.name() == noframesTag
1227             || token.name() == scriptTag
1228             || token.name() == styleTag
1229             || token.name() == titleTag) {
1230             parseError(token);
1231             ASSERT(m_tree.head());
1232             m_tree.openElements()->pushHTMLHeadElement(m_tree.head());
1233             processStartTagForInHead(token);
1234             m_tree.openElements()->removeHTMLHeadElement(m_tree.head());
1235             return;
1236         }
1237         if (token.name() == headTag) {
1238             parseError(token);
1239             return;
1240         }
1241         defaultForAfterHead();
1242         // Fall through
1243     case InBodyMode:
1244         ASSERT(insertionMode() == InBodyMode);
1245         processStartTagForInBody(token);
1246         break;
1247     case InTableMode:
1248         ASSERT(insertionMode() == InTableMode);
1249         processStartTagForInTable(token);
1250         break;
1251     case InCaptionMode:
1252         ASSERT(insertionMode() == InCaptionMode);
1253         if (isCaptionColOrColgroupTag(token.name())
1254             || isTableBodyContextTag(token.name())
1255             || isTableCellContextTag(token.name())
1256             || token.name() == trTag) {
1257             parseError(token);
1258             if (!processCaptionEndTagForInCaption()) {
1259                 ASSERT(isParsingFragment());
1260                 return;
1261             }
1262             processStartTag(token);
1263             return;
1264         }
1265         processStartTagForInBody(token);
1266         break;
1267     case InColumnGroupMode:
1268         ASSERT(insertionMode() == InColumnGroupMode);
1269         if (token.name() == htmlTag) {
1270             m_tree.insertHTMLHtmlStartTagInBody(token);
1271             return;
1272         }
1273         if (token.name() == colTag) {
1274             m_tree.insertSelfClosingHTMLElement(token);
1275             return;
1276         }
1277         if (!processColgroupEndTagForInColumnGroup()) {
1278             ASSERT(isParsingFragment());
1279             return;
1280         }
1281         processStartTag(token);
1282         break;
1283     case InTableBodyMode:
1284         ASSERT(insertionMode() == InTableBodyMode);
1285         if (token.name() == trTag) {
1286             m_tree.openElements()->popUntilTableBodyScopeMarker(); // How is there ever anything to pop?
1287             m_tree.insertHTMLElement(token);
1288             setInsertionMode(InRowMode);
1289             return;
1290         }
1291         if (isTableCellContextTag(token.name())) {
1292             parseError(token);
1293             processFakeStartTag(trTag);
1294             ASSERT(insertionMode() == InRowMode);
1295             processStartTag(token);
1296             return;
1297         }
1298         if (isCaptionColOrColgroupTag(token.name()) || isTableBodyContextTag(token.name())) {
1299             // FIXME: This is slow.
1300             if (!m_tree.openElements()->inTableScope(tbodyTag.localName()) && !m_tree.openElements()->inTableScope(theadTag.localName()) && !m_tree.openElements()->inTableScope(tfootTag.localName())) {
1301                 ASSERT(isParsingFragment());
1302                 parseError(token);
1303                 return;
1304             }
1305             m_tree.openElements()->popUntilTableBodyScopeMarker();
1306             ASSERT(isTableBodyContextTag(m_tree.currentElement()->localName()));
1307             processFakeEndTag(m_tree.currentElement()->tagQName());
1308             processStartTag(token);
1309             return;
1310         }
1311         processStartTagForInTable(token);
1312         break;
1313     case InRowMode:
1314         ASSERT(insertionMode() == InRowMode);
1315         if (isTableCellContextTag(token.name())) {
1316             m_tree.openElements()->popUntilTableRowScopeMarker();
1317             m_tree.insertHTMLElement(token);
1318             setInsertionMode(InCellMode);
1319             m_tree.activeFormattingElements()->appendMarker();
1320             return;
1321         }
1322         if (token.name() == trTag
1323             || isCaptionColOrColgroupTag(token.name())
1324             || isTableBodyContextTag(token.name())) {
1325             if (!processTrEndTagForInRow()) {
1326                 ASSERT(isParsingFragment());
1327                 return;
1328             }
1329             ASSERT(insertionMode() == InTableBodyMode);
1330             processStartTag(token);
1331             return;
1332         }
1333         processStartTagForInTable(token);
1334         break;
1335     case InCellMode:
1336         ASSERT(insertionMode() == InCellMode);
1337         if (isCaptionColOrColgroupTag(token.name())
1338             || isTableCellContextTag(token.name())
1339             || token.name() == trTag
1340             || isTableBodyContextTag(token.name())) {
1341             // FIXME: This could be more efficient.
1342             if (!m_tree.openElements()->inTableScope(tdTag) && !m_tree.openElements()->inTableScope(thTag)) {
1343                 ASSERT(isParsingFragment());
1344                 parseError(token);
1345                 return;
1346             }
1347             closeTheCell();
1348             processStartTag(token);
1349             return;
1350         }
1351         processStartTagForInBody(token);
1352         break;
1353     case AfterBodyMode:
1354     case AfterAfterBodyMode:
1355         ASSERT(insertionMode() == AfterBodyMode || insertionMode() == AfterAfterBodyMode);
1356         if (token.name() == htmlTag) {
1357             m_tree.insertHTMLHtmlStartTagInBody(token);
1358             return;
1359         }
1360         setInsertionMode(InBodyMode);
1361         processStartTag(token);
1362         break;
1363     case InHeadNoscriptMode:
1364         ASSERT(insertionMode() == InHeadNoscriptMode);
1365         if (token.name() == htmlTag) {
1366             m_tree.insertHTMLHtmlStartTagInBody(token);
1367             return;
1368         }
1369         if (token.name() == basefontTag
1370             || token.name() == bgsoundTag
1371             || token.name() == linkTag
1372             || token.name() == metaTag
1373             || token.name() == noframesTag
1374             || token.name() == styleTag) {
1375             bool didProcess = processStartTagForInHead(token);
1376             ASSERT_UNUSED(didProcess, didProcess);
1377             return;
1378         }
1379         if (token.name() == htmlTag || token.name() == noscriptTag) {
1380             parseError(token);
1381             return;
1382         }
1383         defaultForInHeadNoscript();
1384         processToken(token);
1385         break;
1386     case InFramesetMode:
1387         ASSERT(insertionMode() == InFramesetMode);
1388         if (token.name() == htmlTag) {
1389             m_tree.insertHTMLHtmlStartTagInBody(token);
1390             return;
1391         }
1392         if (token.name() == framesetTag) {
1393             m_tree.insertHTMLElement(token);
1394             return;
1395         }
1396         if (token.name() == frameTag) {
1397             m_tree.insertSelfClosingHTMLElement(token);
1398             return;
1399         }
1400         if (token.name() == noframesTag) {
1401             processStartTagForInHead(token);
1402             return;
1403         }
1404         parseError(token);
1405         break;
1406     case AfterFramesetMode:
1407     case AfterAfterFramesetMode:
1408         ASSERT(insertionMode() == AfterFramesetMode || insertionMode() == AfterAfterFramesetMode);
1409         if (token.name() == htmlTag) {
1410             m_tree.insertHTMLHtmlStartTagInBody(token);
1411             return;
1412         }
1413         if (token.name() == noframesTag) {
1414             processStartTagForInHead(token);
1415             return;
1416         }
1417         parseError(token);
1418         break;
1419     case InSelectInTableMode:
1420         ASSERT(insertionMode() == InSelectInTableMode);
1421         if (token.name() == captionTag
1422             || token.name() == tableTag
1423             || isTableBodyContextTag(token.name())
1424             || token.name() == trTag
1425             || isTableCellContextTag(token.name())) {
1426             parseError(token);
1427             AtomicHTMLToken endSelect(HTMLToken::EndTag, selectTag.localName());
1428             processEndTag(endSelect);
1429             processStartTag(token);
1430             return;
1431         }
1432         // Fall through
1433     case InSelectMode:
1434         ASSERT(insertionMode() == InSelectMode || insertionMode() == InSelectInTableMode);
1435         if (token.name() == htmlTag) {
1436             m_tree.insertHTMLHtmlStartTagInBody(token);
1437             return;
1438         }
1439         if (token.name() == optionTag) {
1440             if (m_tree.currentElement()->hasTagName(optionTag)) {
1441                 AtomicHTMLToken endOption(HTMLToken::EndTag, optionTag.localName());
1442                 processEndTag(endOption);
1443             }
1444             m_tree.insertHTMLElement(token);
1445             return;
1446         }
1447         if (token.name() == optgroupTag) {
1448             if (m_tree.currentElement()->hasTagName(optionTag)) {
1449                 AtomicHTMLToken endOption(HTMLToken::EndTag, optionTag.localName());
1450                 processEndTag(endOption);
1451             }
1452             if (m_tree.currentElement()->hasTagName(optgroupTag)) {
1453                 AtomicHTMLToken endOptgroup(HTMLToken::EndTag, optgroupTag.localName());
1454                 processEndTag(endOptgroup);
1455             }
1456             m_tree.insertHTMLElement(token);
1457             return;
1458         }
1459         if (token.name() == selectTag) {
1460             parseError(token);
1461             AtomicHTMLToken endSelect(HTMLToken::EndTag, selectTag.localName());
1462             processEndTag(endSelect);
1463             return;
1464         }
1465         if (token.name() == inputTag
1466             || token.name() == keygenTag
1467             || token.name() == textareaTag) {
1468             parseError(token);
1469             if (!m_tree.openElements()->inTableScope(selectTag)) {
1470                 ASSERT(isParsingFragment());
1471                 return;
1472             }
1473             AtomicHTMLToken endSelect(HTMLToken::EndTag, selectTag.localName());
1474             processEndTag(endSelect);
1475             processStartTag(token);
1476             return;
1477         }
1478         if (token.name() == scriptTag) {
1479             bool didProcess = processStartTagForInHead(token);
1480             ASSERT_UNUSED(didProcess, didProcess);
1481             return;
1482         }
1483         break;
1484     case InTableTextMode:
1485         defaultForInTableText();
1486         processStartTag(token);
1487         break;
1488     case InForeignContentMode: {
1489         if (shouldProcessUsingSecondaryInsertionMode(token, m_tree.currentElement())) {
1490             processUsingSecondaryInsertionModeAndAdjustInsertionMode(token);
1491             return;
1492         }
1493         if (token.name() == bTag
1494             || token.name() == bigTag
1495             || token.name() == blockquoteTag
1496             || token.name() == bodyTag
1497             || token.name() == brTag
1498             || token.name() == centerTag
1499             || token.name() == codeTag
1500             || token.name() == ddTag
1501             || token.name() == divTag
1502             || token.name() == dlTag
1503             || token.name() == dtTag
1504             || token.name() == emTag
1505             || token.name() == embedTag
1506             || isNumberedHeaderTag(token.name())
1507             || token.name() == headTag
1508             || token.name() == hrTag
1509             || token.name() == iTag
1510             || token.name() == imgTag
1511             || token.name() == liTag
1512             || token.name() == listingTag
1513             || token.name() == menuTag
1514             || token.name() == metaTag
1515             || token.name() == nobrTag
1516             || token.name() == olTag
1517             || token.name() == pTag
1518             || token.name() == preTag
1519             || token.name() == rubyTag
1520             || token.name() == sTag
1521             || token.name() == smallTag
1522             || token.name() == spanTag
1523             || token.name() == strongTag
1524             || token.name() == strikeTag
1525             || token.name() == subTag
1526             || token.name() == supTag
1527             || token.name() == tableTag
1528             || token.name() == ttTag
1529             || token.name() == uTag
1530             || token.name() == ulTag
1531             || token.name() == varTag
1532             || (token.name() == fontTag && (token.getAttributeItem(colorAttr) || token.getAttributeItem(faceAttr) || token.getAttributeItem(sizeAttr)))) {
1533             parseError(token);
1534             m_tree.openElements()->popUntilForeignContentScopeMarker();
1535             if (insertionMode() == InForeignContentMode && m_tree.openElements()->hasOnlyHTMLElementsInScope())
1536                 setInsertionMode(m_secondaryInsertionMode);
1537             processStartTag(token);
1538             return;
1539         }
1540         const AtomicString& currentNamespace = m_tree.currentElement()->namespaceURI();
1541         if (currentNamespace == MathMLNames::mathmlNamespaceURI)
1542             adjustMathMLAttributes(token);
1543          if (currentNamespace == SVGNames::svgNamespaceURI) {
1544             adjustSVGTagNameCase(token);
1545             adjustSVGAttributes(token);
1546         }
1547         adjustForeignAttributes(token);
1548         m_tree.insertForeignElement(token, currentNamespace);
1549         break;
1550     }
1551     case TextMode:
1552         ASSERT_NOT_REACHED();
1553         break;
1554     }
1555 }
1556
1557 bool HTMLTreeBuilder::processBodyEndTagForInBody(AtomicHTMLToken& token)
1558 {
1559     ASSERT(token.type() == HTMLToken::EndTag);
1560     ASSERT(token.name() == bodyTag);
1561     if (!m_tree.openElements()->inScope(bodyTag.localName())) {
1562         parseError(token);
1563         return false;
1564     }
1565     notImplemented(); // Emit a more specific parse error based on stack contents.
1566     setInsertionMode(AfterBodyMode);
1567     return true;
1568 }
1569
1570 void HTMLTreeBuilder::processAnyOtherEndTagForInBody(AtomicHTMLToken& token)
1571 {
1572     ASSERT(token.type() == HTMLToken::EndTag);
1573     HTMLElementStack::ElementRecord* record = m_tree.openElements()->topRecord();
1574     while (1) {
1575         Element* node = record->element();
1576         if (node->hasLocalName(token.name())) {
1577             m_tree.generateImpliedEndTags();
1578             if (!m_tree.currentElement()->hasLocalName(token.name())) {
1579                 parseError(token);
1580                 // FIXME: This is either a bug in the spec, or a bug in our
1581                 // implementation.  Filed a bug with HTML5:
1582                 // http://www.w3.org/Bugs/Public/show_bug.cgi?id=10080
1583                 // We might have already popped the node for the token in
1584                 // generateImpliedEndTags, just abort.
1585                 if (!m_tree.openElements()->contains(node))
1586                     return;
1587             }
1588             m_tree.openElements()->popUntilPopped(node);
1589             return;
1590         }
1591         if (isSpecialNode(node)) {
1592             parseError(token);
1593             return;
1594         }
1595         record = record->next();
1596     }
1597 }
1598
1599 // FIXME: This probably belongs on HTMLElementStack.
1600 HTMLElementStack::ElementRecord* HTMLTreeBuilder::furthestBlockForFormattingElement(Element* formattingElement)
1601 {
1602     HTMLElementStack::ElementRecord* furthestBlock = 0;
1603     HTMLElementStack::ElementRecord* record = m_tree.openElements()->topRecord();
1604     for (; record; record = record->next()) {
1605         if (record->element() == formattingElement)
1606             return furthestBlock;
1607         if (isSpecialNode(record->element()))
1608             furthestBlock = record;
1609     }
1610     ASSERT_NOT_REACHED();
1611     return 0;
1612 }
1613
1614 // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#parsing-main-inbody
1615 void HTMLTreeBuilder::callTheAdoptionAgency(AtomicHTMLToken& token)
1616 {
1617     // The adoption agency algorithm is N^2.  We limit the number of iterations
1618     // to stop from hanging the whole browser.  This limit is copied from the
1619     // legacy tree builder and might need to be tweaked in the future.
1620     static const int adoptionAgencyIterationLimit = 10;
1621
1622     for (int i = 0; i < adoptionAgencyIterationLimit; ++i) {
1623         // 1.
1624         Element* formattingElement = m_tree.activeFormattingElements()->closestElementInScopeWithName(token.name());
1625         if (!formattingElement || ((m_tree.openElements()->contains(formattingElement)) && !m_tree.openElements()->inScope(formattingElement))) {
1626             parseError(token);
1627             notImplemented(); // Check the stack of open elements for a more specific parse error.
1628             return;
1629         }
1630         HTMLElementStack::ElementRecord* formattingElementRecord = m_tree.openElements()->find(formattingElement);
1631         if (!formattingElementRecord) {
1632             parseError(token);
1633             m_tree.activeFormattingElements()->remove(formattingElement);
1634             return;
1635         }
1636         if (formattingElement != m_tree.currentElement())
1637             parseError(token);
1638         // 2.
1639         HTMLElementStack::ElementRecord* furthestBlock = furthestBlockForFormattingElement(formattingElement);
1640         // 3.
1641         if (!furthestBlock) {
1642             m_tree.openElements()->popUntilPopped(formattingElement);
1643             m_tree.activeFormattingElements()->remove(formattingElement);
1644             return;
1645         }
1646         // 4.
1647         ASSERT(furthestBlock->isAbove(formattingElementRecord));
1648         Element* commonAncestor = formattingElementRecord->next()->element();
1649         // 5.
1650         HTMLFormattingElementList::Bookmark bookmark = m_tree.activeFormattingElements()->bookmarkFor(formattingElement);
1651         // 6.
1652         HTMLElementStack::ElementRecord* node = furthestBlock;
1653         HTMLElementStack::ElementRecord* nextNode = node->next();
1654         HTMLElementStack::ElementRecord* lastNode = furthestBlock;
1655         for (int i = 0; i < adoptionAgencyIterationLimit; ++i) {
1656             // 6.1
1657             node = nextNode;
1658             ASSERT(node);
1659             nextNode = node->next(); // Save node->next() for the next iteration in case node is deleted in 6.2.
1660             // 6.2
1661             if (!m_tree.activeFormattingElements()->contains(node->element())) {
1662                 m_tree.openElements()->remove(node->element());
1663                 node = 0;
1664                 continue;
1665             }
1666             // 6.3
1667             if (node == formattingElementRecord)
1668                 break;
1669             // 6.5
1670             RefPtr<Element> newElement = m_tree.createHTMLElementFromElementRecord(node);
1671             HTMLFormattingElementList::Entry* nodeEntry = m_tree.activeFormattingElements()->find(node->element());
1672             nodeEntry->replaceElement(newElement.get());
1673             node->replaceElement(newElement.release());
1674             // 6.4 -- Intentionally out of order to handle the case where node
1675             // was replaced in 6.5.
1676             // http://www.w3.org/Bugs/Public/show_bug.cgi?id=10096
1677             if (lastNode == furthestBlock)
1678                 bookmark.moveToAfter(nodeEntry);
1679             // 6.6
1680             if (Element* parent = lastNode->element()->parentElement())
1681                 parent->parserRemoveChild(lastNode->element());
1682             node->element()->parserAddChild(lastNode->element());
1683             if (lastNode->element()->parentElement()->attached() && !lastNode->element()->attached())
1684                 lastNode->element()->lazyAttach();
1685             // 6.7
1686             lastNode = node;
1687         }
1688         // 7
1689         const AtomicString& commonAncestorTag = commonAncestor->localName();
1690         if (Element* parent = lastNode->element()->parentElement())
1691             parent->parserRemoveChild(lastNode->element());
1692         // FIXME: If this moves to HTMLConstructionSite, this check should use
1693         // causesFosterParenting(tagName) instead.
1694         if (commonAncestorTag == tableTag
1695             || commonAncestorTag == trTag
1696             || isTableBodyContextTag(commonAncestorTag))
1697             m_tree.fosterParent(lastNode->element());
1698         else {
1699             commonAncestor->parserAddChild(lastNode->element());
1700             if (lastNode->element()->parentElement()->attached() && !lastNode->element()->attached())
1701                 lastNode->element()->lazyAttach();
1702         }
1703         // 8
1704         RefPtr<Element> newElement = m_tree.createHTMLElementFromElementRecord(formattingElementRecord);
1705         // 9
1706         newElement->takeAllChildrenFrom(furthestBlock->element());
1707         // 10
1708         Element* furthestBlockElement = furthestBlock->element();
1709         // FIXME: All this creation / parserAddChild / attach business should
1710         //        be in HTMLConstructionSite.  My guess is that steps 8--12
1711         //        should all be in some HTMLConstructionSite function.
1712         furthestBlockElement->parserAddChild(newElement);
1713         if (furthestBlockElement->attached() && !newElement->attached()) {
1714             // Notice that newElement might already be attached if, for example, one of the reparented
1715             // children is a style element, which attaches itself automatically.
1716             newElement->attach();
1717         }
1718         // 11
1719         m_tree.activeFormattingElements()->swapTo(formattingElement, newElement.get(), bookmark);
1720         // 12
1721         m_tree.openElements()->remove(formattingElement);
1722         m_tree.openElements()->insertAbove(newElement, furthestBlock);
1723     }
1724 }
1725
1726 void HTMLTreeBuilder::setSecondaryInsertionMode(InsertionMode mode)
1727 {
1728     ASSERT(mode != InForeignContentMode);
1729     m_secondaryInsertionMode = mode;
1730 }
1731
1732 void HTMLTreeBuilder::setInsertionModeAndEnd(InsertionMode newInsertionMode, bool foreign)
1733 {
1734     setInsertionMode(newInsertionMode);
1735     if (foreign) {
1736         setSecondaryInsertionMode(m_insertionMode);
1737         setInsertionMode(InForeignContentMode);
1738     }
1739 }
1740
1741 void HTMLTreeBuilder::resetInsertionModeAppropriately()
1742 {
1743     // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#reset-the-insertion-mode-appropriately
1744     bool last = false;
1745     bool foreign = false;
1746     HTMLElementStack::ElementRecord* nodeRecord = m_tree.openElements()->topRecord();
1747     while (1) {
1748         Element* node = nodeRecord->element();
1749         if (node == m_tree.openElements()->bottom()) {
1750             ASSERT(isParsingFragment());
1751             last = true;
1752             node = m_fragmentContext.contextElement();
1753         }
1754         if (node->hasTagName(selectTag)) {
1755             ASSERT(isParsingFragment());
1756             return setInsertionModeAndEnd(InSelectMode, foreign);
1757         }
1758         if (node->hasTagName(tdTag) || node->hasTagName(thTag))
1759             return setInsertionModeAndEnd(InCellMode, foreign);
1760         if (node->hasTagName(trTag))
1761             return setInsertionModeAndEnd(InRowMode, foreign);
1762         if (isTableBodyContextTag(node->localName()))
1763             return setInsertionModeAndEnd(InTableBodyMode, foreign);
1764         if (node->hasTagName(captionTag))
1765             return setInsertionModeAndEnd(InCaptionMode, foreign);
1766         if (node->hasTagName(colgroupTag)) {
1767             ASSERT(isParsingFragment());
1768             return setInsertionModeAndEnd(InColumnGroupMode, foreign);
1769         }
1770         if (node->hasTagName(tableTag))
1771             return setInsertionModeAndEnd(InTableMode, foreign);
1772         if (node->hasTagName(headTag)) {
1773             ASSERT(isParsingFragment());
1774             return setInsertionModeAndEnd(InBodyMode, foreign);
1775         }
1776         if (node->hasTagName(bodyTag))
1777             return setInsertionModeAndEnd(InBodyMode, foreign);
1778         if (node->hasTagName(framesetTag)) {
1779             ASSERT(isParsingFragment());
1780             return setInsertionModeAndEnd(InFramesetMode, foreign);
1781         }
1782         if (node->hasTagName(htmlTag)) {
1783             ASSERT(isParsingFragment());
1784             return setInsertionModeAndEnd(BeforeHeadMode, foreign);
1785         }
1786         if (node->namespaceURI() == SVGNames::svgNamespaceURI
1787             || node->namespaceURI() == MathMLNames::mathmlNamespaceURI)
1788             foreign = true;
1789         if (last) {
1790             ASSERT(isParsingFragment());
1791             return setInsertionModeAndEnd(InBodyMode, foreign);
1792         }
1793         nodeRecord = nodeRecord->next();
1794     }
1795 }
1796
1797 void HTMLTreeBuilder::processEndTagForInTableBody(AtomicHTMLToken& token)
1798 {
1799     ASSERT(token.type() == HTMLToken::EndTag);
1800     if (isTableBodyContextTag(token.name())) {
1801         if (!m_tree.openElements()->inTableScope(token.name())) {
1802             parseError(token);
1803             return;
1804         }
1805         m_tree.openElements()->popUntilTableBodyScopeMarker();
1806         m_tree.openElements()->pop();
1807         setInsertionMode(InTableMode);
1808         return;
1809     }
1810     if (token.name() == tableTag) {
1811         // FIXME: This is slow.
1812         if (!m_tree.openElements()->inTableScope(tbodyTag.localName()) && !m_tree.openElements()->inTableScope(theadTag.localName()) && !m_tree.openElements()->inTableScope(tfootTag.localName())) {
1813             ASSERT(isParsingFragment());
1814             parseError(token);
1815             return;
1816         }
1817         m_tree.openElements()->popUntilTableBodyScopeMarker();
1818         ASSERT(isTableBodyContextTag(m_tree.currentElement()->localName()));
1819         processFakeEndTag(m_tree.currentElement()->tagQName());
1820         processEndTag(token);
1821         return;
1822     }
1823     if (token.name() == bodyTag
1824         || isCaptionColOrColgroupTag(token.name())
1825         || token.name() == htmlTag
1826         || isTableCellContextTag(token.name())
1827         || token.name() == trTag) {
1828         parseError(token);
1829         return;
1830     }
1831     processEndTagForInTable(token);
1832 }
1833
1834 void HTMLTreeBuilder::processEndTagForInRow(AtomicHTMLToken& token)
1835 {
1836     ASSERT(token.type() == HTMLToken::EndTag);
1837     if (token.name() == trTag) {
1838         processTrEndTagForInRow();
1839         return;
1840     }
1841     if (token.name() == tableTag) {
1842         if (!processTrEndTagForInRow()) {
1843             ASSERT(isParsingFragment());
1844             return;
1845         }
1846         ASSERT(insertionMode() == InTableBodyMode);
1847         processEndTag(token);
1848         return;
1849     }
1850     if (isTableBodyContextTag(token.name())) {
1851         if (!m_tree.openElements()->inTableScope(token.name())) {
1852             parseError(token);
1853             return;
1854         }
1855         processFakeEndTag(trTag);
1856         ASSERT(insertionMode() == InTableBodyMode);
1857         processEndTag(token);
1858         return;
1859     }
1860     if (token.name() == bodyTag
1861         || isCaptionColOrColgroupTag(token.name())
1862         || token.name() == htmlTag
1863         || isTableCellContextTag(token.name())) {
1864         parseError(token);
1865         return;
1866     }
1867     processEndTagForInTable(token);
1868 }
1869
1870 void HTMLTreeBuilder::processEndTagForInCell(AtomicHTMLToken& token)
1871 {
1872     ASSERT(token.type() == HTMLToken::EndTag);
1873     if (isTableCellContextTag(token.name())) {
1874         if (!m_tree.openElements()->inTableScope(token.name())) {
1875             parseError(token);
1876             return;
1877         }
1878         m_tree.generateImpliedEndTags();
1879         if (!m_tree.currentElement()->hasLocalName(token.name()))
1880             parseError(token);
1881         m_tree.openElements()->popUntilPopped(token.name());
1882         m_tree.activeFormattingElements()->clearToLastMarker();
1883         setInsertionMode(InRowMode);
1884         return;
1885     }
1886     if (token.name() == bodyTag
1887         || isCaptionColOrColgroupTag(token.name())
1888         || token.name() == htmlTag) {
1889         parseError(token);
1890         return;
1891     }
1892     if (token.name() == tableTag
1893         || token.name() == trTag
1894         || isTableBodyContextTag(token.name())) {
1895         if (!m_tree.openElements()->inTableScope(token.name())) {
1896             ASSERT(isParsingFragment());
1897             parseError(token);
1898             return;
1899         }
1900         closeTheCell();
1901         processEndTag(token);
1902         return;
1903     }
1904     processEndTagForInBody(token);
1905 }
1906
1907 void HTMLTreeBuilder::processEndTagForInBody(AtomicHTMLToken& token)
1908 {
1909     ASSERT(token.type() == HTMLToken::EndTag);
1910     if (token.name() == bodyTag) {
1911         processBodyEndTagForInBody(token);
1912         return;
1913     }
1914     if (token.name() == htmlTag) {
1915         AtomicHTMLToken endBody(HTMLToken::EndTag, bodyTag.localName());
1916         if (processBodyEndTagForInBody(endBody))
1917             processEndTag(token);
1918         return;
1919     }
1920     if (token.name() == addressTag
1921         || token.name() == articleTag
1922         || token.name() == asideTag
1923         || token.name() == blockquoteTag
1924         || token.name() == buttonTag
1925         || token.name() == centerTag
1926         || token.name() == detailsTag
1927         || token.name() == dirTag
1928         || token.name() == divTag
1929         || token.name() == dlTag
1930         || token.name() == fieldsetTag
1931         || token.name() == figcaptionTag
1932         || token.name() == figureTag
1933         || token.name() == footerTag
1934         || token.name() == headerTag
1935         || token.name() == hgroupTag
1936         || token.name() == listingTag
1937         || token.name() == menuTag
1938         || token.name() == navTag
1939         || token.name() == olTag
1940         || token.name() == preTag
1941         || token.name() == sectionTag
1942         || token.name() == summaryTag
1943         || token.name() == ulTag) {
1944         if (!m_tree.openElements()->inScope(token.name())) {
1945             parseError(token);
1946             return;
1947         }
1948         m_tree.generateImpliedEndTags();
1949         if (!m_tree.currentElement()->hasLocalName(token.name()))
1950             parseError(token);
1951         m_tree.openElements()->popUntilPopped(token.name());
1952         return;
1953     }
1954     if (token.name() == formTag) {
1955         RefPtr<Element> node = m_tree.takeForm();
1956         if (!node || !m_tree.openElements()->inScope(node.get())) {
1957             parseError(token);
1958             return;
1959         }
1960         m_tree.generateImpliedEndTags();
1961         if (m_tree.currentElement() != node.get())
1962             parseError(token);
1963         m_tree.openElements()->remove(node.get());
1964     }
1965     if (token.name() == pTag) {
1966         if (!m_tree.openElements()->inButtonScope(token.name())) {
1967             parseError(token);
1968             processFakeStartTag(pTag);
1969             ASSERT(m_tree.openElements()->inScope(token.name()));
1970             processEndTag(token);
1971             return;
1972         }
1973         m_tree.generateImpliedEndTagsWithExclusion(token.name());
1974         if (!m_tree.currentElement()->hasLocalName(token.name()))
1975             parseError(token);
1976         m_tree.openElements()->popUntilPopped(token.name());
1977         return;
1978     }
1979     if (token.name() == liTag) {
1980         if (!m_tree.openElements()->inListItemScope(token.name())) {
1981             parseError(token);
1982             return;
1983         }
1984         m_tree.generateImpliedEndTagsWithExclusion(token.name());
1985         if (!m_tree.currentElement()->hasLocalName(token.name()))
1986             parseError(token);
1987         m_tree.openElements()->popUntilPopped(token.name());
1988         return;
1989     }
1990     if (token.name() == ddTag
1991         || token.name() == dtTag) {
1992         if (!m_tree.openElements()->inScope(token.name())) {
1993             parseError(token);
1994             return;
1995         }
1996         m_tree.generateImpliedEndTagsWithExclusion(token.name());
1997         if (!m_tree.currentElement()->hasLocalName(token.name()))
1998             parseError(token);
1999         m_tree.openElements()->popUntilPopped(token.name());
2000         return;
2001     }
2002     if (isNumberedHeaderTag(token.name())) {
2003         if (!m_tree.openElements()->hasNumberedHeaderElementInScope()) {
2004             parseError(token);
2005             return;
2006         }
2007         m_tree.generateImpliedEndTags();
2008         if (!m_tree.currentElement()->hasLocalName(token.name()))
2009             parseError(token);
2010         m_tree.openElements()->popUntilNumberedHeaderElementPopped();
2011         return;
2012     }
2013     if (token.name() == "sarcasm") {
2014         notImplemented(); // Take a deep breath.
2015         return;
2016     }
2017     if (isFormattingTag(token.name())) {
2018         callTheAdoptionAgency(token);
2019         return;
2020     }
2021     if (token.name() == appletTag
2022         || token.name() == marqueeTag
2023         || token.name() == objectTag) {
2024         if (!m_tree.openElements()->inScope(token.name())) {
2025             parseError(token);
2026             return;
2027         }
2028         m_tree.generateImpliedEndTags();
2029         if (!m_tree.currentElement()->hasLocalName(token.name()))
2030             parseError(token);
2031         m_tree.openElements()->popUntilPopped(token.name());
2032         m_tree.activeFormattingElements()->clearToLastMarker();
2033         return;
2034     }
2035     if (token.name() == brTag) {
2036         parseError(token);
2037         processFakeStartTag(brTag);
2038         return;
2039     }
2040     processAnyOtherEndTagForInBody(token);
2041 }
2042
2043 bool HTMLTreeBuilder::processCaptionEndTagForInCaption()
2044 {
2045     if (!m_tree.openElements()->inTableScope(captionTag.localName())) {
2046         ASSERT(isParsingFragment());
2047         // FIXME: parse error
2048         return false;
2049     }
2050     m_tree.generateImpliedEndTags();
2051     // FIXME: parse error if (!m_tree.currentElement()->hasTagName(captionTag))
2052     m_tree.openElements()->popUntilPopped(captionTag.localName());
2053     m_tree.activeFormattingElements()->clearToLastMarker();
2054     setInsertionMode(InTableMode);
2055     return true;
2056 }
2057
2058 bool HTMLTreeBuilder::processTrEndTagForInRow()
2059 {
2060     if (!m_tree.openElements()->inTableScope(trTag.localName())) {
2061         ASSERT(isParsingFragment());
2062         // FIXME: parse error
2063         return false;
2064     }
2065     m_tree.openElements()->popUntilTableRowScopeMarker();
2066     ASSERT(m_tree.currentElement()->hasTagName(trTag));
2067     m_tree.openElements()->pop();
2068     setInsertionMode(InTableBodyMode);
2069     return true;
2070 }
2071
2072 bool HTMLTreeBuilder::processTableEndTagForInTable()
2073 {
2074     if (!m_tree.openElements()->inTableScope(tableTag)) {
2075         ASSERT(isParsingFragment());
2076         // FIXME: parse error.
2077         return false;
2078     }
2079     m_tree.openElements()->popUntilPopped(tableTag.localName());
2080     resetInsertionModeAppropriately();
2081     return true;
2082 }
2083
2084 void HTMLTreeBuilder::processEndTagForInTable(AtomicHTMLToken& token)
2085 {
2086     ASSERT(token.type() == HTMLToken::EndTag);
2087     if (token.name() == tableTag) {
2088         processTableEndTagForInTable();
2089         return;
2090     }
2091     if (token.name() == bodyTag
2092         || isCaptionColOrColgroupTag(token.name())
2093         || token.name() == htmlTag
2094         || isTableBodyContextTag(token.name())
2095         || isTableCellContextTag(token.name())
2096         || token.name() == trTag) {
2097         parseError(token);
2098         return;
2099     }
2100     // Is this redirection necessary here?
2101     HTMLConstructionSite::RedirectToFosterParentGuard redirecter(m_tree);
2102     processEndTagForInBody(token);
2103 }
2104
2105 void HTMLTreeBuilder::processEndTag(AtomicHTMLToken& token)
2106 {
2107     ASSERT(token.type() == HTMLToken::EndTag);
2108     switch (insertionMode()) {
2109     case InitialMode:
2110         ASSERT(insertionMode() == InitialMode);
2111         defaultForInitial();
2112         // Fall through.
2113     case BeforeHTMLMode:
2114         ASSERT(insertionMode() == BeforeHTMLMode);
2115         if (token.name() != headTag && token.name() != bodyTag && token.name() != htmlTag && token.name() != brTag) {
2116             parseError(token);
2117             return;
2118         }
2119         defaultForBeforeHTML();
2120         // Fall through.
2121     case BeforeHeadMode:
2122         ASSERT(insertionMode() == BeforeHeadMode);
2123         if (token.name() != headTag && token.name() != bodyTag && token.name() != htmlTag && token.name() != brTag) {
2124             parseError(token);
2125             return;
2126         }
2127         defaultForBeforeHead();
2128         // Fall through.
2129     case InHeadMode:
2130         ASSERT(insertionMode() == InHeadMode);
2131         if (token.name() == headTag) {
2132             m_tree.openElements()->popHTMLHeadElement();
2133             setInsertionMode(AfterHeadMode);
2134             return;
2135         }
2136         if (token.name() != bodyTag && token.name() != htmlTag && token.name() != brTag) {
2137             parseError(token);
2138             return;
2139         }
2140         defaultForInHead();
2141         // Fall through.
2142     case AfterHeadMode:
2143         ASSERT(insertionMode() == AfterHeadMode);
2144         if (token.name() != bodyTag && token.name() != htmlTag && token.name() != brTag) {
2145             parseError(token);
2146             return;
2147         }
2148         defaultForAfterHead();
2149         // Fall through
2150     case InBodyMode:
2151         ASSERT(insertionMode() == InBodyMode);
2152         processEndTagForInBody(token);
2153         break;
2154     case InTableMode:
2155         ASSERT(insertionMode() == InTableMode);
2156         processEndTagForInTable(token);
2157         break;
2158     case InCaptionMode:
2159         ASSERT(insertionMode() == InCaptionMode);
2160         if (token.name() == captionTag) {
2161             processCaptionEndTagForInCaption();
2162             return;
2163         }
2164         if (token.name() == tableTag) {
2165             parseError(token);
2166             if (!processCaptionEndTagForInCaption()) {
2167                 ASSERT(isParsingFragment());
2168                 return;
2169             }
2170             processEndTag(token);
2171             return;
2172         }
2173         if (token.name() == bodyTag
2174             || token.name() == colTag
2175             || token.name() == colgroupTag
2176             || token.name() == htmlTag
2177             || isTableBodyContextTag(token.name())
2178             || isTableCellContextTag(token.name())
2179             || token.name() == trTag) {
2180             parseError(token);
2181             return;
2182         }
2183         processEndTagForInBody(token);
2184         break;
2185     case InColumnGroupMode:
2186         ASSERT(insertionMode() == InColumnGroupMode);
2187         if (token.name() == colgroupTag) {
2188             processColgroupEndTagForInColumnGroup();
2189             return;
2190         }
2191         if (token.name() == colTag) {
2192             parseError(token);
2193             return;
2194         }
2195         if (!processColgroupEndTagForInColumnGroup()) {
2196             ASSERT(isParsingFragment());
2197             return;
2198         }
2199         processEndTag(token);
2200         break;
2201     case InRowMode:
2202         ASSERT(insertionMode() == InRowMode);
2203         processEndTagForInRow(token);
2204         break;
2205     case InCellMode:
2206         ASSERT(insertionMode() == InCellMode);
2207         processEndTagForInCell(token);
2208         break;
2209     case InTableBodyMode:
2210         ASSERT(insertionMode() == InTableBodyMode);
2211         processEndTagForInTableBody(token);
2212         break;
2213     case AfterBodyMode:
2214         ASSERT(insertionMode() == AfterBodyMode);
2215         if (token.name() == htmlTag) {
2216             if (isParsingFragment()) {
2217                 parseError(token);
2218                 return;
2219             }
2220             setInsertionMode(AfterAfterBodyMode);
2221             return;
2222         }
2223         // Fall through.
2224     case AfterAfterBodyMode:
2225         ASSERT(insertionMode() == AfterBodyMode || insertionMode() == AfterAfterBodyMode);
2226         parseError(token);
2227         setInsertionMode(InBodyMode);
2228         processEndTag(token);
2229         break;
2230     case InHeadNoscriptMode:
2231         ASSERT(insertionMode() == InHeadNoscriptMode);
2232         if (token.name() == noscriptTag) {
2233             ASSERT(m_tree.currentElement()->hasTagName(noscriptTag));
2234             m_tree.openElements()->pop();
2235             ASSERT(m_tree.currentElement()->hasTagName(headTag));
2236             setInsertionMode(InHeadMode);
2237             return;
2238         }
2239         if (token.name() != brTag) {
2240             parseError(token);
2241             return;
2242         }
2243         defaultForInHeadNoscript();
2244         processToken(token);
2245         break;
2246     case TextMode:
2247         if (token.name() == scriptTag) {
2248             // Pause ourselves so that parsing stops until the script can be processed by the caller.
2249             m_isPaused = true;
2250             ASSERT(m_tree.currentElement()->hasTagName(scriptTag));
2251             m_scriptToProcess = m_tree.currentElement();
2252             m_scriptToProcessStartLine = m_lastScriptElementStartLine + 1;
2253             m_tree.openElements()->pop();
2254             if (isParsingFragment() && m_fragmentContext.scriptingPermission() == FragmentScriptingNotAllowed)
2255                 m_scriptToProcess->removeAllChildren();
2256             setInsertionMode(m_originalInsertionMode);
2257             return;
2258         }
2259         m_tree.openElements()->pop();
2260         setInsertionMode(m_originalInsertionMode);
2261         break;
2262     case InFramesetMode:
2263         ASSERT(insertionMode() == InFramesetMode);
2264         if (token.name() == framesetTag) {
2265             if (m_tree.currentElement() == m_tree.openElements()->htmlElement()) {
2266                 parseError(token);
2267                 return;
2268             }
2269             m_tree.openElements()->pop();
2270             if (!isParsingFragment() && !m_tree.currentElement()->hasTagName(framesetTag))
2271                 setInsertionMode(AfterFramesetMode);
2272             return;
2273         }
2274         break;
2275     case AfterFramesetMode:
2276         ASSERT(insertionMode() == AfterFramesetMode);
2277         if (token.name() == htmlTag) {
2278             setInsertionMode(AfterAfterFramesetMode);
2279             return;
2280         }
2281         // Fall through.
2282     case AfterAfterFramesetMode:
2283         ASSERT(insertionMode() == AfterFramesetMode || insertionMode() == AfterAfterFramesetMode);
2284         parseError(token);
2285         break;
2286     case InSelectInTableMode:
2287         ASSERT(insertionMode() == InSelectInTableMode);
2288         if (token.name() == captionTag
2289             || token.name() == tableTag
2290             || isTableBodyContextTag(token.name())
2291             || token.name() == trTag
2292             || isTableCellContextTag(token.name())) {
2293             parseError(token);
2294             if (m_tree.openElements()->inTableScope(token.name())) {
2295                 AtomicHTMLToken endSelect(HTMLToken::EndTag, selectTag.localName());
2296                 processEndTag(endSelect);
2297                 processEndTag(token);
2298             }
2299             return;
2300         }
2301         // Fall through.
2302     case InSelectMode:
2303         ASSERT(insertionMode() == InSelectMode || insertionMode() == InSelectInTableMode);
2304         if (token.name() == optgroupTag) {
2305             if (m_tree.currentElement()->hasTagName(optionTag) && m_tree.oneBelowTop()->hasTagName(optgroupTag))
2306                 processFakeEndTag(optionTag);
2307             if (m_tree.currentElement()->hasTagName(optgroupTag)) {
2308                 m_tree.openElements()->pop();
2309                 return;
2310             }
2311             parseError(token);
2312             return;
2313         }
2314         if (token.name() == optionTag) {
2315             if (m_tree.currentElement()->hasTagName(optionTag)) {
2316                 m_tree.openElements()->pop();
2317                 return;
2318             }
2319             parseError(token);
2320             return;
2321         }
2322         if (token.name() == selectTag) {
2323             if (!m_tree.openElements()->inTableScope(token.name())) {
2324                 ASSERT(isParsingFragment());
2325                 parseError(token);
2326                 return;
2327             }
2328             m_tree.openElements()->popUntilPopped(selectTag.localName());
2329             resetInsertionModeAppropriately();
2330             return;
2331         }
2332         break;
2333     case InTableTextMode:
2334         defaultForInTableText();
2335         processEndTag(token);
2336         break;
2337     case InForeignContentMode:
2338         if (token.name() == SVGNames::scriptTag && m_tree.currentElement()->hasTagName(SVGNames::scriptTag)) {
2339             notImplemented();
2340             return;
2341         }
2342         if (m_tree.currentElement()->namespaceURI() != xhtmlNamespaceURI) {
2343             // FIXME: This code just wants an Element* iterator, instead of an ElementRecord*
2344             HTMLElementStack::ElementRecord* nodeRecord = m_tree.openElements()->topRecord();
2345             if (!nodeRecord->element()->hasLocalName(token.name()))
2346                 parseError(token);
2347             while (1) {
2348                 if (nodeRecord->element()->hasLocalName(token.name())) {
2349                     m_tree.openElements()->popUntilPopped(nodeRecord->element());
2350                     break;
2351                 }
2352                 nodeRecord = nodeRecord->next();
2353                 if (nodeRecord->element()->namespaceURI() == xhtmlNamespaceURI)
2354                     break;
2355             }
2356         }
2357         // Any other end tag (also the last two steps of "An end tag, if the current node is not an element in the HTML namespace."
2358         processUsingSecondaryInsertionModeAndAdjustInsertionMode(token);
2359         break;
2360     }
2361 }
2362
2363 class HTMLTreeBuilder::FakeInsertionMode : public Noncopyable {
2364 public:
2365     FakeInsertionMode(HTMLTreeBuilder* treeBuilder, InsertionMode mode)
2366         : m_treeBuilder(treeBuilder)
2367         , m_originalMode(treeBuilder->insertionMode())
2368     {
2369         m_treeBuilder->setFakeInsertionMode(mode);
2370     }
2371
2372     ~FakeInsertionMode()
2373     {
2374         if (m_treeBuilder->isFakeInsertionMode())
2375             m_treeBuilder->setInsertionMode(m_originalMode);
2376     }
2377
2378 private:
2379     HTMLTreeBuilder* m_treeBuilder;
2380     InsertionMode m_originalMode;
2381 };
2382
2383 // This handles both secondary insertion mode processing, as well as updating
2384 // the insertion mode.  These are separate steps in the spec, but always occur
2385 // right after one another.
2386 void HTMLTreeBuilder::processUsingSecondaryInsertionModeAndAdjustInsertionMode(AtomicHTMLToken& token)
2387 {
2388     ASSERT(token.type() == HTMLToken::StartTag || token.type() == HTMLToken::EndTag);
2389     {
2390         FakeInsertionMode fakeMode(this, m_secondaryInsertionMode);
2391         processToken(token);
2392     }
2393     if (insertionMode() == InForeignContentMode && m_tree.openElements()->hasOnlyHTMLElementsInScope())
2394         setInsertionMode(m_secondaryInsertionMode);
2395 }
2396
2397 void HTMLTreeBuilder::processComment(AtomicHTMLToken& token)
2398 {
2399     ASSERT(token.type() == HTMLToken::Comment);
2400     if (m_insertionMode == InitialMode
2401         || m_insertionMode == BeforeHTMLMode
2402         || m_insertionMode == AfterAfterBodyMode
2403         || m_insertionMode == AfterAfterFramesetMode) {
2404         m_tree.insertCommentOnDocument(token);
2405         return;
2406     }
2407     if (m_insertionMode == AfterBodyMode) {
2408         m_tree.insertCommentOnHTMLHtmlElement(token);
2409         return;
2410     }
2411     if (m_insertionMode == InTableTextMode) {
2412         defaultForInTableText();
2413         processComment(token);
2414         return;
2415     }
2416     m_tree.insertComment(token);
2417 }
2418
2419 void HTMLTreeBuilder::processCharacter(AtomicHTMLToken& token)
2420 {
         // Entry point for character tokens: wraps the token in an
         // ExternalCharacterTokenBuffer and lets processCharacterBuffer() do the
         // per-insertion-mode work.
2421     ASSERT(token.type() == HTMLToken::Character);
2422     ExternalCharacterTokenBuffer buffer(token);
2423     processCharacterBuffer(buffer);
2424 }
2425
2426 void HTMLTreeBuilder::processCharacterBuffer(ExternalCharacterTokenBuffer& buffer)
2427 {
         // Consumes the characters held by |buffer| according to the current
         // insertion mode. A case either finishes with the buffer, falls through
         // into the next case after a default*() helper has run, or jumps back
         // to ReprocessBuffer so the switch is re-evaluated under the new mode.
2428 ReprocessBuffer:
2429     switch (insertionMode()) {
2430     case InitialMode: {
2431         ASSERT(insertionMode() == InitialMode);
2432         buffer.skipLeadingWhitespace();
2433         if (buffer.isEmpty())
2434             return;
             // defaultForInitial() switches to BeforeHTMLMode, which the next
             // case asserts.
2435         defaultForInitial();
2436         // Fall through.
2437     }
2438     case BeforeHTMLMode: {
2439         ASSERT(insertionMode() == BeforeHTMLMode);
2440         buffer.skipLeadingWhitespace();
2441         if (buffer.isEmpty())
2442             return;
             // defaultForBeforeHTML() switches to BeforeHeadMode.
2443         defaultForBeforeHTML();
2444         // Fall through.
2445     }
2446     case BeforeHeadMode: {
2447         ASSERT(insertionMode() == BeforeHeadMode);
2448         buffer.skipLeadingWhitespace();
2449         if (buffer.isEmpty())
2450             return;
             // Processes a synthesized <head> start tag; the next case asserts
             // that this leaves the builder in InHeadMode.
2451         defaultForBeforeHead();
2452         // Fall through.
2453     }
2454     case InHeadMode: {
2455         ASSERT(insertionMode() == InHeadMode);
             // Leading whitespace is kept as a text node; anything after it
             // triggers the default handling for this mode.
2456         String leadingWhitespace = buffer.takeLeadingWhitespace();
2457         if (!leadingWhitespace.isEmpty())
2458             m_tree.insertTextNode(leadingWhitespace);
2459         if (buffer.isEmpty())
2460             return;
2461         defaultForInHead();
2462         // Fall through.
2463     }
2464     case AfterHeadMode: {
2465         ASSERT(insertionMode() == AfterHeadMode);
2466         String leadingWhitespace = buffer.takeLeadingWhitespace();
2467         if (!leadingWhitespace.isEmpty())
2468             m_tree.insertTextNode(leadingWhitespace);
2469         if (buffer.isEmpty())
2470             return;
             // Processes a synthesized <body> start tag; the next case asserts
             // one of the body-like modes.
2471         defaultForAfterHead();
2472         // Fall through.
2473     }
2474     case InBodyMode:
2475     case InCaptionMode:
2476     case InCellMode: {
2477         ASSERT(insertionMode() == InBodyMode || insertionMode() == InCaptionMode || insertionMode() == InCellMode);
2478         m_tree.reconstructTheActiveFormattingElements();
2479         String characters = buffer.takeRemaining();
2480         m_tree.insertTextNode(characters);
             // Text other than whitespace / replacement characters clears
             // the frameset-ok flag.
2481         if (m_framesetOk && !isAllWhitespaceOrReplacementCharacters(characters))
2482             m_framesetOk = false;
2483         break;
2484     }
2485     case InTableMode:
2486     case InTableBodyMode:
2487     case InRowMode: {
2488         ASSERT(insertionMode() == InTableMode || insertionMode() == InTableBodyMode || insertionMode() == InRowMode);
2489         ASSERT(m_pendingTableCharacters.isEmpty());
             // Remember the mode to return to, then start collecting table text.
2490         m_originalInsertionMode = m_insertionMode;
2491         setInsertionMode(InTableTextMode);
2492         // Fall through.
2493     }
2494     case InTableTextMode: {
             // Characters are only accumulated here; defaultForInTableText()
             // inserts them later and restores the original mode.
2495         buffer.giveRemainingTo(m_pendingTableCharacters);
2496         break;
2497     }
2498     case InColumnGroupMode: {
2499         ASSERT(insertionMode() == InColumnGroupMode);
2500         String leadingWhitespace = buffer.takeLeadingWhitespace();
2501         if (!leadingWhitespace.isEmpty())
2502             m_tree.insertTextNode(leadingWhitespace);
2503         if (buffer.isEmpty())
2504             return;
             // Try to close the column group first. A failure is asserted to
             // happen only while parsing a fragment.
2505         if (!processColgroupEndTagForInColumnGroup()) {
2506             ASSERT(isParsingFragment());
2507             // The spec tells us to drop these characters on the floor.
2508             buffer.takeLeadingNonWhitespace();
2509             if (buffer.isEmpty())
2510                 return;
2511         }
2512         goto ReprocessBuffer;
2513     }
2514     case AfterBodyMode:
2515     case AfterAfterBodyMode: {
2516         ASSERT(insertionMode() == AfterBodyMode || insertionMode() == AfterAfterBodyMode);
2517         // FIXME: parse error
             // Switch back to InBodyMode and handle the characters there.
2518         setInsertionMode(InBodyMode);
2519         goto ReprocessBuffer;
             // Not reached: the goto above always leaves this case.
2520         break;
2521     }
2522     case TextMode: {
2523         ASSERT(insertionMode() == TextMode);
2524         m_tree.insertTextNode(buffer.takeRemaining());
2525         break;
2526     }
2527     case InHeadNoscriptMode: {
2528         ASSERT(insertionMode() == InHeadNoscriptMode);
2529         String leadingWhitespace = buffer.takeLeadingWhitespace();
2530         if (!leadingWhitespace.isEmpty())
2531             m_tree.insertTextNode(leadingWhitespace);
2532         if (buffer.isEmpty())
2533             return;
             // Processes a synthesized </noscript> end tag, then the remaining
             // characters are handled in whatever mode that leaves us in.
2534         defaultForInHeadNoscript();
2535         goto ReprocessBuffer;
             // Not reached: the goto above always leaves this case.
2536         break;
2537     }
2538     case InFramesetMode:
2539     case AfterFramesetMode: {
             // NOTE(review): the assertion also accepts AfterAfterFramesetMode,
             // although that mode has its own case further down.
2540         ASSERT(insertionMode() == InFramesetMode || insertionMode() == AfterFramesetMode || insertionMode() == AfterAfterFramesetMode);
             // Only whitespace is inserted; see the FIXME below about the
             // skipped non-whitespace characters.
2541         String leadingWhitespace = buffer.takeRemainingWhitespace();
2542         if (!leadingWhitespace.isEmpty())
2543             m_tree.insertTextNode(leadingWhitespace);
2544         // FIXME: We should generate a parse error if we skipped over any
2545         // non-whitespace characters.
2546         break;
2547     }
2548     case InSelectInTableMode:
2549     case InSelectMode: {
2550         ASSERT(insertionMode() == InSelectMode || insertionMode() == InSelectInTableMode);
2551         m_tree.insertTextNode(buffer.takeRemaining());
2552         break;
2553     }
2554     case InForeignContentMode: {
2555         ASSERT(insertionMode() == InForeignContentMode);
2556         String characters = buffer.takeRemaining();
2557         m_tree.insertTextNode(characters);
             // Unlike the in-body case, this check uses isAllWhitespace().
2558         if (m_framesetOk && !isAllWhitespace(characters))
2559             m_framesetOk = false;
2560         break;
2561     }
2562     case AfterAfterFramesetMode: {
             // Same whitespace-only handling as the frameset modes above, but the
             // active formatting elements are reconstructed before inserting.
2563         String leadingWhitespace = buffer.takeRemainingWhitespace();
2564         if (!leadingWhitespace.isEmpty()) {
2565             m_tree.reconstructTheActiveFormattingElements();
2566             m_tree.insertTextNode(leadingWhitespace);
2567         }
2568         // FIXME: We should generate a parse error if we skipped over any
2569         // non-whitespace characters.
2570         break;
2571     }
2572     }
2573 }
2574
2575 void HTMLTreeBuilder::processEndOfFile(AtomicHTMLToken& token)
2576 {
         // Handles the end-of-file token for the current insertion mode. Cases
         // that must first change the insertion mode call processEndOfFile()
         // again and return; every case that breaks out of the switch ends by
         // popping all open elements at the bottom of this function.
2577     ASSERT(token.type() == HTMLToken::EndOfFile);
2578     switch (insertionMode()) {
         // The early modes chain through their default*() helpers; each ASSERT
         // documents the mode the previous helper is expected to have left.
2579     case InitialMode:
2580         ASSERT(insertionMode() == InitialMode);
2581         defaultForInitial();
2582         // Fall through.
2583     case BeforeHTMLMode:
2584         ASSERT(insertionMode() == BeforeHTMLMode);
2585         defaultForBeforeHTML();
2586         // Fall through.
2587     case BeforeHeadMode:
2588         ASSERT(insertionMode() == BeforeHeadMode);
2589         defaultForBeforeHead();
2590         // Fall through.
2591     case InHeadMode:
2592         ASSERT(insertionMode() == InHeadMode);
2593         defaultForInHead();
2594         // Fall through.
2595     case AfterHeadMode:
2596         ASSERT(insertionMode() == AfterHeadMode);
2597         defaultForAfterHead();
2598         // Fall through
2599     case InBodyMode:
2600     case InCellMode:
2601         ASSERT(insertionMode() == InBodyMode || insertionMode() == InCellMode);
2602         notImplemented(); // Emit parse error based on what elements are still open.
2603         break;
2604     case AfterBodyMode:
2605     case AfterAfterBodyMode:
2606         ASSERT(insertionMode() == AfterBodyMode || insertionMode() == AfterAfterBodyMode);
2607         notImplemented();
2608         break;
2609     case InHeadNoscriptMode:
2610         ASSERT(insertionMode() == InHeadNoscriptMode);
             // Process a synthesized </noscript>, then handle EOF again.
2611         defaultForInHeadNoscript();
2612         processEndOfFile(token);
2613         return;
2614     case AfterFramesetMode:
2615     case AfterAfterFramesetMode:
2616         ASSERT(insertionMode() == AfterFramesetMode || insertionMode() == AfterAfterFramesetMode);
2617         break;
2618     case InFramesetMode:
2619     case InTableMode:
2620     case InTableBodyMode:
2621     case InSelectInTableMode:
2622     case InSelectMode:
2623         ASSERT(insertionMode() == InSelectMode || insertionMode() == InSelectInTableMode || insertionMode() == InTableMode || insertionMode() == InFramesetMode || insertionMode() == InTableBodyMode);
             // It is a parse error unless the current element is the html element.
2624         if (m_tree.currentElement() != m_tree.openElements()->htmlElement())
2625             parseError(token);
2626         break;
2627     case InColumnGroupMode:
             // Both early returns are asserted to occur only during fragment
             // parsing; they skip the popAll() at the bottom.
2628         if (m_tree.currentElement() == m_tree.openElements()->htmlElement()) {
2629             ASSERT(isParsingFragment());
2630             return;
2631         }
2632         if (!processColgroupEndTagForInColumnGroup()) {
2633             ASSERT(isParsingFragment());
2634             return;
2635         }
2636         processEndOfFile(token);
2637         return;
2638     case InForeignContentMode:
2639         parseError(token);
2640         m_tree.openElements()->popUntilForeignContentScopeMarker();
2641         // FIXME: The spec adds the following condition before setting the
2642         //        insertion mode.  However, this condition causes an infinite loop.
2643         //        See http://www.w3.org/Bugs/Public/show_bug.cgi?id=10621
2644         //        if (insertionMode() == InForeignContentMode && m_tree.openElements()->hasOnlyHTMLElementsInScope())
2645         setInsertionMode(m_secondaryInsertionMode);
2646         processEndOfFile(token);
2647         return;
2648     case InTableTextMode:
             // Flush the pending table characters (which also restores the
             // original insertion mode), then handle EOF again.
2649         defaultForInTableText();
2650         processEndOfFile(token);
2651         return;
2652     case TextMode:
2653     case InCaptionMode:
2654     case InRowMode:
2655         notImplemented();
2656         break;
2657     }
2658     ASSERT(m_tree.openElements()->top());
2659     m_tree.openElements()->popAll();
2660 }
2661
2662 void HTMLTreeBuilder::defaultForInitial()
2663 {
         // "Anything else" handling for InitialMode: outside of fragment
         // parsing the document is put into quirks mode, then the builder
         // moves on to BeforeHTMLMode.
2664     notImplemented();
2665     if (!m_fragmentContext.fragment())
2666         m_document->setCompatibilityMode(Document::QuirksMode);
2667     // FIXME: parse error
2668     setInsertionMode(BeforeHTMLMode);
2669 }
2670
2671 void HTMLTreeBuilder::defaultForBeforeHTML()
2672 {
         // "Anything else" handling for BeforeHTMLMode: synthesizes an <html>
         // start tag token, inserts it through the construction site, and
         // switches to BeforeHeadMode.
2673     AtomicHTMLToken startHTML(HTMLToken::StartTag, htmlTag.localName());
2674     m_tree.insertHTMLHtmlStartTagBeforeHTML(startHTML);
2675     setInsertionMode(BeforeHeadMode);
2676 }
2677
2678 void HTMLTreeBuilder::defaultForBeforeHead()
2679 {
         // "Anything else" handling for BeforeHeadMode: acts as if a <head>
         // start tag had been seen by running a synthesized token through
         // processStartTag(). Any mode change happens inside that call.
2680     AtomicHTMLToken startHead(HTMLToken::StartTag, headTag.localName());
2681     processStartTag(startHead);
2682 }
2683
2684 void HTMLTreeBuilder::defaultForInHead()
2685 {
         // "Anything else" handling for InHeadMode: acts as if a </head> end
         // tag had been seen by running a synthesized token through
         // processEndTag().
2686     AtomicHTMLToken endHead(HTMLToken::EndTag, headTag.localName());
2687     processEndTag(endHead);
2688 }
2689
2690 void HTMLTreeBuilder::defaultForInHeadNoscript()
2691 {
         // "Anything else" handling for InHeadNoscriptMode: acts as if a
         // </noscript> end tag had been seen by running a synthesized token
         // through processEndTag().
2692     AtomicHTMLToken endNoscript(HTMLToken::EndTag, noscriptTag.localName());
2693     processEndTag(endNoscript);
2694 }
2695
2696 void HTMLTreeBuilder::defaultForAfterHead()
2697 {
         // "Anything else" handling for AfterHeadMode: acts as if a <body>
         // start tag had been seen by running a synthesized token through
         // processStartTag().
2698     AtomicHTMLToken startBody(HTMLToken::StartTag, bodyTag.localName());
2699     processStartTag(startBody);
         // Set after the start tag has been processed, so the flag is true on
         // return. NOTE(review): presumably undoes a reset done while handling
         // the <body> start tag -- confirm against processStartTag().
2700     m_framesetOk = true;
2701 }
2702
2703 void HTMLTreeBuilder::defaultForInTableText()
2704 {
         // Flushes the characters collected in m_pendingTableCharacters while
         // in InTableTextMode and returns to the insertion mode saved in
         // m_originalInsertionMode.
2705     String characters = String::adopt(m_pendingTableCharacters);
2706     if (!isAllWhitespace(characters)) {
2707         // FIXME: parse error
             // The guard stays alive until the return below, so both the
             // reconstruction and the text insertion happen while it is active.
2708         HTMLConstructionSite::RedirectToFosterParentGuard redirecter(m_tree);
2709         m_tree.reconstructTheActiveFormattingElements();
2710         m_tree.insertTextNode(characters);
2711         m_framesetOk = false;
2712         setInsertionMode(m_originalInsertionMode);
2713         return;
2714     }
         // Whitespace-only text is inserted directly, without the redirect guard.
2715     m_tree.insertTextNode(characters);
2716     setInsertionMode(m_originalInsertionMode);
2717 }
2718
2719 bool HTMLTreeBuilder::processStartTagForInHead(AtomicHTMLToken& token)
2720 {
         // Applies the start tag rules used while in the head. Returns true
         // when the tag was recognized and handled here, and false when the tag
         // is not one of the names checked below, leaving it to the caller.
2721     ASSERT(token.type() == HTMLToken::StartTag);
2722     if (token.name() == htmlTag) {
2723         m_tree.insertHTMLHtmlStartTagInBody(token);
2724         return true;
2725     }
         // These elements are inserted as self-closing elements.
2726     if (token.name() == baseTag
2727         || token.name() == basefontTag
2728         || token.name() == bgsoundTag
2729         || token.name() == commandTag
2730         || token.name() == linkTag
2731         || token.name() == metaTag) {
2732         m_tree.insertSelfClosingHTMLElement(token);
2733         // Note: The custom processing for the <meta> tag is done in HTMLMetaElement::process().
2734         return true;
2735     }
2736     if (token.name() == titleTag) {
2737         processGenericRCDATAStartTag(token);
2738         return true;
2739     }
2740     if (token.name() == noscriptTag) {
             // With scripting enabled <noscript> is handled as raw text;
             // otherwise the element is inserted and parsing continues in
             // InHeadNoscriptMode.
2741         if (scriptEnabled(m_document->frame())) {
2742             processGenericRawTextStartTag(token);
2743             return true;
2744         }
2745         m_tree.insertHTMLElement(token);
2746         setInsertionMode(InHeadNoscriptMode);
2747         return true;
2748     }
2749     if (token.name() == noframesTag || token.name() == styleTag) {
2750         processGenericRawTextStartTag(token);
2751         return true;
2752     }
2753     if (token.name() == scriptTag) {
2754         processScriptStartTag(token);
2755         return true;
2756     }
         // A <head> start tag here only reports a parse error; nothing is inserted.
2757     if (token.name() == headTag) {
2758         parseError(token);
2759         return true;
2760     }
2761     return false;
2762 }
2763
2764 void HTMLTreeBuilder::processGenericRCDATAStartTag(AtomicHTMLToken& token)
2765 {
         // Inserts the element for |token|, puts the tokenizer into
         // RCDATAState, and switches to TextMode after saving the current
         // insertion mode in m_originalInsertionMode.
2766     ASSERT(token.type() == HTMLToken::StartTag);
2767     m_tree.insertHTMLElement(token);
2768     m_tokenizer->setState(HTMLTokenizer::RCDATAState);
2769     m_originalInsertionMode = m_insertionMode;
2770     setInsertionMode(TextMode);
2771 }
2772
2773 void HTMLTreeBuilder::processGenericRawTextStartTag(AtomicHTMLToken& token)
2774 {
         // Same steps as processGenericRCDATAStartTag(), except that the
         // tokenizer is put into RAWTEXTState.
2775     ASSERT(token.type() == HTMLToken::StartTag);
2776     m_tree.insertHTMLElement(token);
2777     m_tokenizer->setState(HTMLTokenizer::RAWTEXTState);
2778     m_originalInsertionMode = m_insertionMode;
2779     setInsertionMode(TextMode);
2780 }
2781
2782 void HTMLTreeBuilder::processScriptStartTag(AtomicHTMLToken& token)
2783 {
         // Inserts the script element for |token|, puts the tokenizer into
         // ScriptDataState, and switches to TextMode after saving the current
         // insertion mode in m_originalInsertionMode.
2784     ASSERT(token.type() == HTMLToken::StartTag);
2785     m_tree.insertScriptElement(token);
2786     m_tokenizer->setState(HTMLTokenizer::ScriptDataState);
2787     m_originalInsertionMode = m_insertionMode;
         // Record the tokenizer's current line number for this script element.
2788     m_lastScriptElementStartLine = m_tokenizer->lineNumber();
2789     setInsertionMode(TextMode);
2790 }
2791
2792 void HTMLTreeBuilder::finished()
2793 {
         // Signals the end of parsing: a fragment parse notifies only its
         // fragment context, a document parse notifies the document.
2794     ASSERT(m_document);
2795     if (isParsingFragment()) {
2796         m_fragmentContext.finished();
2797         return;
2798     }
2799
2800     // Warning, this may detach the parser. Do not do anything else after this.
2801     m_document->finishedParsing();
2802 }
2803
2804 bool HTMLTreeBuilder::scriptEnabled(Frame* frame)
2805 {
2806     // Scripting counts as disabled when there is no frame or no script controller.
2807     ScriptController* controller = frame ? frame->script() : 0;
2808     if (!controller)
2809         return false;
2810     return controller->canExecuteScripts(NotAboutToExecuteScript);
2811 }
2812
2813 bool HTMLTreeBuilder::pluginsEnabled(Frame* frame)
2814 {
2815     // With no frame there is no loader to consult, so plugins are reported as disabled.
2816     return frame
2817         && frame->loader()->subframeLoader()->allowPlugins(NotAboutToInstantiatePlugin);
2818 }
2819
2820 // FIXME: Move this function to a more appropriate place.
2821 String serializeForNumberType(double number)
2822 {
2823     // According to HTML5, "the best representation of the number n as a floating
2824     // point number" is a string produced by applying ToString() to n.
         // numberToString() fills |buffer|; its return value is used as the
         // length of the resulting String.
2825     NumberToStringBuffer buffer;
2826     unsigned length = numberToString(number, buffer);
2827     return String(buffer, length);
2828 }
2829
2830 // FIXME: Move this function to a more appropriate place.
2831 bool parseToDoubleForNumberType(const String& src, double* out)
2832 {
2833     // See HTML5 2.4.4.3 `Real numbers.'
2834
2835     if (src.isEmpty())
2836         return false;
2837     // String::toDouble() accepts leading + \t \n \v \f \r and SPACE, which are invalid in HTML5.
2838     // So, check the first character.
2839     if (src[0] != '-' && (src[0] < '0' || src[0] > '9'))
2840         return false;
2841
2842     bool valid = false;
2843     double value = src.toDouble(&valid);
2844     if (!valid)
2845         return false;
2846     // NaN and Infinity are not valid numbers according to the standard.
2847     if (!isfinite(value))
2848         return false;
2849     // -0 -> 0
2850     if (!value)
2851         value = 0;
2852     if (out)
2853         *out = value;
2854     return true;
2855 }
2856
2857 }