<!doctype html><div><body><frameset> doesn't parse correctly
[WebKit-https.git] / Source / WebCore / html / parser / HTMLTreeBuilder.cpp
1 /*
2  * Copyright (C) 2010 Google, Inc. All Rights Reserved.
3  * Copyright (C) 2011 Apple Inc. All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY GOOGLE INC. ``AS IS'' AND ANY
15  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL GOOGLE INC. OR
18  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  */
26
27 #include "config.h"
28 #include "HTMLTreeBuilder.h"
29
30 #include "Comment.h"
31 #include "DOMWindow.h"
32 #include "DocumentFragment.h"
33 #include "DocumentType.h"
34 #include "Frame.h"
35 #include "HTMLDocument.h"
36 #include "HTMLDocumentParser.h"
37 #include "HTMLElementFactory.h"
38 #include "HTMLFormElement.h"
39 #include "HTMLHtmlElement.h"
40 #include "HTMLNames.h"
41 #include "HTMLParserIdioms.h"
42 #include "HTMLScriptElement.h"
43 #include "HTMLToken.h"
44 #include "HTMLTokenizer.h"
45 #include "LocalizedStrings.h"
46 #include "MathMLNames.h"
47 #include "NotImplemented.h"
48 #include "SVGNames.h"
49 #include "Text.h"
50 #include "XLinkNames.h"
51 #include "XMLNSNames.h"
52 #include "XMLNames.h"
53 #include <wtf/unicode/CharacterNames.h>
54
55 namespace WebCore {
56
57 using namespace HTMLNames;
58
59 static const int uninitializedLineNumberValue = -1;
60
61 static TextPosition uninitializedPositionValue1()
62 {
63     return TextPosition(OrdinalNumber::fromOneBasedInt(-1), OrdinalNumber::first());
64 }
65
66 namespace {
67
68 inline bool isHTMLSpaceOrReplacementCharacter(UChar character)
69 {
70     return isHTMLSpace(character) || character == replacementCharacter;
71 }
72
73 inline bool isAllWhitespace(const String& string)
74 {
75     return string.isAllSpecialCharacters<isHTMLSpace>();
76 }
77
78 inline bool isAllWhitespaceOrReplacementCharacters(const String& string)
79 {
80     return string.isAllSpecialCharacters<isHTMLSpaceOrReplacementCharacter>();
81 }
82
83 bool isNumberedHeaderTag(const AtomicString& tagName)
84 {
85     return tagName == h1Tag
86         || tagName == h2Tag
87         || tagName == h3Tag
88         || tagName == h4Tag
89         || tagName == h5Tag
90         || tagName == h6Tag;
91 }
92
93 bool isCaptionColOrColgroupTag(const AtomicString& tagName)
94 {
95     return tagName == captionTag
96         || tagName == colTag
97         || tagName == colgroupTag;
98 }
99
100 bool isTableCellContextTag(const AtomicString& tagName)
101 {
102     return tagName == thTag || tagName == tdTag;
103 }
104
105 bool isTableBodyContextTag(const AtomicString& tagName)
106 {
107     return tagName == tbodyTag
108         || tagName == tfootTag
109         || tagName == theadTag;
110 }
111
112 // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#special
113 bool isSpecialNode(Node* node)
114 {
115     if (node->hasTagName(MathMLNames::miTag)
116         || node->hasTagName(MathMLNames::moTag)
117         || node->hasTagName(MathMLNames::mnTag)
118         || node->hasTagName(MathMLNames::msTag)
119         || node->hasTagName(MathMLNames::mtextTag)
120         || node->hasTagName(MathMLNames::annotation_xmlTag)
121         || node->hasTagName(SVGNames::foreignObjectTag)
122         || node->hasTagName(SVGNames::descTag)
123         || node->hasTagName(SVGNames::titleTag))
124         return true;
125     if (node->nodeType() == Node::DOCUMENT_FRAGMENT_NODE)
126         return true;
127     if (!isInHTMLNamespace(node))
128         return false;
129     const AtomicString& tagName = node->localName();
130     return tagName == addressTag
131         || tagName == appletTag
132         || tagName == areaTag
133         || tagName == articleTag
134         || tagName == asideTag
135         || tagName == baseTag
136         || tagName == basefontTag
137         || tagName == bgsoundTag
138         || tagName == blockquoteTag
139         || tagName == bodyTag
140         || tagName == brTag
141         || tagName == buttonTag
142         || tagName == captionTag
143         || tagName == centerTag
144         || tagName == colTag
145         || tagName == colgroupTag
146         || tagName == commandTag
147         || tagName == ddTag
148         || tagName == detailsTag
149         || tagName == dirTag
150         || tagName == divTag
151         || tagName == dlTag
152         || tagName == dtTag
153         || tagName == embedTag
154         || tagName == fieldsetTag
155         || tagName == figcaptionTag
156         || tagName == figureTag
157         || tagName == footerTag
158         || tagName == formTag
159         || tagName == frameTag
160         || tagName == framesetTag
161         || isNumberedHeaderTag(tagName)
162         || tagName == headTag
163         || tagName == headerTag
164         || tagName == hgroupTag
165         || tagName == hrTag
166         || tagName == htmlTag
167         || tagName == iframeTag
168         || tagName == imgTag
169         || tagName == inputTag
170         || tagName == isindexTag
171         || tagName == liTag
172         || tagName == linkTag
173         || tagName == listingTag
174         || tagName == marqueeTag
175         || tagName == menuTag
176         || tagName == metaTag
177         || tagName == navTag
178         || tagName == noembedTag
179         || tagName == noframesTag
180         || tagName == noscriptTag
181         || tagName == objectTag
182         || tagName == olTag
183         || tagName == pTag
184         || tagName == paramTag
185         || tagName == plaintextTag
186         || tagName == preTag
187         || tagName == scriptTag
188         || tagName == sectionTag
189         || tagName == selectTag
190         || tagName == styleTag
191         || tagName == summaryTag
192         || tagName == tableTag
193         || isTableBodyContextTag(tagName)
194         || tagName == tdTag
195         || tagName == textareaTag
196         || tagName == thTag
197         || tagName == titleTag
198         || tagName == trTag
199         || tagName == ulTag
200         || tagName == wbrTag
201         || tagName == xmpTag;
202 }
203
204 bool isNonAnchorNonNobrFormattingTag(const AtomicString& tagName)
205 {
206     return tagName == bTag
207         || tagName == bigTag
208         || tagName == codeTag
209         || tagName == emTag
210         || tagName == fontTag
211         || tagName == iTag
212         || tagName == sTag
213         || tagName == smallTag
214         || tagName == strikeTag
215         || tagName == strongTag
216         || tagName == ttTag
217         || tagName == uTag;
218 }
219
220 bool isNonAnchorFormattingTag(const AtomicString& tagName)
221 {
222     return tagName == nobrTag
223         || isNonAnchorNonNobrFormattingTag(tagName);
224 }
225
226 // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#formatting
227 bool isFormattingTag(const AtomicString& tagName)
228 {
229     return tagName == aTag || isNonAnchorFormattingTag(tagName);
230 }
231
232 HTMLFormElement* closestFormAncestor(Element* element)
233 {
234     while (element) {
235         if (element->hasTagName(formTag))
236             return static_cast<HTMLFormElement*>(element);
237         ContainerNode* parent = element->parentNode();
238         if (!parent || !parent->isElementNode())
239             return 0;
240         element = static_cast<Element*>(parent);
241     }
242     return 0;
243 }
244
245 } // namespace
246
247 class HTMLTreeBuilder::ExternalCharacterTokenBuffer {
248     WTF_MAKE_NONCOPYABLE(ExternalCharacterTokenBuffer);
249 public:
250     explicit ExternalCharacterTokenBuffer(AtomicHTMLToken& token)
251         : m_current(token.characters().data())
252         , m_end(m_current + token.characters().size())
253     {
254         ASSERT(!isEmpty());
255     }
256
257     explicit ExternalCharacterTokenBuffer(const String& string)
258         : m_current(string.characters())
259         , m_end(m_current + string.length())
260     {
261         ASSERT(!isEmpty());
262     }
263
264     ~ExternalCharacterTokenBuffer()
265     {
266         ASSERT(isEmpty());
267     }
268
269     bool isEmpty() const { return m_current == m_end; }
270
271     void skipAtMostOneLeadingNewline()
272     {
273         ASSERT(!isEmpty());
274         if (*m_current == '\n')
275             ++m_current;
276     }
277
278     void skipLeadingWhitespace()
279     {
280         skipLeading<isHTMLSpace>();
281     }
282
283     String takeLeadingWhitespace()
284     {
285         return takeLeading<isHTMLSpace>();
286     }
287
288     void skipLeadingNonWhitespace()
289     {
290         skipLeading<isNotHTMLSpace>();
291     }
292
293     String takeRemaining()
294     {
295         ASSERT(!isEmpty());
296         const UChar* start = m_current;
297         m_current = m_end;
298         return String(start, m_current - start);
299     }
300
301     void giveRemainingTo(StringBuilder& recipient)
302     {
303         recipient.append(m_current, m_end - m_current);
304         m_current = m_end;
305     }
306
307     String takeRemainingWhitespace()
308     {
309         ASSERT(!isEmpty());
310         Vector<UChar> whitespace;
311         do {
312             UChar cc = *m_current++;
313             if (isHTMLSpace(cc))
314                 whitespace.append(cc);
315         } while (m_current < m_end);
316         // Returning the null string when there aren't any whitespace
317         // characters is slightly cleaner semantically because we don't want
318         // to insert a text node (as opposed to inserting an empty text node).
319         if (whitespace.isEmpty())
320             return String();
321         return String::adopt(whitespace);
322     }
323
324 private:
325     template<bool characterPredicate(UChar)>
326     void skipLeading()
327     {
328         ASSERT(!isEmpty());
329         while (characterPredicate(*m_current)) {
330             if (++m_current == m_end)
331                 return;
332         }
333     }
334
335     template<bool characterPredicate(UChar)>
336     String takeLeading()
337     {
338         ASSERT(!isEmpty());
339         const UChar* start = m_current;
340         skipLeading<characterPredicate>();
341         if (start == m_current)
342             return String();
343         return String(start, m_current - start);
344     }
345
346     const UChar* m_current;
347     const UChar* m_end;
348 };
349
350
351 HTMLTreeBuilder::HTMLTreeBuilder(HTMLDocumentParser* parser, HTMLDocument* document, bool reportErrors, bool usePreHTML5ParserQuirks, unsigned maximumDOMTreeDepth)
352     : m_framesetOk(true)
353     , m_document(document)
354     , m_tree(document, maximumDOMTreeDepth)
355     , m_reportErrors(reportErrors)
356     , m_isPaused(false)
357     , m_insertionMode(InitialMode)
358     , m_originalInsertionMode(InitialMode)
359     , m_shouldSkipLeadingNewline(false)
360     , m_parser(parser)
361     , m_scriptToProcessStartPosition(uninitializedPositionValue1())
362     , m_lastScriptElementStartPosition(TextPosition::belowRangePosition())
363     , m_usePreHTML5ParserQuirks(usePreHTML5ParserQuirks)
364 {
365 }
366
367 // FIXME: Member variables should be grouped into self-initializing structs to
368 // minimize code duplication between these constructors.
369 HTMLTreeBuilder::HTMLTreeBuilder(HTMLDocumentParser* parser, DocumentFragment* fragment, Element* contextElement, FragmentScriptingPermission scriptingPermission, bool usePreHTML5ParserQuirks, unsigned maximumDOMTreeDepth)
370     : m_framesetOk(true)
371     , m_fragmentContext(fragment, contextElement, scriptingPermission)
372     , m_document(fragment->document())
373     , m_tree(fragment, scriptingPermission, maximumDOMTreeDepth)
374     , m_reportErrors(false) // FIXME: Why not report errors in fragments?
375     , m_isPaused(false)
376     , m_insertionMode(InitialMode)
377     , m_originalInsertionMode(InitialMode)
378     , m_shouldSkipLeadingNewline(false)
379     , m_parser(parser)
380     , m_scriptToProcessStartPosition(uninitializedPositionValue1())
381     , m_lastScriptElementStartPosition(TextPosition::belowRangePosition())
382     , m_usePreHTML5ParserQuirks(usePreHTML5ParserQuirks)
383 {
384     // FIXME: This assertion will become invalid if <http://webkit.org/b/60316> is fixed.
385     ASSERT(contextElement);
386     if (contextElement) {
387         // Steps 4.2-4.6 of the HTML5 Fragment Case parsing algorithm:
388         // http://www.whatwg.org/specs/web-apps/current-work/multipage/the-end.html#fragment-case
389         // For efficiency, we skip step 4.2 ("Let root be a new html element with no attributes")
390         // and instead use the DocumentFragment as a root node.
391         m_tree.openElements()->pushRootNode(fragment);
392         resetInsertionModeAppropriately();
393         m_tree.setForm(closestFormAncestor(contextElement));
394     }
395 }
396
397 HTMLTreeBuilder::~HTMLTreeBuilder()
398 {
399 }
400
401 void HTMLTreeBuilder::detach()
402 {
403     // This call makes little sense in fragment mode, but for consistency
404     // DocumentParser expects detach() to always be called before it's destroyed.
405     m_document = 0;
406     // HTMLConstructionSite might be on the callstack when detach() is called
407     // otherwise we'd just call m_tree.clear() here instead.
408     m_tree.detach();
409 }
410
411 HTMLTreeBuilder::FragmentParsingContext::FragmentParsingContext()
412     : m_fragment(0)
413     , m_contextElement(0)
414     , m_scriptingPermission(FragmentScriptingAllowed)
415 {
416 }
417
418 HTMLTreeBuilder::FragmentParsingContext::FragmentParsingContext(DocumentFragment* fragment, Element* contextElement, FragmentScriptingPermission scriptingPermission)
419     : m_fragment(fragment)
420     , m_contextElement(contextElement)
421     , m_scriptingPermission(scriptingPermission)
422 {
423     ASSERT(!fragment->hasChildNodes());
424 }
425
426 HTMLTreeBuilder::FragmentParsingContext::~FragmentParsingContext()
427 {
428 }
429
430 PassRefPtr<Element> HTMLTreeBuilder::takeScriptToProcess(TextPosition& scriptStartPosition)
431 {
432     // Unpause ourselves, callers may pause us again when processing the script.
433     // The HTML5 spec is written as though scripts are executed inside the tree
434     // builder.  We pause the parser to exit the tree builder, and then resume
435     // before running scripts.
436     m_isPaused = false;
437     scriptStartPosition = m_scriptToProcessStartPosition;
438     m_scriptToProcessStartPosition = uninitializedPositionValue1();
439     return m_scriptToProcess.release();
440 }
441
442 void HTMLTreeBuilder::constructTreeFromToken(HTMLToken& rawToken)
443 {
444     AtomicHTMLToken token(rawToken);
445
446     // We clear the rawToken in case constructTreeFromAtomicToken
447     // synchronously re-enters the parser. We don't clear the token immedately
448     // for Character tokens because the AtomicHTMLToken avoids copying the
449     // characters by keeping a pointer to the underlying buffer in the
450     // HTMLToken. Fortuantely, Character tokens can't cause use to re-enter
451     // the parser.
452     //
453     // FIXME: Top clearing the rawToken once we start running the parser off
454     // the main thread or once we stop allowing synchronous JavaScript
455     // execution from parseMappedAttribute.
456     if (rawToken.type() != HTMLTokenTypes::Character)
457         rawToken.clear();
458
459     constructTreeFromAtomicToken(token);
460
461     if (!rawToken.isUninitialized()) {
462         ASSERT(rawToken.type() == HTMLTokenTypes::Character);
463         rawToken.clear();
464     }
465 }
466
467 void HTMLTreeBuilder::constructTreeFromAtomicToken(AtomicHTMLToken& token)
468 {
469     if (shouldProcessTokenInForeignContent(token))
470         processTokenInForeignContent(token);
471     else
472         processToken(token);
473
474     // Swallowing U+0000 characters isn't in the HTML5 spec, but turning all
475     // the U+0000 characters into replacement characters has compatibility
476     // problems.
477     m_parser->tokenizer()->setForceNullCharacterReplacement(m_insertionMode == TextMode);
478     m_parser->tokenizer()->setShouldAllowCDATA(!m_tree.isEmpty() && !isInHTMLNamespace(m_tree.currentNode()));
479 }
480
481 void HTMLTreeBuilder::processToken(AtomicHTMLToken& token)
482 {
483     switch (token.type()) {
484     case HTMLTokenTypes::Uninitialized:
485         ASSERT_NOT_REACHED();
486         break;
487     case HTMLTokenTypes::DOCTYPE:
488         m_shouldSkipLeadingNewline = false;
489         processDoctypeToken(token);
490         break;
491     case HTMLTokenTypes::StartTag:
492         m_shouldSkipLeadingNewline = false;
493         processStartTag(token);
494         break;
495     case HTMLTokenTypes::EndTag:
496         m_shouldSkipLeadingNewline = false;
497         processEndTag(token);
498         break;
499     case HTMLTokenTypes::Comment:
500         m_shouldSkipLeadingNewline = false;
501         processComment(token);
502         return;
503     case HTMLTokenTypes::Character:
504         processCharacter(token);
505         break;
506     case HTMLTokenTypes::EndOfFile:
507         m_shouldSkipLeadingNewline = false;
508         processEndOfFile(token);
509         break;
510     }
511 }
512
513 void HTMLTreeBuilder::processDoctypeToken(AtomicHTMLToken& token)
514 {
515     ASSERT(token.type() == HTMLTokenTypes::DOCTYPE);
516     if (m_insertionMode == InitialMode) {
517         m_tree.insertDoctype(token);
518         setInsertionMode(BeforeHTMLMode);
519         return;
520     }
521     if (m_insertionMode == InTableTextMode) {
522         defaultForInTableText();
523         processDoctypeToken(token);
524         return;
525     }
526     parseError(token);
527 }
528
529 void HTMLTreeBuilder::processFakeStartTag(const QualifiedName& tagName, PassRefPtr<NamedNodeMap> attributes)
530 {
531     // FIXME: We'll need a fancier conversion than just "localName" for SVG/MathML tags.
532     AtomicHTMLToken fakeToken(HTMLTokenTypes::StartTag, tagName.localName(), attributes);
533     processStartTag(fakeToken);
534 }
535
536 void HTMLTreeBuilder::processFakeEndTag(const QualifiedName& tagName)
537 {
538     // FIXME: We'll need a fancier conversion than just "localName" for SVG/MathML tags.
539     AtomicHTMLToken fakeToken(HTMLTokenTypes::EndTag, tagName.localName());
540     processEndTag(fakeToken);
541 }
542
543 void HTMLTreeBuilder::processFakeCharacters(const String& characters)
544 {
545     ASSERT(!characters.isEmpty());
546     ExternalCharacterTokenBuffer buffer(characters);
547     processCharacterBuffer(buffer);
548 }
549
550 void HTMLTreeBuilder::processFakePEndTagIfPInButtonScope()
551 {
552     if (!m_tree.openElements()->inButtonScope(pTag.localName()))
553         return;
554     AtomicHTMLToken endP(HTMLTokenTypes::EndTag, pTag.localName());
555     processEndTag(endP);
556 }
557
558 PassRefPtr<NamedNodeMap> HTMLTreeBuilder::attributesForIsindexInput(AtomicHTMLToken& token)
559 {
560     RefPtr<NamedNodeMap> attributes = token.takeAtributes();
561     if (!attributes)
562         attributes = NamedNodeMap::create();
563     else {
564         attributes->removeAttribute(nameAttr);
565         attributes->removeAttribute(actionAttr);
566         attributes->removeAttribute(promptAttr);
567     }
568
569     RefPtr<Attribute> mappedAttribute = Attribute::createMapped(nameAttr, isindexTag.localName());
570     attributes->insertAttribute(mappedAttribute.release(), false);
571     return attributes.release();
572 }
573
574 void HTMLTreeBuilder::processIsindexStartTagForInBody(AtomicHTMLToken& token)
575 {
576     ASSERT(token.type() == HTMLTokenTypes::StartTag);
577     ASSERT(token.name() == isindexTag);
578     parseError(token);
579     if (m_tree.form())
580         return;
581     notImplemented(); // Acknowledge self-closing flag
582     processFakeStartTag(formTag);
583     RefPtr<Attribute> actionAttribute = token.getAttributeItem(actionAttr);
584     if (actionAttribute) {
585         ASSERT(m_tree.currentElement()->hasTagName(formTag));
586         m_tree.currentElement()->setAttribute(actionAttr, actionAttribute->value());
587     }
588     processFakeStartTag(hrTag);
589     processFakeStartTag(labelTag);
590     RefPtr<Attribute> promptAttribute = token.getAttributeItem(promptAttr);
591     if (promptAttribute)
592         processFakeCharacters(promptAttribute->value());
593     else
594         processFakeCharacters(searchableIndexIntroduction());
595     processFakeStartTag(inputTag, attributesForIsindexInput(token));
596     notImplemented(); // This second set of characters may be needed by non-english locales.
597     processFakeEndTag(labelTag);
598     processFakeStartTag(hrTag);
599     processFakeEndTag(formTag);
600 }
601
602 namespace {
603
604 bool isLi(const ContainerNode* element)
605 {
606     return element->hasTagName(liTag);
607 }
608
609 bool isDdOrDt(const ContainerNode* element)
610 {
611     return element->hasTagName(ddTag)
612         || element->hasTagName(dtTag);
613 }
614
615 }
616
617 template <bool shouldClose(const ContainerNode*)>
618 void HTMLTreeBuilder::processCloseWhenNestedTag(AtomicHTMLToken& token)
619 {
620     m_framesetOk = false;
621     HTMLElementStack::ElementRecord* nodeRecord = m_tree.openElements()->topRecord();
622     while (1) {
623         RefPtr<ContainerNode> node = nodeRecord->node();
624         if (shouldClose(node.get())) {
625             ASSERT(node->isElementNode());
626             processFakeEndTag(toElement(node.get())->tagQName());
627             break;
628         }
629         if (isSpecialNode(node.get()) && !node->hasTagName(addressTag) && !node->hasTagName(divTag) && !node->hasTagName(pTag))
630             break;
631         nodeRecord = nodeRecord->next();
632     }
633     processFakePEndTagIfPInButtonScope();
634     m_tree.insertHTMLElement(token);
635 }
636
637 namespace {
638
639 typedef HashMap<AtomicString, QualifiedName> PrefixedNameToQualifiedNameMap;
640
641 void mapLoweredLocalNameToName(PrefixedNameToQualifiedNameMap* map, QualifiedName** names, size_t length)
642 {
643     for (size_t i = 0; i < length; ++i) {
644         const QualifiedName& name = *names[i];
645         const AtomicString& localName = name.localName();
646         AtomicString loweredLocalName = localName.lower();
647         if (loweredLocalName != localName)
648             map->add(loweredLocalName, name);
649     }
650 }
651
652 void adjustSVGTagNameCase(AtomicHTMLToken& token)
653 {
654     static PrefixedNameToQualifiedNameMap* caseMap = 0;
655     if (!caseMap) {
656         caseMap = new PrefixedNameToQualifiedNameMap;
657         size_t length = 0;
658         QualifiedName** svgTags = SVGNames::getSVGTags(&length);
659         mapLoweredLocalNameToName(caseMap, svgTags, length);
660     }
661
662     const QualifiedName& casedName = caseMap->get(token.name());
663     if (casedName.localName().isNull())
664         return;
665     token.setName(casedName.localName());
666 }
667
668 template<QualifiedName** getAttrs(size_t* length)>
669 void adjustAttributes(AtomicHTMLToken& token)
670 {
671     static PrefixedNameToQualifiedNameMap* caseMap = 0;
672     if (!caseMap) {
673         caseMap = new PrefixedNameToQualifiedNameMap;
674         size_t length = 0;
675         QualifiedName** attrs = getAttrs(&length);
676         mapLoweredLocalNameToName(caseMap, attrs, length);
677     }
678
679     NamedNodeMap* attributes = token.attributes();
680     if (!attributes)
681         return;
682
683     for (unsigned x = 0; x < attributes->length(); ++x) {
684         Attribute* attribute = attributes->attributeItem(x);
685         const QualifiedName& casedName = caseMap->get(attribute->localName());
686         if (!casedName.localName().isNull())
687             attribute->parserSetName(casedName);
688     }
689 }
690
691 void adjustSVGAttributes(AtomicHTMLToken& token)
692 {
693     adjustAttributes<SVGNames::getSVGAttrs>(token);
694 }
695
696 void adjustMathMLAttributes(AtomicHTMLToken& token)
697 {
698     adjustAttributes<MathMLNames::getMathMLAttrs>(token);
699 }
700
701 void addNamesWithPrefix(PrefixedNameToQualifiedNameMap* map, const AtomicString& prefix, QualifiedName** names, size_t length)
702 {
703     for (size_t i = 0; i < length; ++i) {
704         QualifiedName* name = names[i];
705         const AtomicString& localName = name->localName();
706         AtomicString prefixColonLocalName = prefix + ':' + localName;
707         QualifiedName nameWithPrefix(prefix, localName, name->namespaceURI());
708         map->add(prefixColonLocalName, nameWithPrefix);
709     }
710 }
711
712 void adjustForeignAttributes(AtomicHTMLToken& token)
713 {
714     static PrefixedNameToQualifiedNameMap* map = 0;
715     if (!map) {
716         map = new PrefixedNameToQualifiedNameMap;
717         size_t length = 0;
718         QualifiedName** attrs = XLinkNames::getXLinkAttrs(&length);
719         addNamesWithPrefix(map, "xlink", attrs, length);
720
721         attrs = XMLNames::getXMLAttrs(&length);
722         addNamesWithPrefix(map, "xml", attrs, length);
723
724         map->add("xmlns", XMLNSNames::xmlnsAttr);
725         map->add("xmlns:xlink", QualifiedName("xmlns", "xlink", XMLNSNames::xmlnsNamespaceURI));
726     }
727
728     NamedNodeMap* attributes = token.attributes();
729     if (!attributes)
730         return;
731
732     for (unsigned x = 0; x < attributes->length(); ++x) {
733         Attribute* attribute = attributes->attributeItem(x);
734         const QualifiedName& name = map->get(attribute->localName());
735         if (!name.localName().isNull())
736             attribute->parserSetName(name);
737     }
738 }
739
740 }
741
742 void HTMLTreeBuilder::processStartTagForInBody(AtomicHTMLToken& token)
743 {
744     ASSERT(token.type() == HTMLTokenTypes::StartTag);
745     if (token.name() == htmlTag) {
746         m_tree.insertHTMLHtmlStartTagInBody(token);
747         return;
748     }
749     if (token.name() == baseTag
750         || token.name() == basefontTag
751         || token.name() == bgsoundTag
752         || token.name() == commandTag
753         || token.name() == linkTag
754         || token.name() == metaTag
755         || token.name() == noframesTag
756         || token.name() == scriptTag
757         || token.name() == styleTag
758         || token.name() == titleTag) {
759         bool didProcess = processStartTagForInHead(token);
760         ASSERT_UNUSED(didProcess, didProcess);
761         return;
762     }
763     if (token.name() == bodyTag) {
764         if (!m_tree.openElements()->secondElementIsHTMLBodyElement() || m_tree.openElements()->hasOnlyOneElement()) {
765             ASSERT(isParsingFragment());
766             return;
767         }
768         m_framesetOk = false;
769         m_tree.insertHTMLBodyStartTagInBody(token);
770         return;
771     }
772     if (token.name() == framesetTag) {
773         parseError(token);
774         if (!m_tree.openElements()->secondElementIsHTMLBodyElement() || m_tree.openElements()->hasOnlyOneElement()) {
775             ASSERT(isParsingFragment());
776             return;
777         }
778         if (!m_framesetOk)
779             return;
780         ExceptionCode ec = 0;
781         m_tree.openElements()->bodyElement()->remove(ec);
782         ASSERT(!ec);
783         m_tree.openElements()->popUntil(m_tree.openElements()->bodyElement());
784         m_tree.openElements()->popHTMLBodyElement();
785         ASSERT(m_tree.openElements()->top() == m_tree.openElements()->htmlElement());
786         m_tree.insertHTMLElement(token);
787         setInsertionMode(InFramesetMode);
788         return;
789     }
790     if (token.name() == addressTag
791         || token.name() == articleTag
792         || token.name() == asideTag
793         || token.name() == blockquoteTag
794         || token.name() == centerTag
795         || token.name() == detailsTag
796         || token.name() == dirTag
797         || token.name() == divTag
798         || token.name() == dlTag
799         || token.name() == fieldsetTag
800         || token.name() == figcaptionTag
801         || token.name() == figureTag
802         || token.name() == footerTag
803         || token.name() == headerTag
804         || token.name() == hgroupTag
805         || token.name() == menuTag
806         || token.name() == navTag
807         || token.name() == olTag
808         || token.name() == pTag
809         || token.name() == sectionTag
810         || token.name() == summaryTag
811         || token.name() == ulTag) {
812         processFakePEndTagIfPInButtonScope();
813         m_tree.insertHTMLElement(token);
814         return;
815     }
816     if (isNumberedHeaderTag(token.name())) {
817         processFakePEndTagIfPInButtonScope();
818         if (isNumberedHeaderTag(m_tree.currentNode()->localName())) {
819             parseError(token);
820             m_tree.openElements()->pop();
821         }
822         m_tree.insertHTMLElement(token);
823         return;
824     }
825     if (token.name() == preTag || token.name() == listingTag) {
826         processFakePEndTagIfPInButtonScope();
827         m_tree.insertHTMLElement(token);
828         m_shouldSkipLeadingNewline = true;
829         m_framesetOk = false;
830         return;
831     }
832     if (token.name() == formTag) {
833         if (m_tree.form()) {
834             parseError(token);
835             return;
836         }
837         processFakePEndTagIfPInButtonScope();
838         m_tree.insertHTMLFormElement(token);
839         return;
840     }
841     if (token.name() == liTag) {
842         processCloseWhenNestedTag<isLi>(token);
843         return;
844     }
845     if (token.name() == ddTag || token.name() == dtTag) {
846         processCloseWhenNestedTag<isDdOrDt>(token);
847         return;
848     }
849     if (token.name() == plaintextTag) {
850         processFakePEndTagIfPInButtonScope();
851         m_tree.insertHTMLElement(token);
852         m_parser->tokenizer()->setState(HTMLTokenizerState::PLAINTEXTState);
853         return;
854     }
855     if (token.name() == buttonTag) {
856         if (m_tree.openElements()->inScope(buttonTag)) {
857             parseError(token);
858             processFakeEndTag(buttonTag);
859             processStartTag(token); // FIXME: Could we just fall through here?
860             return;
861         }
862         m_tree.reconstructTheActiveFormattingElements();
863         m_tree.insertHTMLElement(token);
864         m_framesetOk = false;
865         return;
866     }
867     if (token.name() == aTag) {
868         Element* activeATag = m_tree.activeFormattingElements()->closestElementInScopeWithName(aTag.localName());
869         if (activeATag) {
870             parseError(token);
871             processFakeEndTag(aTag);
872             m_tree.activeFormattingElements()->remove(activeATag);
873             if (m_tree.openElements()->contains(activeATag))
874                 m_tree.openElements()->remove(activeATag);
875         }
876         m_tree.reconstructTheActiveFormattingElements();
877         m_tree.insertFormattingElement(token);
878         return;
879     }
880     if (isNonAnchorNonNobrFormattingTag(token.name())) {
881         m_tree.reconstructTheActiveFormattingElements();
882         m_tree.insertFormattingElement(token);
883         return;
884     }
885     if (token.name() == nobrTag) {
886         m_tree.reconstructTheActiveFormattingElements();
887         if (m_tree.openElements()->inScope(nobrTag)) {
888             parseError(token);
889             processFakeEndTag(nobrTag);
890             m_tree.reconstructTheActiveFormattingElements();
891         }
892         m_tree.insertFormattingElement(token);
893         return;
894     }
895     if (token.name() == appletTag
896         || token.name() == marqueeTag
897         || token.name() == objectTag) {
898         m_tree.reconstructTheActiveFormattingElements();
899         m_tree.insertHTMLElement(token);
900         m_tree.activeFormattingElements()->appendMarker();
901         m_framesetOk = false;
902         return;
903     }
904     if (token.name() == tableTag) {
905         if (!m_document->inQuirksMode() && m_tree.openElements()->inButtonScope(pTag))
906             processFakeEndTag(pTag);
907         m_tree.insertHTMLElement(token);
908         m_framesetOk = false;
909         setInsertionMode(InTableMode);
910         return;
911     }
912     if (token.name() == imageTag) {
913         parseError(token);
914         // Apparently we're not supposed to ask.
915         token.setName(imgTag.localName());
916         // Note the fall through to the imgTag handling below!
917     }
918     if (token.name() == areaTag
919         || token.name() == brTag
920         || token.name() == embedTag
921         || token.name() == imgTag
922         || token.name() == keygenTag
923         || token.name() == wbrTag) {
924         m_tree.reconstructTheActiveFormattingElements();
925         m_tree.insertSelfClosingHTMLElement(token);
926         m_framesetOk = false;
927         return;
928     }
929     if (token.name() == inputTag) {
930         RefPtr<Attribute> typeAttribute = token.getAttributeItem(typeAttr);
931         m_tree.reconstructTheActiveFormattingElements();
932         m_tree.insertSelfClosingHTMLElement(token);
933         if (!typeAttribute || !equalIgnoringCase(typeAttribute->value(), "hidden"))
934             m_framesetOk = false;
935         return;
936     }
937     if (token.name() == paramTag
938         || token.name() == sourceTag
939         || token.name() == trackTag) {
940         m_tree.insertSelfClosingHTMLElement(token);
941         return;
942     }
943     if (token.name() == hrTag) {
944         processFakePEndTagIfPInButtonScope();
945         m_tree.insertSelfClosingHTMLElement(token);
946         m_framesetOk = false;
947         return;
948     }
949     if (token.name() == isindexTag) {
950         processIsindexStartTagForInBody(token);
951         return;
952     }
953     if (token.name() == textareaTag) {
954         m_tree.insertHTMLElement(token);
955         m_shouldSkipLeadingNewline = true;
956         m_parser->tokenizer()->setState(HTMLTokenizerState::RCDATAState);
957         m_originalInsertionMode = m_insertionMode;
958         m_framesetOk = false;
959         setInsertionMode(TextMode);
960         return;
961     }
962     if (token.name() == xmpTag) {
963         processFakePEndTagIfPInButtonScope();
964         m_tree.reconstructTheActiveFormattingElements();
965         m_framesetOk = false;
966         processGenericRawTextStartTag(token);
967         return;
968     }
969     if (token.name() == iframeTag) {
970         m_framesetOk = false;
971         processGenericRawTextStartTag(token);
972         return;
973     }
974     if (token.name() == noembedTag && pluginsEnabled(m_document->frame())) {
975         processGenericRawTextStartTag(token);
976         return;
977     }
978     if (token.name() == noscriptTag && scriptEnabled(m_document->frame())) {
979         processGenericRawTextStartTag(token);
980         return;
981     }
982     if (token.name() == selectTag) {
983         m_tree.reconstructTheActiveFormattingElements();
984         m_tree.insertHTMLElement(token);
985         m_framesetOk = false;
986         if (m_insertionMode == InTableMode
987              || m_insertionMode == InCaptionMode
988              || m_insertionMode == InColumnGroupMode
989              || m_insertionMode == InTableBodyMode
990              || m_insertionMode == InRowMode
991              || m_insertionMode == InCellMode)
992             setInsertionMode(InSelectInTableMode);
993         else
994             setInsertionMode(InSelectMode);
995         return;
996     }
997     if (token.name() == optgroupTag || token.name() == optionTag) {
998         if (m_tree.openElements()->inScope(optionTag.localName())) {
999             AtomicHTMLToken endOption(HTMLTokenTypes::EndTag, optionTag.localName());
1000             processEndTag(endOption);
1001         }
1002         m_tree.reconstructTheActiveFormattingElements();
1003         m_tree.insertHTMLElement(token);
1004         return;
1005     }
1006     if (token.name() == rpTag || token.name() == rtTag) {
1007         if (m_tree.openElements()->inScope(rubyTag.localName())) {
1008             m_tree.generateImpliedEndTags();
1009             if (!m_tree.currentNode()->hasTagName(rubyTag))
1010                 parseError(token);
1011         }
1012         m_tree.insertHTMLElement(token);
1013         return;
1014     }
1015     if (token.name() == MathMLNames::mathTag.localName()) {
1016         m_tree.reconstructTheActiveFormattingElements();
1017         adjustMathMLAttributes(token);
1018         adjustForeignAttributes(token);
1019         m_tree.insertForeignElement(token, MathMLNames::mathmlNamespaceURI);
1020         return;
1021     }
1022     if (token.name() == SVGNames::svgTag.localName()) {
1023         m_tree.reconstructTheActiveFormattingElements();
1024         adjustSVGAttributes(token);
1025         adjustForeignAttributes(token);
1026         m_tree.insertForeignElement(token, SVGNames::svgNamespaceURI);
1027         return;
1028     }
1029     if (isCaptionColOrColgroupTag(token.name())
1030         || token.name() == frameTag
1031         || token.name() == headTag
1032         || isTableBodyContextTag(token.name())
1033         || isTableCellContextTag(token.name())
1034         || token.name() == trTag) {
1035         parseError(token);
1036         return;
1037     }
1038     m_tree.reconstructTheActiveFormattingElements();
1039     m_tree.insertHTMLElement(token);
1040 }
1041
1042 bool HTMLTreeBuilder::processColgroupEndTagForInColumnGroup()
1043 {
1044     if (m_tree.currentNode() == m_tree.openElements()->rootNode()) {
1045         ASSERT(isParsingFragment());
1046         // FIXME: parse error
1047         return false;
1048     }
1049     m_tree.openElements()->pop();
1050     setInsertionMode(InTableMode);
1051     return true;
1052 }
1053
1054 // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#close-the-cell
1055 void HTMLTreeBuilder::closeTheCell()
1056 {
1057     ASSERT(insertionMode() == InCellMode);
1058     if (m_tree.openElements()->inTableScope(tdTag)) {
1059         ASSERT(!m_tree.openElements()->inTableScope(thTag));
1060         processFakeEndTag(tdTag);
1061         return;
1062     }
1063     ASSERT(m_tree.openElements()->inTableScope(thTag));
1064     processFakeEndTag(thTag);
1065     ASSERT(insertionMode() == InRowMode);
1066 }
1067
1068 void HTMLTreeBuilder::processStartTagForInTable(AtomicHTMLToken& token)
1069 {
1070     ASSERT(token.type() == HTMLTokenTypes::StartTag);
1071     if (token.name() == captionTag) {
1072         m_tree.openElements()->popUntilTableScopeMarker();
1073         m_tree.activeFormattingElements()->appendMarker();
1074         m_tree.insertHTMLElement(token);
1075         setInsertionMode(InCaptionMode);
1076         return;
1077     }
1078     if (token.name() == colgroupTag) {
1079         m_tree.openElements()->popUntilTableScopeMarker();
1080         m_tree.insertHTMLElement(token);
1081         setInsertionMode(InColumnGroupMode);
1082         return;
1083     }
1084     if (token.name() == colTag) {
1085         processFakeStartTag(colgroupTag);
1086         ASSERT(InColumnGroupMode);
1087         processStartTag(token);
1088         return;
1089     }
1090     if (isTableBodyContextTag(token.name())) {
1091         m_tree.openElements()->popUntilTableScopeMarker();
1092         m_tree.insertHTMLElement(token);
1093         setInsertionMode(InTableBodyMode);
1094         return;
1095     }
1096     if (isTableCellContextTag(token.name())
1097         || token.name() == trTag) {
1098         processFakeStartTag(tbodyTag);
1099         ASSERT(insertionMode() == InTableBodyMode);
1100         processStartTag(token);
1101         return;
1102     }
1103     if (token.name() == tableTag) {
1104         parseError(token);
1105         if (!processTableEndTagForInTable()) {
1106             ASSERT(isParsingFragment());
1107             return;
1108         }
1109         processStartTag(token);
1110         return;
1111     }
1112     if (token.name() == styleTag || token.name() == scriptTag) {
1113         processStartTagForInHead(token);
1114         return;
1115     }
1116     if (token.name() == inputTag) {
1117         Attribute* typeAttribute = token.getAttributeItem(typeAttr);
1118         if (typeAttribute && equalIgnoringCase(typeAttribute->value(), "hidden")) {
1119             parseError(token);
1120             m_tree.insertSelfClosingHTMLElement(token);
1121             return;
1122         }
1123         // Fall through to "anything else" case.
1124     }
1125     if (token.name() == formTag) {
1126         parseError(token);
1127         if (m_tree.form())
1128             return;
1129         m_tree.insertHTMLFormElement(token, true);
1130         m_tree.openElements()->pop();
1131         return;
1132     }
1133     parseError(token);
1134     HTMLConstructionSite::RedirectToFosterParentGuard redirecter(m_tree);
1135     processStartTagForInBody(token);
1136 }
1137
1138 void HTMLTreeBuilder::processStartTag(AtomicHTMLToken& token)
1139 {
1140     ASSERT(token.type() == HTMLTokenTypes::StartTag);
1141     switch (insertionMode()) {
1142     case InitialMode:
1143         ASSERT(insertionMode() == InitialMode);
1144         defaultForInitial();
1145         // Fall through.
1146     case BeforeHTMLMode:
1147         ASSERT(insertionMode() == BeforeHTMLMode);
1148         if (token.name() == htmlTag) {
1149             m_tree.insertHTMLHtmlStartTagBeforeHTML(token);
1150             setInsertionMode(BeforeHeadMode);
1151             return;
1152         }
1153         defaultForBeforeHTML();
1154         // Fall through.
1155     case BeforeHeadMode:
1156         ASSERT(insertionMode() == BeforeHeadMode);
1157         if (token.name() == htmlTag) {
1158             m_tree.insertHTMLHtmlStartTagInBody(token);
1159             return;
1160         }
1161         if (token.name() == headTag) {
1162             m_tree.insertHTMLHeadElement(token);
1163             setInsertionMode(InHeadMode);
1164             return;
1165         }
1166         defaultForBeforeHead();
1167         // Fall through.
1168     case InHeadMode:
1169         ASSERT(insertionMode() == InHeadMode);
1170         if (processStartTagForInHead(token))
1171             return;
1172         defaultForInHead();
1173         // Fall through.
1174     case AfterHeadMode:
1175         ASSERT(insertionMode() == AfterHeadMode);
1176         if (token.name() == htmlTag) {
1177             m_tree.insertHTMLHtmlStartTagInBody(token);
1178             return;
1179         }
1180         if (token.name() == bodyTag) {
1181             m_framesetOk = false;
1182             m_tree.insertHTMLBodyElement(token);
1183             setInsertionMode(InBodyMode);
1184             return;
1185         }
1186         if (token.name() == framesetTag) {
1187             m_tree.insertHTMLElement(token);
1188             setInsertionMode(InFramesetMode);
1189             return;
1190         }
1191         if (token.name() == baseTag
1192             || token.name() == basefontTag
1193             || token.name() == bgsoundTag
1194             || token.name() == linkTag
1195             || token.name() == metaTag
1196             || token.name() == noframesTag
1197             || token.name() == scriptTag
1198             || token.name() == styleTag
1199             || token.name() == titleTag) {
1200             parseError(token);
1201             ASSERT(m_tree.head());
1202             m_tree.openElements()->pushHTMLHeadElement(m_tree.head());
1203             processStartTagForInHead(token);
1204             m_tree.openElements()->removeHTMLHeadElement(m_tree.head());
1205             return;
1206         }
1207         if (token.name() == headTag) {
1208             parseError(token);
1209             return;
1210         }
1211         defaultForAfterHead();
1212         // Fall through
1213     case InBodyMode:
1214         ASSERT(insertionMode() == InBodyMode);
1215         processStartTagForInBody(token);
1216         break;
1217     case InTableMode:
1218         ASSERT(insertionMode() == InTableMode);
1219         processStartTagForInTable(token);
1220         break;
1221     case InCaptionMode:
1222         ASSERT(insertionMode() == InCaptionMode);
1223         if (isCaptionColOrColgroupTag(token.name())
1224             || isTableBodyContextTag(token.name())
1225             || isTableCellContextTag(token.name())
1226             || token.name() == trTag) {
1227             parseError(token);
1228             if (!processCaptionEndTagForInCaption()) {
1229                 ASSERT(isParsingFragment());
1230                 return;
1231             }
1232             processStartTag(token);
1233             return;
1234         }
1235         processStartTagForInBody(token);
1236         break;
1237     case InColumnGroupMode:
1238         ASSERT(insertionMode() == InColumnGroupMode);
1239         if (token.name() == htmlTag) {
1240             m_tree.insertHTMLHtmlStartTagInBody(token);
1241             return;
1242         }
1243         if (token.name() == colTag) {
1244             m_tree.insertSelfClosingHTMLElement(token);
1245             return;
1246         }
1247         if (!processColgroupEndTagForInColumnGroup()) {
1248             ASSERT(isParsingFragment());
1249             return;
1250         }
1251         processStartTag(token);
1252         break;
1253     case InTableBodyMode:
1254         ASSERT(insertionMode() == InTableBodyMode);
1255         if (token.name() == trTag) {
1256             m_tree.openElements()->popUntilTableBodyScopeMarker(); // How is there ever anything to pop?
1257             m_tree.insertHTMLElement(token);
1258             setInsertionMode(InRowMode);
1259             return;
1260         }
1261         if (isTableCellContextTag(token.name())) {
1262             parseError(token);
1263             processFakeStartTag(trTag);
1264             ASSERT(insertionMode() == InRowMode);
1265             processStartTag(token);
1266             return;
1267         }
1268         if (isCaptionColOrColgroupTag(token.name()) || isTableBodyContextTag(token.name())) {
1269             // FIXME: This is slow.
1270             if (!m_tree.openElements()->inTableScope(tbodyTag.localName()) && !m_tree.openElements()->inTableScope(theadTag.localName()) && !m_tree.openElements()->inTableScope(tfootTag.localName())) {
1271                 ASSERT(isParsingFragment());
1272                 parseError(token);
1273                 return;
1274             }
1275             m_tree.openElements()->popUntilTableBodyScopeMarker();
1276             ASSERT(isTableBodyContextTag(m_tree.currentElement()->localName()));
1277             processFakeEndTag(m_tree.currentElement()->tagQName());
1278             processStartTag(token);
1279             return;
1280         }
1281         processStartTagForInTable(token);
1282         break;
1283     case InRowMode:
1284         ASSERT(insertionMode() == InRowMode);
1285         if (isTableCellContextTag(token.name())) {
1286             m_tree.openElements()->popUntilTableRowScopeMarker();
1287             m_tree.insertHTMLElement(token);
1288             setInsertionMode(InCellMode);
1289             m_tree.activeFormattingElements()->appendMarker();
1290             return;
1291         }
1292         if (token.name() == trTag
1293             || isCaptionColOrColgroupTag(token.name())
1294             || isTableBodyContextTag(token.name())) {
1295             if (!processTrEndTagForInRow()) {
1296                 ASSERT(isParsingFragment());
1297                 return;
1298             }
1299             ASSERT(insertionMode() == InTableBodyMode);
1300             processStartTag(token);
1301             return;
1302         }
1303         processStartTagForInTable(token);
1304         break;
1305     case InCellMode:
1306         ASSERT(insertionMode() == InCellMode);
1307         if (isCaptionColOrColgroupTag(token.name())
1308             || isTableCellContextTag(token.name())
1309             || token.name() == trTag
1310             || isTableBodyContextTag(token.name())) {
1311             // FIXME: This could be more efficient.
1312             if (!m_tree.openElements()->inTableScope(tdTag) && !m_tree.openElements()->inTableScope(thTag)) {
1313                 ASSERT(isParsingFragment());
1314                 parseError(token);
1315                 return;
1316             }
1317             closeTheCell();
1318             processStartTag(token);
1319             return;
1320         }
1321         processStartTagForInBody(token);
1322         break;
1323     case AfterBodyMode:
1324     case AfterAfterBodyMode:
1325         ASSERT(insertionMode() == AfterBodyMode || insertionMode() == AfterAfterBodyMode);
1326         if (token.name() == htmlTag) {
1327             m_tree.insertHTMLHtmlStartTagInBody(token);
1328             return;
1329         }
1330         setInsertionMode(InBodyMode);
1331         processStartTag(token);
1332         break;
1333     case InHeadNoscriptMode:
1334         ASSERT(insertionMode() == InHeadNoscriptMode);
1335         if (token.name() == htmlTag) {
1336             m_tree.insertHTMLHtmlStartTagInBody(token);
1337             return;
1338         }
1339         if (token.name() == basefontTag
1340             || token.name() == bgsoundTag
1341             || token.name() == linkTag
1342             || token.name() == metaTag
1343             || token.name() == noframesTag
1344             || token.name() == styleTag) {
1345             bool didProcess = processStartTagForInHead(token);
1346             ASSERT_UNUSED(didProcess, didProcess);
1347             return;
1348         }
1349         if (token.name() == htmlTag || token.name() == noscriptTag) {
1350             parseError(token);
1351             return;
1352         }
1353         defaultForInHeadNoscript();
1354         processToken(token);
1355         break;
1356     case InFramesetMode:
1357         ASSERT(insertionMode() == InFramesetMode);
1358         if (token.name() == htmlTag) {
1359             m_tree.insertHTMLHtmlStartTagInBody(token);
1360             return;
1361         }
1362         if (token.name() == framesetTag) {
1363             m_tree.insertHTMLElement(token);
1364             return;
1365         }
1366         if (token.name() == frameTag) {
1367             m_tree.insertSelfClosingHTMLElement(token);
1368             return;
1369         }
1370         if (token.name() == noframesTag) {
1371             processStartTagForInHead(token);
1372             return;
1373         }
1374         parseError(token);
1375         break;
1376     case AfterFramesetMode:
1377     case AfterAfterFramesetMode:
1378         ASSERT(insertionMode() == AfterFramesetMode || insertionMode() == AfterAfterFramesetMode);
1379         if (token.name() == htmlTag) {
1380             m_tree.insertHTMLHtmlStartTagInBody(token);
1381             return;
1382         }
1383         if (token.name() == noframesTag) {
1384             processStartTagForInHead(token);
1385             return;
1386         }
1387         parseError(token);
1388         break;
1389     case InSelectInTableMode:
1390         ASSERT(insertionMode() == InSelectInTableMode);
1391         if (token.name() == captionTag
1392             || token.name() == tableTag
1393             || isTableBodyContextTag(token.name())
1394             || token.name() == trTag
1395             || isTableCellContextTag(token.name())) {
1396             parseError(token);
1397             AtomicHTMLToken endSelect(HTMLTokenTypes::EndTag, selectTag.localName());
1398             processEndTag(endSelect);
1399             processStartTag(token);
1400             return;
1401         }
1402         // Fall through
1403     case InSelectMode:
1404         ASSERT(insertionMode() == InSelectMode || insertionMode() == InSelectInTableMode);
1405         if (token.name() == htmlTag) {
1406             m_tree.insertHTMLHtmlStartTagInBody(token);
1407             return;
1408         }
1409         if (token.name() == optionTag) {
1410             if (m_tree.currentNode()->hasTagName(optionTag)) {
1411                 AtomicHTMLToken endOption(HTMLTokenTypes::EndTag, optionTag.localName());
1412                 processEndTag(endOption);
1413             }
1414             m_tree.insertHTMLElement(token);
1415             return;
1416         }
1417         if (token.name() == optgroupTag) {
1418             if (m_tree.currentNode()->hasTagName(optionTag)) {
1419                 AtomicHTMLToken endOption(HTMLTokenTypes::EndTag, optionTag.localName());
1420                 processEndTag(endOption);
1421             }
1422             if (m_tree.currentNode()->hasTagName(optgroupTag)) {
1423                 AtomicHTMLToken endOptgroup(HTMLTokenTypes::EndTag, optgroupTag.localName());
1424                 processEndTag(endOptgroup);
1425             }
1426             m_tree.insertHTMLElement(token);
1427             return;
1428         }
1429         if (token.name() == selectTag) {
1430             parseError(token);
1431             AtomicHTMLToken endSelect(HTMLTokenTypes::EndTag, selectTag.localName());
1432             processEndTag(endSelect);
1433             return;
1434         }
1435         if (token.name() == inputTag
1436             || token.name() == keygenTag
1437             || token.name() == textareaTag) {
1438             parseError(token);
1439             if (!m_tree.openElements()->inSelectScope(selectTag)) {
1440                 ASSERT(isParsingFragment());
1441                 return;
1442             }
1443             AtomicHTMLToken endSelect(HTMLTokenTypes::EndTag, selectTag.localName());
1444             processEndTag(endSelect);
1445             processStartTag(token);
1446             return;
1447         }
1448         if (token.name() == scriptTag) {
1449             bool didProcess = processStartTagForInHead(token);
1450             ASSERT_UNUSED(didProcess, didProcess);
1451             return;
1452         }
1453         break;
1454     case InTableTextMode:
1455         defaultForInTableText();
1456         processStartTag(token);
1457         break;
1458     case TextMode:
1459         ASSERT_NOT_REACHED();
1460         break;
1461     }
1462 }
1463
1464 bool HTMLTreeBuilder::processBodyEndTagForInBody(AtomicHTMLToken& token)
1465 {
1466     ASSERT(token.type() == HTMLTokenTypes::EndTag);
1467     ASSERT(token.name() == bodyTag);
1468     if (!m_tree.openElements()->inScope(bodyTag.localName())) {
1469         parseError(token);
1470         return false;
1471     }
1472     notImplemented(); // Emit a more specific parse error based on stack contents.
1473     setInsertionMode(AfterBodyMode);
1474     return true;
1475 }
1476
1477 void HTMLTreeBuilder::processAnyOtherEndTagForInBody(AtomicHTMLToken& token)
1478 {
1479     ASSERT(token.type() == HTMLTokenTypes::EndTag);
1480     HTMLElementStack::ElementRecord* record = m_tree.openElements()->topRecord();
1481     while (1) {
1482         RefPtr<ContainerNode> node = record->node();
1483         if (node->hasLocalName(token.name())) {
1484             m_tree.generateImpliedEndTags();
1485             // FIXME: The ElementRecord pointed to by record might be deleted by
1486             // the preceding call. Perhaps we should hold a RefPtr so that it
1487             // stays alive for the duration of record's scope.
1488             record = 0;
1489             if (!m_tree.currentNode()->hasLocalName(token.name())) {
1490                 parseError(token);
1491                 // FIXME: This is either a bug in the spec, or a bug in our
1492                 // implementation.  Filed a bug with HTML5:
1493                 // http://www.w3.org/Bugs/Public/show_bug.cgi?id=10080
1494                 // We might have already popped the node for the token in
1495                 // generateImpliedEndTags, just abort.
1496                 if (!m_tree.openElements()->contains(toElement(node.get())))
1497                     return;
1498             }
1499             m_tree.openElements()->popUntilPopped(toElement(node.get()));
1500             return;
1501         }
1502         if (isSpecialNode(node.get())) {
1503             parseError(token);
1504             return;
1505         }
1506         record = record->next();
1507     }
1508 }
1509
1510 // FIXME: This probably belongs on HTMLElementStack.
1511 HTMLElementStack::ElementRecord* HTMLTreeBuilder::furthestBlockForFormattingElement(Element* formattingElement)
1512 {
1513     HTMLElementStack::ElementRecord* furthestBlock = 0;
1514     HTMLElementStack::ElementRecord* record = m_tree.openElements()->topRecord();
1515     for (; record; record = record->next()) {
1516         if (record->element() == formattingElement)
1517             return furthestBlock;
1518         if (isSpecialNode(record->element()))
1519             furthestBlock = record;
1520     }
1521     ASSERT_NOT_REACHED();
1522     return 0;
1523 }
1524
1525 // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#parsing-main-inbody
// Runs the adoption agency algorithm for the end tag |token|, whose name is
// looked up in the list of active formatting elements. The numbered comments
// in the body ("1.", "6.2", ...) are the step labels this code uses for the
// algorithm. Both the outer loop and the inner loop of step 6 are capped at
// adoptionAgencyIterationLimit iterations. The function returns early when no
// usable formatting element is found (step 1) or when there is no furthest
// block (step 3).
1526 void HTMLTreeBuilder::callTheAdoptionAgency(AtomicHTMLToken& token)
1527 {
1528     // The adoption agency algorithm is N^2.  We limit the number of iterations
1529     // to stop from hanging the whole browser.  This limit is copied from the
1530     // legacy tree builder and might need to be tweaked in the future.
1531     static const int adoptionAgencyIterationLimit = 10;
1532
1533     for (int i = 0; i < adoptionAgencyIterationLimit; ++i) {
1534         // 1.
             // Give up (parse error) if there is no matching active formatting
             // element, or if it is on the stack of open elements but not in scope.
1535         Element* formattingElement = m_tree.activeFormattingElements()->closestElementInScopeWithName(token.name());
1536         if (!formattingElement || ((m_tree.openElements()->contains(formattingElement)) && !m_tree.openElements()->inScope(formattingElement))) {
1537             parseError(token);
1538             notImplemented(); // Check the stack of open elements for a more specific parse error.
1539             return;
1540         }
             // The element is in the active formatting list but no longer on the
             // stack of open elements: report it and drop it from the list.
1541         HTMLElementStack::ElementRecord* formattingElementRecord = m_tree.openElements()->find(formattingElement);
1542         if (!formattingElementRecord) {
1543             parseError(token);
1544             m_tree.activeFormattingElements()->remove(formattingElement);
1545             return;
1546         }
             // Not being the current element is a parse error, but processing continues.
1547         if (formattingElement != m_tree.currentElement())
1548             parseError(token);
1549         // 2.
1550         HTMLElementStack::ElementRecord* furthestBlock = furthestBlockForFormattingElement(formattingElement);
1551         // 3.
             // No furthest block: pop the stack through the formatting element,
             // remove it from the active formatting list, and finish.
1552         if (!furthestBlock) {
1553             m_tree.openElements()->popUntilPopped(formattingElement);
1554             m_tree.activeFormattingElements()->remove(formattingElement);
1555             return;
1556         }
1557         // 4.
             // The common ancestor is the node held by the record that follows the
             // formatting element's record in the stack's next() chain.
1558         ASSERT(furthestBlock->isAbove(formattingElementRecord));
1559         RefPtr<ContainerNode> commonAncestor = formattingElementRecord->next()->node();
1560         // 5.
1561         HTMLFormattingElementList::Bookmark bookmark = m_tree.activeFormattingElements()->bookmarkFor(formattingElement);
1562         // 6.
             // Walk from the furthest block along next() toward the formatting
             // element's record. NOTE: the inner loop counter |i| shadows the outer
             // loop's |i|; the outer counter is not referenced inside this loop.
1563         HTMLElementStack::ElementRecord* node = furthestBlock;
1564         HTMLElementStack::ElementRecord* nextNode = node->next();
1565         HTMLElementStack::ElementRecord* lastNode = furthestBlock;
1566         for (int i = 0; i < adoptionAgencyIterationLimit; ++i) {
1567             // 6.1
1568             node = nextNode;
1569             ASSERT(node);
1570             nextNode = node->next(); // Save node->next() for the next iteration in case node is deleted in 6.2.
1571             // 6.2
                 // Elements that are not active formatting elements are simply removed
                 // from the stack of open elements and skipped.
1572             if (!m_tree.activeFormattingElements()->contains(node->element())) {
1573                 m_tree.openElements()->remove(node->element());
1574                 node = 0;
1575                 continue;
1576             }
1577             // 6.3
1578             if (node == formattingElementRecord)
1579                 break;
1580             // 6.5
                 // Replace the element with a freshly created copy, both in the active
                 // formatting list entry and in the stack record.
1581             RefPtr<Element> newElement = m_tree.createHTMLElementFromElementRecord(node);
1582             HTMLFormattingElementList::Entry* nodeEntry = m_tree.activeFormattingElements()->find(node->element());
1583             nodeEntry->replaceElement(newElement.get());
1584             node->replaceElement(newElement.release());
1585             // 6.4 -- Intentionally out of order to handle the case where node
1586             // was replaced in 6.5.
1587             // http://www.w3.org/Bugs/Public/show_bug.cgi?id=10096
1588             if (lastNode == furthestBlock)
1589                 bookmark.moveToAfter(nodeEntry);
1590             // 6.6
                 // Detach lastNode's element from its current parent (if any) and
                 // append it to node's element, attaching it lazily when the new
                 // parent is attached and it is not.
1591             if (ContainerNode* parent = lastNode->element()->parentNode())
1592                 parent->parserRemoveChild(lastNode->element());
1593             node->element()->parserAddChild(lastNode->element());
1594             if (lastNode->element()->parentElement()->attached() && !lastNode->element()->attached())
1595                 lastNode->element()->lazyAttach();
1596             // 6.7
1597             lastNode = node;
1598         }
1599         // 7
             // Move lastNode's element under the common ancestor; when the ancestor
             // is a table, tr, or table body context element it is foster-parented
             // instead.
1600         const AtomicString& commonAncestorTag = commonAncestor->localName();
1601         if (ContainerNode* parent = lastNode->element()->parentNode())
1602             parent->parserRemoveChild(lastNode->element());
1603         // FIXME: If this moves to HTMLConstructionSite, this check should use
1604         // causesFosterParenting(tagName) instead.
1605         if (commonAncestorTag == tableTag
1606             || commonAncestorTag == trTag
1607             || isTableBodyContextTag(commonAncestorTag))
1608             m_tree.fosterParent(lastNode->element());
1609         else {
1610             commonAncestor->parserAddChild(lastNode->element());
1611             ASSERT(lastNode->node()->isElementNode());
1612             ASSERT(lastNode->element()->parentNode());
1613             if (lastNode->element()->parentNode()->attached() && !lastNode->element()->attached())
1614                 lastNode->element()->lazyAttach();
1615         }
1616         // 8
1617         RefPtr<Element> newElement = m_tree.createHTMLElementFromElementRecord(formattingElementRecord);
1618         // 9
1619         newElement->takeAllChildrenFrom(furthestBlock->element());
1620         // 10
1621         Element* furthestBlockElement = furthestBlock->element();
1622         // FIXME: All this creation / parserAddChild / attach business should
1623         //        be in HTMLConstructionSite.  My guess is that steps 8--12
1624         //        should all be in some HTMLConstructionSite function.
1625         furthestBlockElement->parserAddChild(newElement);
1626         if (furthestBlockElement->attached() && !newElement->attached()) {
1627             // Notice that newElement might already be attached if, for example, one of the reparented
1628             // children is a style element, which attaches itself automatically.
1629             newElement->attach();
1630         }
1631         // 11
             // Swap the old formatting element for the new one in the active
             // formatting list, using the bookmark taken in step 5 (possibly moved
             // in step 6.4).
1632         m_tree.activeFormattingElements()->swapTo(formattingElement, newElement.get(), bookmark);
1633         // 12
             // Take the old formatting element off the stack of open elements and
             // insert the new element relative to the furthest block via insertAbove().
1634         m_tree.openElements()->remove(formattingElement);
1635         m_tree.openElements()->insertAbove(newElement, furthestBlock);
1636     }
1637 }
1638
1639 void HTMLTreeBuilder::resetInsertionModeAppropriately()
1640 {
1641     // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#reset-the-insertion-mode-appropriately
1642     bool last = false;
1643     HTMLElementStack::ElementRecord* nodeRecord = m_tree.openElements()->topRecord();
1644     while (1) {
1645         ContainerNode* node = nodeRecord->node();
1646         if (node == m_tree.openElements()->rootNode()) {
1647             ASSERT(isParsingFragment());
1648             last = true;
1649             node = m_fragmentContext.contextElement();
1650         }
1651         if (node->hasTagName(selectTag)) {
1652             ASSERT(isParsingFragment());
1653             return setInsertionMode(InSelectMode);
1654         }
1655         if (node->hasTagName(tdTag) || node->hasTagName(thTag))
1656             return setInsertionMode(InCellMode);
1657         if (node->hasTagName(trTag))
1658             return setInsertionMode(InRowMode);
1659         if (node->hasTagName(tbodyTag) || node->hasTagName(theadTag) || node->hasTagName(tfootTag))
1660             return setInsertionMode(InTableBodyMode);
1661         if (node->hasTagName(captionTag))
1662             return setInsertionMode(InCaptionMode);
1663         if (node->hasTagName(colgroupTag)) {
1664             ASSERT(isParsingFragment());
1665             return setInsertionMode(InColumnGroupMode);
1666         }
1667         if (node->hasTagName(tableTag))
1668             return setInsertionMode(InTableMode);
1669         if (node->hasTagName(headTag)) {
1670             ASSERT(isParsingFragment());
1671             return setInsertionMode(InBodyMode);
1672         }
1673         if (node->hasTagName(bodyTag))
1674             return setInsertionMode(InBodyMode);
1675         if (node->hasTagName(framesetTag)) {
1676             ASSERT(isParsingFragment());
1677             return setInsertionMode(InFramesetMode);
1678         }
1679         if (node->hasTagName(htmlTag)) {
1680             ASSERT(isParsingFragment());
1681             return setInsertionMode(BeforeHeadMode);
1682         }
1683         if (last) {
1684             ASSERT(isParsingFragment());
1685             return setInsertionMode(InBodyMode);
1686         }
1687         nodeRecord = nodeRecord->next();
1688     }
1689 }
1690
1691 void HTMLTreeBuilder::processEndTagForInTableBody(AtomicHTMLToken& token)
1692 {
1693     ASSERT(token.type() == HTMLTokenTypes::EndTag);
1694     if (isTableBodyContextTag(token.name())) {
1695         if (!m_tree.openElements()->inTableScope(token.name())) {
1696             parseError(token);
1697             return;
1698         }
1699         m_tree.openElements()->popUntilTableBodyScopeMarker();
1700         m_tree.openElements()->pop();
1701         setInsertionMode(InTableMode);
1702         return;
1703     }
1704     if (token.name() == tableTag) {
1705         // FIXME: This is slow.
1706         if (!m_tree.openElements()->inTableScope(tbodyTag.localName()) && !m_tree.openElements()->inTableScope(theadTag.localName()) && !m_tree.openElements()->inTableScope(tfootTag.localName())) {
1707             ASSERT(isParsingFragment());
1708             parseError(token);
1709             return;
1710         }
1711         m_tree.openElements()->popUntilTableBodyScopeMarker();
1712         ASSERT(isTableBodyContextTag(m_tree.currentElement()->localName()));
1713         processFakeEndTag(m_tree.currentElement()->tagQName());
1714         processEndTag(token);
1715         return;
1716     }
1717     if (token.name() == bodyTag
1718         || isCaptionColOrColgroupTag(token.name())
1719         || token.name() == htmlTag
1720         || isTableCellContextTag(token.name())
1721         || token.name() == trTag) {
1722         parseError(token);
1723         return;
1724     }
1725     processEndTagForInTable(token);
1726 }
1727
1728 void HTMLTreeBuilder::processEndTagForInRow(AtomicHTMLToken& token)
1729 {
1730     ASSERT(token.type() == HTMLTokenTypes::EndTag);
1731     if (token.name() == trTag) {
1732         processTrEndTagForInRow();
1733         return;
1734     }
1735     if (token.name() == tableTag) {
1736         if (!processTrEndTagForInRow()) {
1737             ASSERT(isParsingFragment());
1738             return;
1739         }
1740         ASSERT(insertionMode() == InTableBodyMode);
1741         processEndTag(token);
1742         return;
1743     }
1744     if (isTableBodyContextTag(token.name())) {
1745         if (!m_tree.openElements()->inTableScope(token.name())) {
1746             parseError(token);
1747             return;
1748         }
1749         processFakeEndTag(trTag);
1750         ASSERT(insertionMode() == InTableBodyMode);
1751         processEndTag(token);
1752         return;
1753     }
1754     if (token.name() == bodyTag
1755         || isCaptionColOrColgroupTag(token.name())
1756         || token.name() == htmlTag
1757         || isTableCellContextTag(token.name())) {
1758         parseError(token);
1759         return;
1760     }
1761     processEndTagForInTable(token);
1762 }
1763
1764 void HTMLTreeBuilder::processEndTagForInCell(AtomicHTMLToken& token)
1765 {
1766     ASSERT(token.type() == HTMLTokenTypes::EndTag);
1767     if (isTableCellContextTag(token.name())) {
1768         if (!m_tree.openElements()->inTableScope(token.name())) {
1769             parseError(token);
1770             return;
1771         }
1772         m_tree.generateImpliedEndTags();
1773         if (!m_tree.currentNode()->hasLocalName(token.name()))
1774             parseError(token);
1775         m_tree.openElements()->popUntilPopped(token.name());
1776         m_tree.activeFormattingElements()->clearToLastMarker();
1777         setInsertionMode(InRowMode);
1778         return;
1779     }
1780     if (token.name() == bodyTag
1781         || isCaptionColOrColgroupTag(token.name())
1782         || token.name() == htmlTag) {
1783         parseError(token);
1784         return;
1785     }
1786     if (token.name() == tableTag
1787         || token.name() == trTag
1788         || isTableBodyContextTag(token.name())) {
1789         if (!m_tree.openElements()->inTableScope(token.name())) {
1790             ASSERT(isTableBodyContextTag(token.name()) || isParsingFragment());
1791             parseError(token);
1792             return;
1793         }
1794         closeTheCell();
1795         processEndTag(token);
1796         return;
1797     }
1798     processEndTagForInBody(token);
1799 }
1800
1801 void HTMLTreeBuilder::processEndTagForInBody(AtomicHTMLToken& token)
1802 {
1803     ASSERT(token.type() == HTMLTokenTypes::EndTag);
1804     if (token.name() == bodyTag) {
1805         processBodyEndTagForInBody(token);
1806         return;
1807     }
1808     if (token.name() == htmlTag) {
1809         AtomicHTMLToken endBody(HTMLTokenTypes::EndTag, bodyTag.localName());
1810         if (processBodyEndTagForInBody(endBody))
1811             processEndTag(token);
1812         return;
1813     }
1814     if (token.name() == addressTag
1815         || token.name() == articleTag
1816         || token.name() == asideTag
1817         || token.name() == blockquoteTag
1818         || token.name() == buttonTag
1819         || token.name() == centerTag
1820         || token.name() == detailsTag
1821         || token.name() == dirTag
1822         || token.name() == divTag
1823         || token.name() == dlTag
1824         || token.name() == fieldsetTag
1825         || token.name() == figcaptionTag
1826         || token.name() == figureTag
1827         || token.name() == footerTag
1828         || token.name() == headerTag
1829         || token.name() == hgroupTag
1830         || token.name() == listingTag
1831         || token.name() == menuTag
1832         || token.name() == navTag
1833         || token.name() == olTag
1834         || token.name() == preTag
1835         || token.name() == sectionTag
1836         || token.name() == summaryTag
1837         || token.name() == ulTag) {
1838         if (!m_tree.openElements()->inScope(token.name())) {
1839             parseError(token);
1840             return;
1841         }
1842         m_tree.generateImpliedEndTags();
1843         if (!m_tree.currentNode()->hasLocalName(token.name()))
1844             parseError(token);
1845         m_tree.openElements()->popUntilPopped(token.name());
1846         return;
1847     }
1848     if (token.name() == formTag) {
1849         RefPtr<Element> node = m_tree.takeForm();
1850         if (!node || !m_tree.openElements()->inScope(node.get())) {
1851             parseError(token);
1852             return;
1853         }
1854         m_tree.generateImpliedEndTags();
1855         if (m_tree.currentElement() != node.get())
1856             parseError(token);
1857         m_tree.openElements()->remove(node.get());
1858     }
1859     if (token.name() == pTag) {
1860         if (!m_tree.openElements()->inButtonScope(token.name())) {
1861             parseError(token);
1862             processFakeStartTag(pTag);
1863             ASSERT(m_tree.openElements()->inScope(token.name()));
1864             processEndTag(token);
1865             return;
1866         }
1867         m_tree.generateImpliedEndTagsWithExclusion(token.name());
1868         if (!m_tree.currentNode()->hasLocalName(token.name()))
1869             parseError(token);
1870         m_tree.openElements()->popUntilPopped(token.name());
1871         return;
1872     }
1873     if (token.name() == liTag) {
1874         if (!m_tree.openElements()->inListItemScope(token.name())) {
1875             parseError(token);
1876             return;
1877         }
1878         m_tree.generateImpliedEndTagsWithExclusion(token.name());
1879         if (!m_tree.currentNode()->hasLocalName(token.name()))
1880             parseError(token);
1881         m_tree.openElements()->popUntilPopped(token.name());
1882         return;
1883     }
1884     if (token.name() == ddTag
1885         || token.name() == dtTag) {
1886         if (!m_tree.openElements()->inScope(token.name())) {
1887             parseError(token);
1888             return;
1889         }
1890         m_tree.generateImpliedEndTagsWithExclusion(token.name());
1891         if (!m_tree.currentNode()->hasLocalName(token.name()))
1892             parseError(token);
1893         m_tree.openElements()->popUntilPopped(token.name());
1894         return;
1895     }
1896     if (isNumberedHeaderTag(token.name())) {
1897         if (!m_tree.openElements()->hasNumberedHeaderElementInScope()) {
1898             parseError(token);
1899             return;
1900         }
1901         m_tree.generateImpliedEndTags();
1902         if (!m_tree.currentNode()->hasLocalName(token.name()))
1903             parseError(token);
1904         m_tree.openElements()->popUntilNumberedHeaderElementPopped();
1905         return;
1906     }
1907     if (isFormattingTag(token.name())) {
1908         callTheAdoptionAgency(token);
1909         return;
1910     }
1911     if (token.name() == appletTag
1912         || token.name() == marqueeTag
1913         || token.name() == objectTag) {
1914         if (!m_tree.openElements()->inScope(token.name())) {
1915             parseError(token);
1916             return;
1917         }
1918         m_tree.generateImpliedEndTags();
1919         if (!m_tree.currentNode()->hasLocalName(token.name()))
1920             parseError(token);
1921         m_tree.openElements()->popUntilPopped(token.name());
1922         m_tree.activeFormattingElements()->clearToLastMarker();
1923         return;
1924     }
1925     if (token.name() == brTag) {
1926         parseError(token);
1927         processFakeStartTag(brTag);
1928         return;
1929     }
1930     processAnyOtherEndTagForInBody(token);
1931 }
1932
1933 bool HTMLTreeBuilder::processCaptionEndTagForInCaption()
1934 {
1935     if (!m_tree.openElements()->inTableScope(captionTag.localName())) {
1936         ASSERT(isParsingFragment());
1937         // FIXME: parse error
1938         return false;
1939     }
1940     m_tree.generateImpliedEndTags();
1941     // FIXME: parse error if (!m_tree.currentElement()->hasTagName(captionTag))
1942     m_tree.openElements()->popUntilPopped(captionTag.localName());
1943     m_tree.activeFormattingElements()->clearToLastMarker();
1944     setInsertionMode(InTableMode);
1945     return true;
1946 }
1947
1948 bool HTMLTreeBuilder::processTrEndTagForInRow()
1949 {
1950     if (!m_tree.openElements()->inTableScope(trTag.localName())) {
1951         ASSERT(isParsingFragment());
1952         // FIXME: parse error
1953         return false;
1954     }
1955     m_tree.openElements()->popUntilTableRowScopeMarker();
1956     ASSERT(m_tree.currentElement()->hasTagName(trTag));
1957     m_tree.openElements()->pop();
1958     setInsertionMode(InTableBodyMode);
1959     return true;
1960 }
1961
1962 bool HTMLTreeBuilder::processTableEndTagForInTable()
1963 {
1964     if (!m_tree.openElements()->inTableScope(tableTag)) {
1965         ASSERT(isParsingFragment());
1966         // FIXME: parse error.
1967         return false;
1968     }
1969     m_tree.openElements()->popUntilPopped(tableTag.localName());
1970     resetInsertionModeAppropriately();
1971     return true;
1972 }
1973
1974 void HTMLTreeBuilder::processEndTagForInTable(AtomicHTMLToken& token)
1975 {
1976     ASSERT(token.type() == HTMLTokenTypes::EndTag);
1977     if (token.name() == tableTag) {
1978         processTableEndTagForInTable();
1979         return;
1980     }
1981     if (token.name() == bodyTag
1982         || isCaptionColOrColgroupTag(token.name())
1983         || token.name() == htmlTag
1984         || isTableBodyContextTag(token.name())
1985         || isTableCellContextTag(token.name())
1986         || token.name() == trTag) {
1987         parseError(token);
1988         return;
1989     }
1990     // Is this redirection necessary here?
1991     HTMLConstructionSite::RedirectToFosterParentGuard redirecter(m_tree);
1992     processEndTagForInBody(token);
1993 }
1994
1995 void HTMLTreeBuilder::processEndTag(AtomicHTMLToken& token)
1996 {
         // Entry point for end tag tokens: dispatches on the current insertion
         // mode. Several cases handle the token by changing the insertion mode
         // (or closing an element) and then calling processEndTag() again so
         // the token is reprocessed under the new mode.
1997     ASSERT(token.type() == HTMLTokenTypes::EndTag);
1998     switch (insertionMode()) {
         // InitialMode through AfterHeadMode form a deliberate fall-through
         // chain: each case either rejects the token with a parse error or
         // calls its defaultFor...() helper and falls into the next case,
         // whose leading ASSERT checks that the insertion mode has advanced.
1999     case InitialMode:
2000         ASSERT(insertionMode() == InitialMode);
2001         defaultForInitial();
2002         // Fall through.
2003     case BeforeHTMLMode:
2004         ASSERT(insertionMode() == BeforeHTMLMode);
             // Only </head>, </body>, </html> and </br> get past this point.
2005         if (token.name() != headTag && token.name() != bodyTag && token.name() != htmlTag && token.name() != brTag) {
2006             parseError(token);
2007             return;
2008         }
2009         defaultForBeforeHTML();
2010         // Fall through.
2011     case BeforeHeadMode:
2012         ASSERT(insertionMode() == BeforeHeadMode);
2013         if (token.name() != headTag && token.name() != bodyTag && token.name() != htmlTag && token.name() != brTag) {
2014             parseError(token);
2015             return;
2016         }
2017         defaultForBeforeHead();
2018         // Fall through.
2019     case InHeadMode:
2020         ASSERT(insertionMode() == InHeadMode);
2021         if (token.name() == headTag) {
2022             m_tree.openElements()->popHTMLHeadElement();
2023             setInsertionMode(AfterHeadMode);
2024             return;
2025         }
2026         if (token.name() != bodyTag && token.name() != htmlTag && token.name() != brTag) {
2027             parseError(token);
2028             return;
2029         }
2030         defaultForInHead();
2031         // Fall through.
2032     case AfterHeadMode:
2033         ASSERT(insertionMode() == AfterHeadMode);
2034         if (token.name() != bodyTag && token.name() != htmlTag && token.name() != brTag) {
2035             parseError(token);
2036             return;
2037         }
2038         defaultForAfterHead();
2039         // Fall through
2040     case InBodyMode:
2041         ASSERT(insertionMode() == InBodyMode);
2042         processEndTagForInBody(token);
2043         break;
2044     case InTableMode:
2045         ASSERT(insertionMode() == InTableMode);
2046         processEndTagForInTable(token);
2047         break;
2048     case InCaptionMode:
2049         ASSERT(insertionMode() == InCaptionMode);
2050         if (token.name() == captionTag) {
2051             processCaptionEndTagForInCaption();
2052             return;
2053         }
2054         if (token.name() == tableTag) {
                 // </table> inside a caption: close the caption first and, if
                 // that succeeded, reprocess </table> in the resulting mode.
2055             parseError(token);
2056             if (!processCaptionEndTagForInCaption()) {
2057                 ASSERT(isParsingFragment());
2058                 return;
2059             }
2060             processEndTag(token);
2061             return;
2062         }
2063         if (token.name() == bodyTag
2064             || token.name() == colTag
2065             || token.name() == colgroupTag
2066             || token.name() == htmlTag
2067             || isTableBodyContextTag(token.name())
2068             || isTableCellContextTag(token.name())
2069             || token.name() == trTag) {
2070             parseError(token);
2071             return;
2072         }
2073         processEndTagForInBody(token);
2074         break;
2075     case InColumnGroupMode:
2076         ASSERT(insertionMode() == InColumnGroupMode);
2077         if (token.name() == colgroupTag) {
2078             processColgroupEndTagForInColumnGroup();
2079             return;
2080         }
2081         if (token.name() == colTag) {
2082             parseError(token);
2083             return;
2084         }
             // Any other end tag: close the colgroup and, if that succeeded,
             // reprocess the token.
2085         if (!processColgroupEndTagForInColumnGroup()) {
2086             ASSERT(isParsingFragment());
2087             return;
2088         }
2089         processEndTag(token);
2090         break;
2091     case InRowMode:
2092         ASSERT(insertionMode() == InRowMode);
2093         processEndTagForInRow(token);
2094         break;
2095     case InCellMode:
2096         ASSERT(insertionMode() == InCellMode);
2097         processEndTagForInCell(token);
2098         break;
2099     case InTableBodyMode:
2100         ASSERT(insertionMode() == InTableBodyMode);
2101         processEndTagForInTableBody(token);
2102         break;
2103     case AfterBodyMode:
2104         ASSERT(insertionMode() == AfterBodyMode);
2105         if (token.name() == htmlTag) {
2106             if (isParsingFragment()) {
2107                 parseError(token);
2108                 return;
2109             }
2110             setInsertionMode(AfterAfterBodyMode);
2111             return;
2112         }
2113         // Fall through.
2114     case AfterAfterBodyMode:
2115         ASSERT(insertionMode() == AfterBodyMode || insertionMode() == AfterAfterBodyMode);
             // Unexpected end tag after the body: report it, switch back to
             // InBodyMode and reprocess the token there.
2116         parseError(token);
2117         setInsertionMode(InBodyMode);
2118         processEndTag(token);
2119         break;
2120     case InHeadNoscriptMode:
2121         ASSERT(insertionMode() == InHeadNoscriptMode);
2122         if (token.name() == noscriptTag) {
2123             ASSERT(m_tree.currentElement()->hasTagName(noscriptTag));
2124             m_tree.openElements()->pop();
2125             ASSERT(m_tree.currentElement()->hasTagName(headTag));
2126             setInsertionMode(InHeadMode);
2127             return;
2128         }
             // Of the remaining end tags only </br> is reprocessed; everything
             // else is a parse error and is dropped.
2129         if (token.name() != brTag) {
2130             parseError(token);
2131             return;
2132         }
2133         defaultForInHeadNoscript();
2134         processToken(token);
2135         break;
2136     case TextMode:
2137         if (token.name() == scriptTag) {
2138             // Pause ourselves so that parsing stops until the script can be processed by the caller.
2139             m_isPaused = true;
2140             ASSERT(m_tree.currentElement()->hasTagName(scriptTag));
                 // Record the script element and its start position before it
                 // is popped off the stack of open elements.
2141             m_scriptToProcess = m_tree.currentElement();
2142             m_scriptToProcessStartPosition = m_lastScriptElementStartPosition;
2143             m_tree.openElements()->pop();
                 // When scripting is not allowed for this fragment, the script
                 // element is emptied of its children.
2144             if (isParsingFragment() && m_fragmentContext.scriptingPermission() == FragmentScriptingNotAllowed)
2145                 m_scriptToProcess->removeAllChildren();
2146             setInsertionMode(m_originalInsertionMode);
2147
2148             // This token will not have been created by the tokenizer if a
2149             // self-closing script tag was encountered and pre-HTML5 parser
2150             // quirks are enabled. We must set the tokenizer's state to
2151             // DataState explicitly if the tokenizer didn't have a chance to.
2152             ASSERT(m_parser->tokenizer()->state() == HTMLTokenizerState::DataState || m_usePreHTML5ParserQuirks);
2153             m_parser->tokenizer()->setState(HTMLTokenizerState::DataState);
2154             return;
2155         }
             // Any other end tag in TextMode: pop the current element and
             // return to the insertion mode saved in m_originalInsertionMode.
2156         m_tree.openElements()->pop();
2157         setInsertionMode(m_originalInsertionMode);
2158         break;
2159     case InFramesetMode:
2160         ASSERT(insertionMode() == InFramesetMode);
2161         if (token.name() == framesetTag) {
2162             if (m_tree.currentNode() == m_tree.openElements()->rootNode()) {
2163                 parseError(token);
2164                 return;
2165             }
2166             m_tree.openElements()->pop();
                 // Leave InFramesetMode only when not parsing a fragment and
                 // the new current element is not itself a frameset.
2167             if (!isParsingFragment() && !m_tree.currentElement()->hasTagName(framesetTag))
2168                 setInsertionMode(AfterFramesetMode);
2169             return;
2170         }
             // Other end tags in InFramesetMode are dropped without further
             // processing here.
2171         break;
2172     case AfterFramesetMode:
2173         ASSERT(insertionMode() == AfterFramesetMode);
2174         if (token.name() == htmlTag) {
2175             setInsertionMode(AfterAfterFramesetMode);
2176             return;
2177         }
2178         // Fall through.
2179     case AfterAfterFramesetMode:
2180         ASSERT(insertionMode() == AfterFramesetMode || insertionMode() == AfterAfterFramesetMode);
2181         parseError(token);
2182         break;
2183     case InSelectInTableMode:
2184         ASSERT(insertionMode() == InSelectInTableMode);
2185         if (token.name() == captionTag
2186             || token.name() == tableTag
2187             || isTableBodyContextTag(token.name())
2188             || token.name() == trTag
2189             || isTableCellContextTag(token.name())) {
2190             parseError(token);
                 // If the named table element is in table scope, close the
                 // select with a synthesized </select> and reprocess the token;
                 // otherwise the token is dropped.
2191             if (m_tree.openElements()->inTableScope(token.name())) {
2192                 AtomicHTMLToken endSelect(HTMLTokenTypes::EndTag, selectTag.localName());
2193                 processEndTag(endSelect);
2194                 processEndTag(token);
2195             }
2196             return;
2197         }
2198         // Fall through.
2199     case InSelectMode:
2200         ASSERT(insertionMode() == InSelectMode || insertionMode() == InSelectInTableMode);
2201         if (token.name() == optgroupTag) {
                 // An <option> sitting directly above an <optgroup> is closed
                 // first so that the optgroup becomes the current node.
2202             if (m_tree.currentNode()->hasTagName(optionTag) && m_tree.oneBelowTop()->hasTagName(optgroupTag))
2203                 processFakeEndTag(optionTag);
2204             if (m_tree.currentNode()->hasTagName(optgroupTag)) {
2205                 m_tree.openElements()->pop();
2206                 return;
2207             }
2208             parseError(token);
2209             return;
2210         }
2211         if (token.name() == optionTag) {
2212             if (m_tree.currentNode()->hasTagName(optionTag)) {
2213                 m_tree.openElements()->pop();
2214                 return;
2215             }
2216             parseError(token);
2217             return;
2218         }
2219         if (token.name() == selectTag) {
2220             if (!m_tree.openElements()->inSelectScope(token.name())) {
2221                 ASSERT(isParsingFragment());
2222                 parseError(token);
2223                 return;
2224             }
2225             m_tree.openElements()->popUntilPopped(selectTag.localName());
2226             resetInsertionModeAppropriately();
2227             return;
2228         }
             // Other end tags in the select modes are dropped without further
             // processing here.
2229         break;
2230     case InTableTextMode:
             // Run the default handling for InTableTextMode, then reprocess
             // the end tag.
2231         defaultForInTableText();
2232         processEndTag(token);
2233         break;
2234     }
2235 }
2236
2237 class HTMLTreeBuilder::FakeInsertionMode {
2238     WTF_MAKE_NONCOPYABLE(FakeInsertionMode);
2239 public:
2240     FakeInsertionMode(HTMLTreeBuilder* treeBuilder, InsertionMode mode)
2241         : m_treeBuilder(treeBuilder)
2242         , m_originalMode(treeBuilder->insertionMode())
2243     {
2244         m_treeBuilder->setFakeInsertionMode(mode);
2245     }
2246
2247     ~FakeInsertionMode()
2248     {
2249         if (m_treeBuilder->isFakeInsertionMode())
2250             m_treeBuilder->setInsertionMode(m_originalMode);
2251     }
2252
2253 private:
2254     HTMLTreeBuilder* m_treeBuilder;
2255     InsertionMode m_originalMode;
2256 };
2257
2258 void HTMLTreeBuilder::processComment(AtomicHTMLToken& token)
2259 {
2260     ASSERT(token.type() == HTMLTokenTypes::Comment);
2261     if (m_insertionMode == InitialMode
2262         || m_insertionMode == BeforeHTMLMode
2263         || m_insertionMode == AfterAfterBodyMode
2264         || m_insertionMode == AfterAfterFramesetMode) {
2265         m_tree.insertCommentOnDocument(token);
2266         return;
2267     }
2268     if (m_insertionMode == AfterBodyMode) {
2269         m_tree.insertCommentOnHTMLHtmlElement(token);
2270         return;
2271     }
2272     if (m_insertionMode == InTableTextMode) {
2273         defaultForInTableText();
2274         processComment(token);
2275         return;
2276     }
2277     m_tree.insertComment(token);
2278 }
2279
2280 void HTMLTreeBuilder::processCharacter(AtomicHTMLToken& token)
2281 {
2282     ASSERT(token.type() == HTMLTokenTypes::Character);
2283     ExternalCharacterTokenBuffer buffer(token);
2284     processCharacterBuffer(buffer);
2285 }
2286
2287 void HTMLTreeBuilder::processCharacterBuffer(ExternalCharacterTokenBuffer& buffer)
2288 {
         // Consumes the characters in |buffer| according to the current
         // insertion mode. Cases that change the insertion mode and still have
         // characters left jump back to ReprocessBuffer so the remainder is
         // handled under the new mode.
2289 ReprocessBuffer:
2290     // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#parsing-main-inbody
2291     // Note that this logic is different than the generic \r\n collapsing
2292     // handled in the input stream preprocessor. This logic is here as an
2293     // "authoring convenience" so folks can write:
2294     //
2295     // <pre>
2296     // lorem ipsum
2297     // lorem ipsum
2298     // </pre>
2299     //
2300     // without getting an extra newline at the start of their <pre> element.
2301     if (m_shouldSkipLeadingNewline) {
             // The flag is one-shot: it is cleared before the newline is skipped.
2302         m_shouldSkipLeadingNewline = false;
2303         buffer.skipAtMostOneLeadingNewline();
2304         if (buffer.isEmpty())
2305             return;
2306     }
2307
2308     switch (insertionMode()) {
         // InitialMode through AfterHeadMode form a deliberate fall-through
         // chain: leading whitespace is skipped (or, from InHeadMode on,
         // inserted as a text node); if characters remain, the mode's
         // defaultFor...() helper runs and control falls into the next case.
2309     case InitialMode: {
2310         ASSERT(insertionMode() == InitialMode);
2311         buffer.skipLeadingWhitespace();
2312         if (buffer.isEmpty())
2313             return;
2314         defaultForInitial();
2315         // Fall through.
2316     }
2317     case BeforeHTMLMode: {
2318         ASSERT(insertionMode() == BeforeHTMLMode);
2319         buffer.skipLeadingWhitespace();
2320         if (buffer.isEmpty())
2321             return;
2322         defaultForBeforeHTML();
2323         // Fall through.
2324     }
2325     case BeforeHeadMode: {
2326         ASSERT(insertionMode() == BeforeHeadMode);
2327         buffer.skipLeadingWhitespace();
2328         if (buffer.isEmpty())
2329             return;
2330         defaultForBeforeHead();
2331         // Fall through.
2332     }
2333     case InHeadMode: {
2334         ASSERT(insertionMode() == InHeadMode);
2335         String leadingWhitespace = buffer.takeLeadingWhitespace();
2336         if (!leadingWhitespace.isEmpty())
2337             m_tree.insertTextNode(leadingWhitespace, AllWhitespace);
2338         if (buffer.isEmpty())
2339             return;
2340         defaultForInHead();
2341         // Fall through.
2342     }
2343     case AfterHeadMode: {
2344         ASSERT(insertionMode() == AfterHeadMode);
2345         String leadingWhitespace = buffer.takeLeadingWhitespace();
2346         if (!leadingWhitespace.isEmpty())
2347             m_tree.insertTextNode(leadingWhitespace, AllWhitespace);
2348         if (buffer.isEmpty())
2349             return;
2350         defaultForAfterHead();
2351         // Fall through.
2352     }
2353     case InBodyMode:
2354     case InCaptionMode:
2355     case InCellMode: {
2356         ASSERT(insertionMode() == InBodyMode || insertionMode() == InCaptionMode || insertionMode() == InCellMode);
2357         processCharacterBufferForInBody(buffer);
2358         break;
2359     }
2360     case InTableMode:
2361     case InTableBodyMode:
2362     case InRowMode: {
2363         ASSERT(insertionMode() == InTableMode || insertionMode() == InTableBodyMode || insertionMode() == InRowMode);
2364         ASSERT(m_pendingTableCharacters.isEmpty());
             // When the current node is a table, table section or row element,
             // the characters are collected in InTableTextMode; otherwise they
             // are processed with the in-body rules under a
             // RedirectToFosterParentGuard.
2365         if (m_tree.currentNode()->isElementNode()
2366             && (m_tree.currentElement()->hasTagName(HTMLNames::tableTag)
2367                 || m_tree.currentElement()->hasTagName(HTMLNames::tbodyTag)
2368                 || m_tree.currentElement()->hasTagName(HTMLNames::tfootTag)
2369                 || m_tree.currentElement()->hasTagName(HTMLNames::theadTag)
2370                 || m_tree.currentElement()->hasTagName(HTMLNames::trTag))) {
                 // Save the current mode in m_originalInsertionMode before
                 // switching to InTableTextMode.
2371             m_originalInsertionMode = m_insertionMode;
2372             setInsertionMode(InTableTextMode);
2373             // Note that we fall through to the InTableTextMode case below.
2374         } else {
2375             HTMLConstructionSite::RedirectToFosterParentGuard redirecter(m_tree);
2376             processCharacterBufferForInBody(buffer);
2377             break;
2378         }
2379         // Fall through.
2380     }
2381     case InTableTextMode: {
             // Hand the rest of the buffer to m_pendingTableCharacters rather
             // than inserting it now.
2382         buffer.giveRemainingTo(m_pendingTableCharacters);
2383         break;
2384     }
2385     case InColumnGroupMode: {
2386         ASSERT(insertionMode() == InColumnGroupMode);
2387         String leadingWhitespace = buffer.takeLeadingWhitespace();
2388         if (!leadingWhitespace.isEmpty())
2389             m_tree.insertTextNode(leadingWhitespace, AllWhitespace);
2390         if (buffer.isEmpty())
2391             return;
             // Non-whitespace characters: try to close the colgroup, then
             // reprocess whatever is left in the buffer.
2392         if (!processColgroupEndTagForInColumnGroup()) {
2393             ASSERT(isParsingFragment());
2394             // The spec tells us to drop these characters on the floor.
2395             buffer.skipLeadingNonWhitespace();
2396             if (buffer.isEmpty())
2397                 return;
2398         }
2399         goto ReprocessBuffer;
2400     }
2401     case AfterBodyMode:
2402     case AfterAfterBodyMode: {
2403         ASSERT(insertionMode() == AfterBodyMode || insertionMode() == AfterAfterBodyMode);
2404         // FIXME: parse error
             // Switch back to InBodyMode and reprocess the buffer there. The
             // break after the goto is never reached.
2405         setInsertionMode(InBodyMode);
2406         goto ReprocessBuffer;
2407         break;
2408     }
2409     case TextMode: {
2410         ASSERT(insertionMode() == TextMode);
2411         m_tree.insertTextNode(buffer.takeRemaining());
2412         break;
2413     }
2414     case InHeadNoscriptMode: {
2415         ASSERT(insertionMode() == InHeadNoscriptMode);
2416         String leadingWhitespace = buffer.takeLeadingWhitespace();
2417         if (!leadingWhitespace.isEmpty())
2418             m_tree.insertTextNode(leadingWhitespace, AllWhitespace);
2419         if (buffer.isEmpty())
2420             return;
             // Characters remain after the whitespace: run the default
             // handling for this mode and reprocess them. The break after the
             // goto is never reached.
2421         defaultForInHeadNoscript();
2422         goto ReprocessBuffer;
2423         break;
2424     }
2425     case InFramesetMode:
2426     case AfterFramesetMode: {
2427         ASSERT(insertionMode() == InFramesetMode || insertionMode() == AfterFramesetMode || insertionMode() == AfterAfterFramesetMode);
             // Only the whitespace returned by takeRemainingWhitespace() is
             // inserted; nothing else from the buffer is used here.
2428         String leadingWhitespace = buffer.takeRemainingWhitespace();
2429         if (!leadingWhitespace.isEmpty())
2430             m_tree.insertTextNode(leadingWhitespace, AllWhitespace);
2431         // FIXME: We should generate a parse error if we skipped over any
2432         // non-whitespace characters.
2433         break;
2434     }
2435     case InSelectInTableMode:
2436     case InSelectMode: {
2437         ASSERT(insertionMode() == InSelectMode || insertionMode() == InSelectInTableMode);
2438         m_tree.insertTextNode(buffer.takeRemaining());
2439         break;
2440     }
2441     case AfterAfterFramesetMode: {
             // Same as the frameset cases above, except that the active
             // formatting elements are reconstructed before the whitespace is
             // inserted.
2442         String leadingWhitespace = buffer.takeRemainingWhitespace();
2443         if (!leadingWhitespace.isEmpty()) {
2444             m_tree.reconstructTheActiveFormattingElements();
2445             m_tree.insertTextNode(leadingWhitespace, AllWhitespace);
2446         }
2447         // FIXME: We should generate a parse error if we skipped over any
2448         // non-whitespace characters.
2449         break;
2450     }
2451     }
2452 }
2453
2454 void HTMLTreeBuilder::processCharacterBufferForInBody(ExternalCharacterTokenBuffer& buffer)
2455 {
2456     m_tree.reconstructTheActiveFormattingElements();
2457     String characters = buffer.takeRemaining();
2458     m_tree.insertTextNode(characters);
2459     if (m_framesetOk && !isAllWhitespaceOrReplacementCharacters(characters))
2460         m_framesetOk = false;
2461 }
2462
2463 void HTMLTreeBuilder::processEndOfFile(AtomicHTMLToken& token)
2464 {
2465     ASSERT(token.type() == HTMLTokenTypes::EndOfFile);
2466     switch (insertionMode()) {
2467     case InitialMode:
2468         ASSERT(insertionMode() == InitialMode);
2469         defaultForInitial();
2470         // Fall through.
2471     case BeforeHTMLMode:
2472         ASSERT(insertionMode() == BeforeHTMLMode);
2473         defaultForBeforeHTML();
2474         // Fall through.
2475     case BeforeHeadMode:
2476         ASSERT(insertionMode() == BeforeHeadMode);
2477         defaultForBeforeHead();
2478         // Fall through.
2479     case InHeadMode:
2480         ASSERT(insertionMode() == InHeadMode);
2481         defaultForInHead();
2482         // Fall through.
2483     case AfterHeadMode:
2484         ASSERT(insertionMode() == AfterHeadMode);
2485         defaultForAfterHead();
2486         // Fall through
2487     case InBodyMode:
2488     case InCellMode:
2489     case InCaptionMode:
2490     case InRowMode:
2491         ASSERT(insertionMode() == InBodyMode || insertionMode() == InCellMode || insertionMode() == InCaptionMode || insertionMode() == InRowMode);
2492         notImplemented(); // Emit parse error based on what elements are still open.
2493         break;
2494     case AfterBodyMode:
2495     case AfterAfterBodyMode:
2496         ASSERT(insertionMode() == AfterBodyMode || insertionMode() == AfterAfterBodyMode);
2497         break;
2498     case InHeadNoscriptMode:
2499         ASSERT(insertionMode() == InHeadNoscriptMode);
2500         defaultForInHeadNoscript();
2501         processEndOfFile(token);
2502         return;
2503     case AfterFramesetMode:
2504     case AfterAfterFramesetMode:
2505         ASSERT(insertionMode() == AfterFramesetMode || insertionMode() == AfterAfterFramesetMode);
2506         break;
2507     case InFramesetMode:
2508     case InTableMode:
2509     case InTableBodyMode:
2510     case InSelectInTableMode:
2511     case InSelectMode:
2512         ASSERT(insertionMode() == InSelectMode || insertionMode() == InSelectInTableMode || insertionMode() == InTableMode || insertionMode() == InFramesetMode || insertionMode() == InTableBodyMode);
2513         if (m_tree.currentNode() != m_tree.openElements()->rootNode())
2514             parseError(token);
2515         break;
2516     case InColumnGroupMode:
2517         if (m_tree.currentNode() == m_tree.openElements()->rootNode()) {
2518             ASSERT(isParsingFragment());
2519             return; // FIXME: Should we break here instead of returning?
2520         }
2521         if (!processColgroupEndTagForInColumnGroup()) {
2522             ASSERT(isParsingFragment());
2523             return; // FIXME: Should we break here instead of returning?
2524         }
2525         processEndOfFile(token);
2526         return;
2527     case InTableTextMode:
2528         defaultForInTableText();
2529         processEndOfFile(token);
2530         return;
2531     case TextMode:
2532         parseError(token);
2533         if (m_tree.currentNode()->hasTagName(scriptTag))
2534             notImplemented(); // mark the script element as "already started".
2535         m_tree.openElements()->pop();
2536         ASSERT(m_originalInsertionMode != TextMode);
2537         setInsertionMode(m_originalInsertionMode);
2538         processEndOfFile(token);
2539         return;
2540     }
2541     ASSERT(m_tree.currentNode());
2542     m_tree.openElements()->popAll();
2543 }
2544
2545 void HTMLTreeBuilder::defaultForInitial()
2546 {
2547     notImplemented();
2548     if (!m_fragmentContext.fragment())
2549         m_document->setCompatibilityMode(Document::QuirksMode);
2550     // FIXME: parse error
2551     setInsertionMode(BeforeHTMLMode);
2552 }
2553
2554 void HTMLTreeBuilder::defaultForBeforeHTML()
2555 {
2556     AtomicHTMLToken startHTML(HTMLTokenTypes::StartTag, htmlTag.localName());
2557     m_tree.insertHTMLHtmlStartTagBeforeHTML(startHTML);
2558     setInsertionMode(BeforeHeadMode);
2559 }
2560
2561 void HTMLTreeBuilder::defaultForBeforeHead()
2562 {
2563     AtomicHTMLToken startHead(HTMLTokenTypes::StartTag, headTag.localName());
2564     processStartTag(startHead);
2565 }
2566
2567 void HTMLTreeBuilder::defaultForInHead()
2568 {
2569     AtomicHTMLToken endHead(HTMLTokenTypes::EndTag, headTag.localName());
2570     processEndTag(endHead);
2571 }
2572
2573 void HTMLTreeBuilder::defaultForInHeadNoscript()
2574 {
2575     AtomicHTMLToken endNoscript(HTMLTokenTypes::EndTag, noscriptTag.localName());
2576     processEndTag(endNoscript);
2577 }
2578
2579 void HTMLTreeBuilder::defaultForAfterHead()
2580 {
2581     AtomicHTMLToken startBody(HTMLTokenTypes::StartTag, bodyTag.localName());
2582     processStartTag(startBody);
2583     m_framesetOk = true;
2584 }
2585
2586 void HTMLTreeBuilder::defaultForInTableText()
2587 {
2588     String characters = m_pendingTableCharacters.toString();
2589     m_pendingTableCharacters.clear();
2590     if (!isAllWhitespace(characters)) {
2591         // FIXME: parse error
2592         HTMLConstructionSite::RedirectToFosterParentGuard redirecter(m_tree);
2593         m_tree.reconstructTheActiveFormattingElements();
2594         m_tree.insertTextNode(characters, NotAllWhitespace);
2595         m_framesetOk = false;
2596         setInsertionMode(m_originalInsertionMode);
2597         return;
2598     }
2599     m_tree.insertTextNode(characters);
2600     setInsertionMode(m_originalInsertionMode);
2601 }
2602
2603 bool HTMLTreeBuilder::processStartTagForInHead(AtomicHTMLToken& token)
2604 {
2605     ASSERT(token.type() == HTMLTokenTypes::StartTag);
2606     if (token.name() == htmlTag) {
2607         m_tree.insertHTMLHtmlStartTagInBody(token);
2608         return true;
2609     }
2610     if (token.name() == baseTag
2611         || token.name() == basefontTag
2612         || token.name() == bgsoundTag
2613         || token.name() == commandTag
2614         || token.name() == linkTag
2615         || token.name() == metaTag) {
2616         m_tree.insertSelfClosingHTMLElement(token);
2617         // Note: The custom processing for the <meta> tag is done in HTMLMetaElement::process().
2618         return true;
2619     }
2620     if (token.name() == titleTag) {
2621         processGenericRCDATAStartTag(token);
2622         return true;
2623     }
2624     if (token.name() == noscriptTag) {
2625         if (scriptEnabled(m_document->frame())) {
2626             processGenericRawTextStartTag(token);
2627             return true;
2628         }
2629         m_tree.insertHTMLElement(token);
2630         setInsertionMode(InHeadNoscriptMode);
2631         return true;
2632     }
2633     if (token.name() == noframesTag || token.name() == styleTag) {
2634         processGenericRawTextStartTag(token);
2635         return true;
2636     }
2637     if (token.name() == scriptTag) {
2638         processScriptStartTag(token);
2639         if (m_usePreHTML5ParserQuirks && token.selfClosing())
2640             processFakeEndTag(scriptTag);
2641         return true;
2642     }
2643     if (token.name() == headTag) {
2644         parseError(token);
2645         return true;
2646     }
2647     return false;
2648 }
2649
2650 void HTMLTreeBuilder::processGenericRCDATAStartTag(AtomicHTMLToken& token)
2651 {
2652     ASSERT(token.type() == HTMLTokenTypes::StartTag);
2653     m_tree.insertHTMLElement(token);
2654     m_parser->tokenizer()->setState(HTMLTokenizerState::RCDATAState);
2655     m_originalInsertionMode = m_insertionMode;
2656     setInsertionMode(TextMode);
2657 }
2658
2659 void HTMLTreeBuilder::processGenericRawTextStartTag(AtomicHTMLToken& token)
2660 {
2661     ASSERT(token.type() == HTMLTokenTypes::StartTag);
2662     m_tree.insertHTMLElement(token);
2663     m_parser->tokenizer()->setState(HTMLTokenizerState::RAWTEXTState);
2664     m_originalInsertionMode = m_insertionMode;
2665     setInsertionMode(TextMode);
2666 }
2667
2668 void HTMLTreeBuilder::processScriptStartTag(AtomicHTMLToken& token)
2669 {
2670     ASSERT(token.type() == HTMLTokenTypes::StartTag);
2671     m_tree.insertScriptElement(token);
2672     m_parser->tokenizer()->setState(HTMLTokenizerState::ScriptDataState);
2673     m_originalInsertionMode = m_insertionMode;
2674
2675     TextPosition position = m_parser->textPosition();
2676
2677     ASSERT(position.m_line == m_parser->tokenizer()->lineNumber());
2678
2679     m_lastScriptElementStartPosition = position;
2680
2681     setInsertionMode(TextMode);
2682 }
2683
2684 // http://www.whatwg.org/specs/web-apps/current-work/multipage/tree-construction.html#tree-construction
2685 bool HTMLTreeBuilder::shouldProcessTokenInForeignContent(AtomicHTMLToken& token)
2686 {
2687     if (m_tree.isEmpty())
2688         return false;
2689     ContainerNode* node = m_tree.currentNode();
2690     if (isInHTMLNamespace(node))
2691         return false;
2692     if (HTMLElementStack::isMathMLTextIntegrationPoint(node)) {
2693         if (token.type() == HTMLTokenTypes::StartTag
2694             && token.name() != MathMLNames::mglyphTag
2695             && token.name() != MathMLNames::malignmarkTag)
2696             return false;
2697         if (token.type() == HTMLTokenTypes::Character)
2698             return false;
2699     }
2700     if (node->hasTagName(MathMLNames::annotation_xmlTag)
2701         && token.type() == HTMLTokenTypes::StartTag
2702         && token.name() == SVGNames::svgTag)
2703         return false;
2704     if (HTMLElementStack::isHTMLIntegrationPoint(node)) {
2705         if (token.type() == HTMLTokenTypes::StartTag)
2706             return false;
2707         if (token.type() == HTMLTokenTypes::Character)
2708             return false;
2709     }
2710     if (token.type() == HTMLTokenTypes::EndOfFile)
2711         return false;
2712     return true;
2713 }
2714
2715 void HTMLTreeBuilder::processTokenInForeignContent(AtomicHTMLToken& token)
2716 {
2717     switch (token.type()) {
2718     case HTMLTokenTypes::Uninitialized:
2719         ASSERT_NOT_REACHED();
2720         break;
2721     case HTMLTokenTypes::DOCTYPE:
2722         parseError(token);
2723         break;
2724     case HTMLTokenTypes::StartTag: {
2725         if (token.name() == bTag
2726             || token.name() == bigTag
2727             || token.name() == blockquoteTag
2728             || token.name() == bodyTag
2729             || token.name() == brTag
2730             || token.name() == centerTag
2731             || token.name() == codeTag
2732             || token.name() == ddTag
2733             || token.name() == divTag
2734             || token.name() == dlTag
2735             || token.name() == dtTag
2736             || token.name() == emTag
2737             || token.name() == embedTag
2738             || isNumberedHeaderTag(token.name())
2739             || token.name() == headTag
2740             || token.name() == hrTag
2741             || token.name() == iTag
2742             || token.name() == imgTag
2743             || token.name() == liTag
2744             || token.name() == listingTag
2745             || token.name() == menuTag
2746             || token.name() == metaTag
2747             || token.name() == nobrTag
2748             || token.name() == olTag
2749             || token.name() == pTag
2750             || token.name() == preTag
2751             || token.name() == rubyTag
2752             || token.name() == sTag
2753             || token.name() == smallTag
2754             || token.name() == spanTag
2755             || token.name() == strongTag
2756             || token.name() == strikeTag
2757             || token.name() == subTag
2758             || token.name() == supTag
2759             || token.name() == tableTag
2760             || token.name() == ttTag
2761             || token.name() == uTag
2762             || token.name() == ulTag
2763             || token.name() == varTag
2764             || (token.name() == fontTag && (token.getAttributeItem(colorAttr) || token.getAttributeItem(faceAttr) || token.getAttributeItem(sizeAttr)))) {
2765             parseError(token);
2766             m_tree.openElements()->popUntilForeignContentScopeMarker();
2767             processStartTag(token);
2768             return;
2769         }
2770         const AtomicString& currentNamespace = m_tree.currentElement()->namespaceURI();
2771         if (currentNamespace == MathMLNames::mathmlNamespaceURI)
2772             adjustMathMLAttributes(token);
2773         if (currentNamespace == SVGNames::svgNamespaceURI) {
2774             adjustSVGTagNameCase(token);
2775             adjustSVGAttributes(token);
2776         }
2777         adjustForeignAttributes(token);
2778         m_tree.insertForeignElement(token, currentNamespace);
2779         break;
2780     }
2781     case HTMLTokenTypes::EndTag: {
2782         if (m_tree.currentNode()->namespaceURI() == SVGNames::svgNamespaceURI)
2783             adjustSVGTagNameCase(token);
2784
2785         if (token.name() == SVGNames::scriptTag && m_tree.currentNode()->hasTagName(SVGNames::scriptTag)) {
2786             m_isPaused = true;
2787             m_scriptToProcess = m_tree.currentElement();
2788             m_tree.openElements()->pop();
2789             return;
2790         }
2791         if (!isInHTMLNamespace(m_tree.currentNode())) {
2792             // FIXME: This code just wants an Element* iterator, instead of an ElementRecord*
2793             HTMLElementStack::ElementRecord* nodeRecord = m_tree.openElements()->topRecord();
2794             if (!nodeRecord->node()->hasLocalName(token.name()))
2795                 parseError(token);
2796             while (1) {
2797                 if (nodeRecord->node()->hasLocalName(token.name())) {
2798                     m_tree.openElements()->popUntilPopped(nodeRecord->element());
2799                     return;
2800                 }
2801                 nodeRecord = nodeRecord->next();
2802
2803                 if (isInHTMLNamespace(nodeRecord->node()))
2804                     break;
2805             }
2806         }
2807         // Otherwise, process the token according to the rules given in the section corresponding to the current insertion mode in HTML content.
2808         processEndTag(token);
2809         break;
2810     }
2811     case HTMLTokenTypes::Comment:
2812         m_tree.insertComment(token);
2813         return;
2814     case HTMLTokenTypes::Character: {
2815         String characters = String(token.characters().data(), token.characters().size());
2816         m_tree.insertTextNode(characters);
2817         if (m_framesetOk && !isAllWhitespace(characters))
2818             m_framesetOk = false;
2819         break;
2820     }
2821     case HTMLTokenTypes::EndOfFile:
2822         ASSERT_NOT_REACHED();
2823         break;
2824     }
2825 }
2826
2827 void HTMLTreeBuilder::finished()
2828 {
2829     if (isParsingFragment())
2830         return;
2831     
2832     ASSERT(m_document);
2833     // Warning, this may detach the parser. Do not do anything else after this.
2834     m_document->finishedParsing();
2835 }
2836
2837 void HTMLTreeBuilder::parseError(AtomicHTMLToken&)
2838 {
2839 }
2840
2841 bool HTMLTreeBuilder::scriptEnabled(Frame* frame)
2842 {
2843     if (!frame)
2844         return false;
2845     return frame->script()->canExecuteScripts(NotAboutToExecuteScript);
2846 }
2847
2848 bool HTMLTreeBuilder::pluginsEnabled(Frame* frame)
2849 {
2850     if (!frame)
2851         return false;
2852     return frame->loader()->subframeLoader()->allowPlugins(NotAboutToInstantiatePlugin);
2853 }
2854
2855 }