2b0c02e5f6f8976fe967c7280bc410df176d4d51
[WebKit-https.git] / Source / WebCore / html / parser / HTMLTreeBuilder.cpp
1 /*
2  * Copyright (C) 2010 Google, Inc. All Rights Reserved.
3  * Copyright (C) 2011 Apple Inc. All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY GOOGLE INC. ``AS IS'' AND ANY
15  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL GOOGLE INC. OR
18  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  */
26
27 #include "config.h"
28 #include "HTMLTreeBuilder.h"
29
30 #include "Comment.h"
31 #include "DOMWindow.h"
32 #include "DocumentFragment.h"
33 #include "DocumentType.h"
34 #include "Frame.h"
35 #include "HTMLDocument.h"
36 #include "HTMLDocumentParser.h"
37 #include "HTMLElementFactory.h"
38 #include "HTMLFormElement.h"
39 #include "HTMLHtmlElement.h"
40 #include "HTMLNames.h"
41 #include "HTMLParserIdioms.h"
42 #include "HTMLScriptElement.h"
43 #include "HTMLToken.h"
44 #include "HTMLTokenizer.h"
45 #include "LocalizedStrings.h"
46 #include "MathMLNames.h"
47 #include "NotImplemented.h"
48 #include "SVGNames.h"
49 #include "Text.h"
50 #include "XLinkNames.h"
51 #include "XMLNSNames.h"
52 #include "XMLNames.h"
53 #include <wtf/unicode/CharacterNames.h>
54
55 namespace WebCore {
56
57 using namespace HTMLNames;
58
59 static const int uninitializedLineNumberValue = -1;
60
61 static TextPosition uninitializedPositionValue1()
62 {
63     return TextPosition(OrdinalNumber::fromOneBasedInt(-1), OrdinalNumber::first());
64 }
65
66 namespace {
67
68 inline bool isHTMLSpaceOrReplacementCharacter(UChar character)
69 {
70     return isHTMLSpace(character) || character == replacementCharacter;
71 }
72
73 inline bool isAllWhitespace(const String& string)
74 {
75     return string.isAllSpecialCharacters<isHTMLSpace>();
76 }
77
78 inline bool isAllWhitespaceOrReplacementCharacters(const String& string)
79 {
80     return string.isAllSpecialCharacters<isHTMLSpaceOrReplacementCharacter>();
81 }
82
83 bool isNumberedHeaderTag(const AtomicString& tagName)
84 {
85     return tagName == h1Tag
86         || tagName == h2Tag
87         || tagName == h3Tag
88         || tagName == h4Tag
89         || tagName == h5Tag
90         || tagName == h6Tag;
91 }
92
93 bool isCaptionColOrColgroupTag(const AtomicString& tagName)
94 {
95     return tagName == captionTag
96         || tagName == colTag
97         || tagName == colgroupTag;
98 }
99
100 bool isTableCellContextTag(const AtomicString& tagName)
101 {
102     return tagName == thTag || tagName == tdTag;
103 }
104
105 bool isTableBodyContextTag(const AtomicString& tagName)
106 {
107     return tagName == tbodyTag
108         || tagName == tfootTag
109         || tagName == theadTag;
110 }
111
112 // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#special
113 bool isSpecialNode(Node* node)
114 {
115     if (node->hasTagName(MathMLNames::miTag)
116         || node->hasTagName(MathMLNames::moTag)
117         || node->hasTagName(MathMLNames::mnTag)
118         || node->hasTagName(MathMLNames::msTag)
119         || node->hasTagName(MathMLNames::mtextTag)
120         || node->hasTagName(MathMLNames::annotation_xmlTag)
121         || node->hasTagName(SVGNames::foreignObjectTag)
122         || node->hasTagName(SVGNames::descTag)
123         || node->hasTagName(SVGNames::titleTag))
124         return true;
125     if (node->nodeType() == Node::DOCUMENT_FRAGMENT_NODE)
126         return true;
127     if (!isInHTMLNamespace(node))
128         return false;
129     const AtomicString& tagName = node->localName();
130     return tagName == addressTag
131         || tagName == appletTag
132         || tagName == areaTag
133         || tagName == articleTag
134         || tagName == asideTag
135         || tagName == baseTag
136         || tagName == basefontTag
137         || tagName == bgsoundTag
138         || tagName == blockquoteTag
139         || tagName == bodyTag
140         || tagName == brTag
141         || tagName == buttonTag
142         || tagName == captionTag
143         || tagName == centerTag
144         || tagName == colTag
145         || tagName == colgroupTag
146         || tagName == commandTag
147         || tagName == ddTag
148         || tagName == detailsTag
149         || tagName == dirTag
150         || tagName == divTag
151         || tagName == dlTag
152         || tagName == dtTag
153         || tagName == embedTag
154         || tagName == fieldsetTag
155         || tagName == figcaptionTag
156         || tagName == figureTag
157         || tagName == footerTag
158         || tagName == formTag
159         || tagName == frameTag
160         || tagName == framesetTag
161         || isNumberedHeaderTag(tagName)
162         || tagName == headTag
163         || tagName == headerTag
164         || tagName == hgroupTag
165         || tagName == hrTag
166         || tagName == htmlTag
167         || tagName == iframeTag
168         || tagName == imgTag
169         || tagName == inputTag
170         || tagName == isindexTag
171         || tagName == liTag
172         || tagName == linkTag
173         || tagName == listingTag
174         || tagName == marqueeTag
175         || tagName == menuTag
176         || tagName == metaTag
177         || tagName == navTag
178         || tagName == noembedTag
179         || tagName == noframesTag
180         || tagName == noscriptTag
181         || tagName == objectTag
182         || tagName == olTag
183         || tagName == pTag
184         || tagName == paramTag
185         || tagName == plaintextTag
186         || tagName == preTag
187         || tagName == scriptTag
188         || tagName == sectionTag
189         || tagName == selectTag
190         || tagName == styleTag
191         || tagName == summaryTag
192         || tagName == tableTag
193         || isTableBodyContextTag(tagName)
194         || tagName == tdTag
195         || tagName == textareaTag
196         || tagName == thTag
197         || tagName == titleTag
198         || tagName == trTag
199         || tagName == ulTag
200         || tagName == wbrTag
201         || tagName == xmpTag;
202 }
203
204 bool isNonAnchorNonNobrFormattingTag(const AtomicString& tagName)
205 {
206     return tagName == bTag
207         || tagName == bigTag
208         || tagName == codeTag
209         || tagName == emTag
210         || tagName == fontTag
211         || tagName == iTag
212         || tagName == sTag
213         || tagName == smallTag
214         || tagName == strikeTag
215         || tagName == strongTag
216         || tagName == ttTag
217         || tagName == uTag;
218 }
219
220 bool isNonAnchorFormattingTag(const AtomicString& tagName)
221 {
222     return tagName == nobrTag
223         || isNonAnchorNonNobrFormattingTag(tagName);
224 }
225
226 // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#formatting
227 bool isFormattingTag(const AtomicString& tagName)
228 {
229     return tagName == aTag || isNonAnchorFormattingTag(tagName);
230 }
231
232 HTMLFormElement* closestFormAncestor(Element* element)
233 {
234     while (element) {
235         if (element->hasTagName(formTag))
236             return static_cast<HTMLFormElement*>(element);
237         ContainerNode* parent = element->parentNode();
238         if (!parent || !parent->isElementNode())
239             return 0;
240         element = static_cast<Element*>(parent);
241     }
242     return 0;
243 }
244
245 } // namespace
246
247 class HTMLTreeBuilder::ExternalCharacterTokenBuffer {
248     WTF_MAKE_NONCOPYABLE(ExternalCharacterTokenBuffer);
249 public:
250     explicit ExternalCharacterTokenBuffer(AtomicHTMLToken& token)
251         : m_current(token.characters().data())
252         , m_end(m_current + token.characters().size())
253     {
254         ASSERT(!isEmpty());
255     }
256
257     explicit ExternalCharacterTokenBuffer(const String& string)
258         : m_current(string.characters())
259         , m_end(m_current + string.length())
260     {
261         ASSERT(!isEmpty());
262     }
263
264     ~ExternalCharacterTokenBuffer()
265     {
266         ASSERT(isEmpty());
267     }
268
269     bool isEmpty() const { return m_current == m_end; }
270
271     void skipAtMostOneLeadingNewline()
272     {
273         ASSERT(!isEmpty());
274         if (*m_current == '\n')
275             ++m_current;
276     }
277
278     void skipLeadingWhitespace()
279     {
280         skipLeading<isHTMLSpace>();
281     }
282
283     String takeLeadingWhitespace()
284     {
285         return takeLeading<isHTMLSpace>();
286     }
287
288     void skipLeadingNonWhitespace()
289     {
290         skipLeading<isNotHTMLSpace>();
291     }
292
293     String takeRemaining()
294     {
295         ASSERT(!isEmpty());
296         const UChar* start = m_current;
297         m_current = m_end;
298         return String(start, m_current - start);
299     }
300
301     void giveRemainingTo(StringBuilder& recipient)
302     {
303         recipient.append(m_current, m_end - m_current);
304         m_current = m_end;
305     }
306
307     String takeRemainingWhitespace()
308     {
309         ASSERT(!isEmpty());
310         Vector<UChar> whitespace;
311         do {
312             UChar cc = *m_current++;
313             if (isHTMLSpace(cc))
314                 whitespace.append(cc);
315         } while (m_current < m_end);
316         // Returning the null string when there aren't any whitespace
317         // characters is slightly cleaner semantically because we don't want
318         // to insert a text node (as opposed to inserting an empty text node).
319         if (whitespace.isEmpty())
320             return String();
321         return String::adopt(whitespace);
322     }
323
324 private:
325     template<bool characterPredicate(UChar)>
326     void skipLeading()
327     {
328         ASSERT(!isEmpty());
329         while (characterPredicate(*m_current)) {
330             if (++m_current == m_end)
331                 return;
332         }
333     }
334
335     template<bool characterPredicate(UChar)>
336     String takeLeading()
337     {
338         ASSERT(!isEmpty());
339         const UChar* start = m_current;
340         skipLeading<characterPredicate>();
341         if (start == m_current)
342             return String();
343         return String(start, m_current - start);
344     }
345
346     const UChar* m_current;
347     const UChar* m_end;
348 };
349
350
// Constructs a tree builder that parses into |document|.
// The builder starts unpaused, with frameset-ok set, both insertion modes at
// InitialMode, and the script start position set to the placeholder from
// uninitializedPositionValue1(). m_fragmentContext is not named in the
// initializer list, so it is default-constructed.
351 HTMLTreeBuilder::HTMLTreeBuilder(HTMLDocumentParser* parser, HTMLDocument* document, bool reportErrors, bool usePreHTML5ParserQuirks, unsigned maximumDOMTreeDepth)
352     : m_framesetOk(true)
353     , m_document(document)
354     , m_tree(document, maximumDOMTreeDepth)
355     , m_reportErrors(reportErrors)
356     , m_isPaused(false)
357     , m_insertionMode(InitialMode)
358     , m_originalInsertionMode(InitialMode)
359     , m_shouldSkipLeadingNewline(false)
360     , m_parser(parser)
361     , m_scriptToProcessStartPosition(uninitializedPositionValue1())
362     , m_lastScriptElementStartPosition(TextPosition::belowRangePosition())
363     , m_usePreHTML5ParserQuirks(usePreHTML5ParserQuirks)
364 {
365 }
366
367 // FIXME: Member variables should be grouped into self-initializing structs to
368 // minimize code duplication between these constructors.
369 HTMLTreeBuilder::HTMLTreeBuilder(HTMLDocumentParser* parser, DocumentFragment* fragment, Element* contextElement, FragmentScriptingPermission scriptingPermission, bool usePreHTML5ParserQuirks, unsigned maximumDOMTreeDepth)
370     : m_framesetOk(true)
371     , m_fragmentContext(fragment, contextElement, scriptingPermission)
372     , m_document(fragment->document())
373     , m_tree(fragment, scriptingPermission, maximumDOMTreeDepth)
374     , m_reportErrors(false) // FIXME: Why not report errors in fragments?
375     , m_isPaused(false)
376     , m_insertionMode(InitialMode)
377     , m_originalInsertionMode(InitialMode)
378     , m_shouldSkipLeadingNewline(false)
379     , m_parser(parser)
380     , m_scriptToProcessStartPosition(uninitializedPositionValue1())
381     , m_lastScriptElementStartPosition(TextPosition::belowRangePosition())
382     , m_usePreHTML5ParserQuirks(usePreHTML5ParserQuirks)
383 {
384     // FIXME: This assertion will become invalid if <http://webkit.org/b/60316> is fixed.
385     ASSERT(contextElement);
386     if (contextElement) {
387         // Steps 4.2-4.6 of the HTML5 Fragment Case parsing algorithm:
388         // http://www.whatwg.org/specs/web-apps/current-work/multipage/the-end.html#fragment-case
389         // For efficiency, we skip step 4.2 ("Let root be a new html element with no attributes")
390         // and instead use the DocumentFragment as a root node.
391         m_tree.openElements()->pushRootNode(fragment);
392         resetInsertionModeAppropriately();
393         m_tree.setForm(closestFormAncestor(contextElement));
394     }
395 }
396
// Out-of-line destructor with an empty body; member destructors do all the
// cleanup.
397 HTMLTreeBuilder::~HTMLTreeBuilder()
398 {
399 }
400
// Drops the builder's document pointer and detaches the construction site
// (m_tree).
401 void HTMLTreeBuilder::detach()
402 {
403     // This call makes little sense in fragment mode, but for consistency
404     // DocumentParser expects detach() to always be called before it's destroyed.
405     m_document = 0;
406     // HTMLConstructionSite might be on the callstack when detach() is called
407     // otherwise we'd just call m_tree.clear() here instead.
408     m_tree.detach();
409 }
410
// Default context: no fragment, no context element, scripting allowed.
411 HTMLTreeBuilder::FragmentParsingContext::FragmentParsingContext()
412     : m_fragment(0)
413     , m_contextElement(0)
414     , m_scriptingPermission(FragmentScriptingAllowed)
415 {
416 }
417
// Records the fragment, context element and scripting permission for a
// fragment parse. |fragment| is dereferenced by the assertion, which also
// requires it to have no children yet.
418 HTMLTreeBuilder::FragmentParsingContext::FragmentParsingContext(DocumentFragment* fragment, Element* contextElement, FragmentScriptingPermission scriptingPermission)
419     : m_fragment(fragment)
420     , m_contextElement(contextElement)
421     , m_scriptingPermission(scriptingPermission)
422 {
423     ASSERT(!fragment->hasChildNodes());
424 }
425
// Out-of-line destructor with an empty body.
426 HTMLTreeBuilder::FragmentParsingContext::~FragmentParsingContext()
427 {
428 }
429
430 PassRefPtr<Element> HTMLTreeBuilder::takeScriptToProcess(TextPosition& scriptStartPosition)
431 {
432     // Unpause ourselves, callers may pause us again when processing the script.
433     // The HTML5 spec is written as though scripts are executed inside the tree
434     // builder.  We pause the parser to exit the tree builder, and then resume
435     // before running scripts.
436     m_isPaused = false;
437     scriptStartPosition = m_scriptToProcessStartPosition;
438     m_scriptToProcessStartPosition = uninitializedPositionValue1();
439     return m_scriptToProcess.release();
440 }
441
442 void HTMLTreeBuilder::constructTreeFromToken(HTMLToken& rawToken)
443 {
444     AtomicHTMLToken token(rawToken);
445
446     // We clear the rawToken in case constructTreeFromAtomicToken
447     // synchronously re-enters the parser. We don't clear the token immedately
448     // for Character tokens because the AtomicHTMLToken avoids copying the
449     // characters by keeping a pointer to the underlying buffer in the
450     // HTMLToken. Fortuantely, Character tokens can't cause use to re-enter
451     // the parser.
452     //
453     // FIXME: Stop clearing the rawToken once we start running the parser off
454     // the main thread or once we stop allowing synchronous JavaScript
455     // execution from parseAttribute.
456     if (rawToken.type() != HTMLTokenTypes::Character)
457         rawToken.clear();
458
459     constructTreeFromAtomicToken(token);
460
461     if (!rawToken.isUninitialized()) {
462         ASSERT(rawToken.type() == HTMLTokenTypes::Character);
463         rawToken.clear();
464     }
465 }
466
467 void HTMLTreeBuilder::constructTreeFromAtomicToken(AtomicHTMLToken& token)
468 {
469     if (shouldProcessTokenInForeignContent(token))
470         processTokenInForeignContent(token);
471     else
472         processToken(token);
473
474     bool inForeignContent = !m_tree.isEmpty()
475         && !isInHTMLNamespace(m_tree.currentNode())
476         && !HTMLElementStack::isHTMLIntegrationPoint(m_tree.currentNode())
477         && !HTMLElementStack::isMathMLTextIntegrationPoint(m_tree.currentNode());
478
479     m_parser->tokenizer()->setForceNullCharacterReplacement(m_insertionMode == TextMode || inForeignContent);
480     m_parser->tokenizer()->setShouldAllowCDATA(inForeignContent);
481
482     m_tree.executeQueuedTasks();
483     // We might be detached now.
484 }
485
486 void HTMLTreeBuilder::processToken(AtomicHTMLToken& token)
487 {
488     switch (token.type()) {
489     case HTMLTokenTypes::Uninitialized:
490         ASSERT_NOT_REACHED();
491         break;
492     case HTMLTokenTypes::DOCTYPE:
493         m_shouldSkipLeadingNewline = false;
494         processDoctypeToken(token);
495         break;
496     case HTMLTokenTypes::StartTag:
497         m_shouldSkipLeadingNewline = false;
498         processStartTag(token);
499         break;
500     case HTMLTokenTypes::EndTag:
501         m_shouldSkipLeadingNewline = false;
502         processEndTag(token);
503         break;
504     case HTMLTokenTypes::Comment:
505         m_shouldSkipLeadingNewline = false;
506         processComment(token);
507         return;
508     case HTMLTokenTypes::Character:
509         processCharacter(token);
510         break;
511     case HTMLTokenTypes::EndOfFile:
512         m_shouldSkipLeadingNewline = false;
513         processEndOfFile(token);
514         break;
515     }
516 }
517
518 void HTMLTreeBuilder::processDoctypeToken(AtomicHTMLToken& token)
519 {
520     ASSERT(token.type() == HTMLTokenTypes::DOCTYPE);
521     if (m_insertionMode == InitialMode) {
522         m_tree.insertDoctype(token);
523         setInsertionMode(BeforeHTMLMode);
524         return;
525     }
526     if (m_insertionMode == InTableTextMode) {
527         defaultForInTableText();
528         processDoctypeToken(token);
529         return;
530     }
531     parseError(token);
532 }
533
534 void HTMLTreeBuilder::processFakeStartTag(const QualifiedName& tagName, const Vector<Attribute>& attributes)
535 {
536     // FIXME: We'll need a fancier conversion than just "localName" for SVG/MathML tags.
537     AtomicHTMLToken fakeToken(HTMLTokenTypes::StartTag, tagName.localName(), attributes);
538     processStartTag(fakeToken);
539 }
540
541 void HTMLTreeBuilder::processFakeEndTag(const QualifiedName& tagName)
542 {
543     // FIXME: We'll need a fancier conversion than just "localName" for SVG/MathML tags.
544     AtomicHTMLToken fakeToken(HTMLTokenTypes::EndTag, tagName.localName());
545     processEndTag(fakeToken);
546 }
547
548 void HTMLTreeBuilder::processFakeCharacters(const String& characters)
549 {
550     ASSERT(!characters.isEmpty());
551     ExternalCharacterTokenBuffer buffer(characters);
552     processCharacterBuffer(buffer);
553 }
554
555 void HTMLTreeBuilder::processFakePEndTagIfPInButtonScope()
556 {
557     if (!m_tree.openElements()->inButtonScope(pTag.localName()))
558         return;
559     AtomicHTMLToken endP(HTMLTokenTypes::EndTag, pTag.localName());
560     processEndTag(endP);
561 }
562
563 Vector<Attribute> HTMLTreeBuilder::attributesForIsindexInput(AtomicHTMLToken& token)
564 {
565     Vector<Attribute> attributes = token.attributes();
566     for (int i = attributes.size() - 1; i >= 0; --i) {
567         const QualifiedName& name = attributes.at(i).name();
568         if (name.matches(nameAttr) || name.matches(actionAttr) || name.matches(promptAttr))
569             attributes.remove(i);
570     }
571
572     attributes.append(Attribute(nameAttr, isindexTag.localName()));
573     return attributes;
574 }
575
576 void HTMLTreeBuilder::processIsindexStartTagForInBody(AtomicHTMLToken& token)
577 {
578     ASSERT(token.type() == HTMLTokenTypes::StartTag);
579     ASSERT(token.name() == isindexTag);
580     parseError(token);
581     if (m_tree.form())
582         return;
583     notImplemented(); // Acknowledge self-closing flag
584     processFakeStartTag(formTag);
585     Attribute* actionAttribute = token.getAttributeItem(actionAttr);
586     if (actionAttribute)
587         m_tree.form()->setAttribute(actionAttr, actionAttribute->value());
588     processFakeStartTag(hrTag);
589     processFakeStartTag(labelTag);
590     Attribute* promptAttribute = token.getAttributeItem(promptAttr);
591     if (promptAttribute)
592         processFakeCharacters(promptAttribute->value());
593     else
594         processFakeCharacters(searchableIndexIntroduction());
595     processFakeStartTag(inputTag, attributesForIsindexInput(token));
596     notImplemented(); // This second set of characters may be needed by non-english locales.
597     processFakeEndTag(labelTag);
598     processFakeStartTag(hrTag);
599     processFakeEndTag(formTag);
600 }
601
602 namespace {
603
604 bool isLi(const ContainerNode* element)
605 {
606     return element->hasTagName(liTag);
607 }
608
609 bool isDdOrDt(const ContainerNode* element)
610 {
611     return element->hasTagName(ddTag)
612         || element->hasTagName(dtTag);
613 }
614
615 }
616
617 template <bool shouldClose(const ContainerNode*)>
618 void HTMLTreeBuilder::processCloseWhenNestedTag(AtomicHTMLToken& token)
619 {
620     m_framesetOk = false;
621     HTMLElementStack::ElementRecord* nodeRecord = m_tree.openElements()->topRecord();
622     while (1) {
623         RefPtr<ContainerNode> node = nodeRecord->node();
624         if (shouldClose(node.get())) {
625             ASSERT(node->isElementNode());
626             processFakeEndTag(toElement(node.get())->tagQName());
627             break;
628         }
629         if (isSpecialNode(node.get()) && !node->hasTagName(addressTag) && !node->hasTagName(divTag) && !node->hasTagName(pTag))
630             break;
631         nodeRecord = nodeRecord->next();
632     }
633     processFakePEndTagIfPInButtonScope();
634     m_tree.insertHTMLElement(token);
635 }
636
637 namespace {
638
639 typedef HashMap<AtomicString, QualifiedName> PrefixedNameToQualifiedNameMap;
640
641 void mapLoweredLocalNameToName(PrefixedNameToQualifiedNameMap* map, QualifiedName** names, size_t length)
642 {
643     for (size_t i = 0; i < length; ++i) {
644         const QualifiedName& name = *names[i];
645         const AtomicString& localName = name.localName();
646         AtomicString loweredLocalName = localName.lower();
647         if (loweredLocalName != localName)
648             map->add(loweredLocalName, name);
649     }
650 }
651
652 void adjustSVGTagNameCase(AtomicHTMLToken& token)
653 {
654     static PrefixedNameToQualifiedNameMap* caseMap = 0;
655     if (!caseMap) {
656         caseMap = new PrefixedNameToQualifiedNameMap;
657         size_t length = 0;
658         QualifiedName** svgTags = SVGNames::getSVGTags(&length);
659         mapLoweredLocalNameToName(caseMap, svgTags, length);
660     }
661
662     const QualifiedName& casedName = caseMap->get(token.name());
663     if (casedName.localName().isNull())
664         return;
665     token.setName(casedName.localName());
666 }
667
668 template<QualifiedName** getAttrs(size_t* length)>
669 void adjustAttributes(AtomicHTMLToken& token)
670 {
671     static PrefixedNameToQualifiedNameMap* caseMap = 0;
672     if (!caseMap) {
673         caseMap = new PrefixedNameToQualifiedNameMap;
674         size_t length = 0;
675         QualifiedName** attrs = getAttrs(&length);
676         mapLoweredLocalNameToName(caseMap, attrs, length);
677     }
678
679     for (unsigned i = 0; i < token.attributes().size(); ++i) {
680         Attribute& tokenAttribute = token.attributes().at(i);
681         const QualifiedName& casedName = caseMap->get(tokenAttribute.localName());
682         if (!casedName.localName().isNull())
683             tokenAttribute.parserSetName(casedName);
684     }
685 }
686
// Applies adjustAttributes() to |token| using the attribute names returned
// by SVGNames::getSVGAttrs().
687 void adjustSVGAttributes(AtomicHTMLToken& token)
688 {
689     adjustAttributes<SVGNames::getSVGAttrs>(token);
690 }
691
// Applies adjustAttributes() to |token| using the attribute names returned
// by MathMLNames::getMathMLAttrs().
692 void adjustMathMLAttributes(AtomicHTMLToken& token)
693 {
694     adjustAttributes<MathMLNames::getMathMLAttrs>(token);
695 }
696
697 void addNamesWithPrefix(PrefixedNameToQualifiedNameMap* map, const AtomicString& prefix, QualifiedName** names, size_t length)
698 {
699     for (size_t i = 0; i < length; ++i) {
700         QualifiedName* name = names[i];
701         const AtomicString& localName = name->localName();
702         AtomicString prefixColonLocalName = prefix + ':' + localName;
703         QualifiedName nameWithPrefix(prefix, localName, name->namespaceURI());
704         map->add(prefixColonLocalName, nameWithPrefix);
705     }
706 }
707
708 void adjustForeignAttributes(AtomicHTMLToken& token)
709 {
710     static PrefixedNameToQualifiedNameMap* map = 0;
711     if (!map) {
712         map = new PrefixedNameToQualifiedNameMap;
713         size_t length = 0;
714         QualifiedName** attrs = XLinkNames::getXLinkAttrs(&length);
715         addNamesWithPrefix(map, "xlink", attrs, length);
716
717         attrs = XMLNames::getXMLAttrs(&length);
718         addNamesWithPrefix(map, "xml", attrs, length);
719
720         map->add("xmlns", XMLNSNames::xmlnsAttr);
721         map->add("xmlns:xlink", QualifiedName("xmlns", "xlink", XMLNSNames::xmlnsNamespaceURI));
722     }
723
724     for (unsigned i = 0; i < token.attributes().size(); ++i) {
725         Attribute& tokenAttribute = token.attributes().at(i);
726         const QualifiedName& name = map->get(tokenAttribute.localName());
727         if (!name.localName().isNull())
728             tokenAttribute.parserSetName(name);
729     }
730 }
731
732 }
733
734 void HTMLTreeBuilder::processStartTagForInBody(AtomicHTMLToken& token)
735 {
736     ASSERT(token.type() == HTMLTokenTypes::StartTag);
737     if (token.name() == htmlTag) {
738         m_tree.insertHTMLHtmlStartTagInBody(token);
739         return;
740     }
741     if (token.name() == baseTag
742         || token.name() == basefontTag
743         || token.name() == bgsoundTag
744         || token.name() == commandTag
745         || token.name() == linkTag
746         || token.name() == metaTag
747         || token.name() == noframesTag
748         || token.name() == scriptTag
749         || token.name() == styleTag
750         || token.name() == titleTag) {
751         bool didProcess = processStartTagForInHead(token);
752         ASSERT_UNUSED(didProcess, didProcess);
753         return;
754     }
755     if (token.name() == bodyTag) {
756         if (!m_tree.openElements()->secondElementIsHTMLBodyElement() || m_tree.openElements()->hasOnlyOneElement()) {
757             ASSERT(isParsingFragment());
758             return;
759         }
760         m_framesetOk = false;
761         m_tree.insertHTMLBodyStartTagInBody(token);
762         return;
763     }
764     if (token.name() == framesetTag) {
765         parseError(token);
766         if (!m_tree.openElements()->secondElementIsHTMLBodyElement() || m_tree.openElements()->hasOnlyOneElement()) {
767             ASSERT(isParsingFragment());
768             return;
769         }
770         if (!m_framesetOk)
771             return;
772         ExceptionCode ec = 0;
773         m_tree.openElements()->bodyElement()->remove(ec);
774         ASSERT(!ec);
775         m_tree.openElements()->popUntil(m_tree.openElements()->bodyElement());
776         m_tree.openElements()->popHTMLBodyElement();
777         ASSERT(m_tree.openElements()->top() == m_tree.openElements()->htmlElement());
778         m_tree.insertHTMLElement(token);
779         setInsertionMode(InFramesetMode);
780         return;
781     }
782     if (token.name() == addressTag
783         || token.name() == articleTag
784         || token.name() == asideTag
785         || token.name() == blockquoteTag
786         || token.name() == centerTag
787         || token.name() == detailsTag
788         || token.name() == dirTag
789         || token.name() == divTag
790         || token.name() == dlTag
791         || token.name() == fieldsetTag
792         || token.name() == figcaptionTag
793         || token.name() == figureTag
794         || token.name() == footerTag
795         || token.name() == headerTag
796         || token.name() == hgroupTag
797         || token.name() == menuTag
798         || token.name() == navTag
799         || token.name() == olTag
800         || token.name() == pTag
801         || token.name() == sectionTag
802         || token.name() == summaryTag
803         || token.name() == ulTag) {
804         processFakePEndTagIfPInButtonScope();
805         m_tree.insertHTMLElement(token);
806         return;
807     }
808     if (isNumberedHeaderTag(token.name())) {
809         processFakePEndTagIfPInButtonScope();
810         if (isNumberedHeaderTag(m_tree.currentNode()->localName())) {
811             parseError(token);
812             m_tree.openElements()->pop();
813         }
814         m_tree.insertHTMLElement(token);
815         return;
816     }
817     if (token.name() == preTag || token.name() == listingTag) {
818         processFakePEndTagIfPInButtonScope();
819         m_tree.insertHTMLElement(token);
820         m_shouldSkipLeadingNewline = true;
821         m_framesetOk = false;
822         return;
823     }
824     if (token.name() == formTag) {
825         if (m_tree.form()) {
826             parseError(token);
827             return;
828         }
829         processFakePEndTagIfPInButtonScope();
830         m_tree.insertHTMLFormElement(token);
831         return;
832     }
833     if (token.name() == liTag) {
834         processCloseWhenNestedTag<isLi>(token);
835         return;
836     }
837     if (token.name() == ddTag || token.name() == dtTag) {
838         processCloseWhenNestedTag<isDdOrDt>(token);
839         return;
840     }
841     if (token.name() == plaintextTag) {
842         processFakePEndTagIfPInButtonScope();
843         m_tree.insertHTMLElement(token);
844         m_parser->tokenizer()->setState(HTMLTokenizerState::PLAINTEXTState);
845         return;
846     }
847     if (token.name() == buttonTag) {
848         if (m_tree.openElements()->inScope(buttonTag)) {
849             parseError(token);
850             processFakeEndTag(buttonTag);
851             processStartTag(token); // FIXME: Could we just fall through here?
852             return;
853         }
854         m_tree.reconstructTheActiveFormattingElements();
855         m_tree.insertHTMLElement(token);
856         m_framesetOk = false;
857         return;
858     }
859     if (token.name() == aTag) {
860         Element* activeATag = m_tree.activeFormattingElements()->closestElementInScopeWithName(aTag.localName());
861         if (activeATag) {
862             parseError(token);
863             processFakeEndTag(aTag);
864             m_tree.activeFormattingElements()->remove(activeATag);
865             if (m_tree.openElements()->contains(activeATag))
866                 m_tree.openElements()->remove(activeATag);
867         }
868         m_tree.reconstructTheActiveFormattingElements();
869         m_tree.insertFormattingElement(token);
870         return;
871     }
872     if (isNonAnchorNonNobrFormattingTag(token.name())) {
873         m_tree.reconstructTheActiveFormattingElements();
874         m_tree.insertFormattingElement(token);
875         return;
876     }
877     if (token.name() == nobrTag) {
878         m_tree.reconstructTheActiveFormattingElements();
879         if (m_tree.openElements()->inScope(nobrTag)) {
880             parseError(token);
881             processFakeEndTag(nobrTag);
882             m_tree.reconstructTheActiveFormattingElements();
883         }
884         m_tree.insertFormattingElement(token);
885         return;
886     }
887     if (token.name() == appletTag
888         || token.name() == marqueeTag
889         || token.name() == objectTag) {
890         m_tree.reconstructTheActiveFormattingElements();
891         m_tree.insertHTMLElement(token);
892         m_tree.activeFormattingElements()->appendMarker();
893         m_framesetOk = false;
894         return;
895     }
896     if (token.name() == tableTag) {
897         if (!m_document->inQuirksMode() && m_tree.openElements()->inButtonScope(pTag))
898             processFakeEndTag(pTag);
899         m_tree.insertHTMLElement(token);
900         m_framesetOk = false;
901         setInsertionMode(InTableMode);
902         return;
903     }
904     if (token.name() == imageTag) {
905         parseError(token);
906         // Apparently we're not supposed to ask.
907         token.setName(imgTag.localName());
908         // Note the fall through to the imgTag handling below!
909     }
910     if (token.name() == areaTag
911         || token.name() == brTag
912         || token.name() == embedTag
913         || token.name() == imgTag
914         || token.name() == keygenTag
915         || token.name() == wbrTag) {
916         m_tree.reconstructTheActiveFormattingElements();
917         m_tree.insertSelfClosingHTMLElement(token);
918         m_framesetOk = false;
919         return;
920     }
921     if (token.name() == inputTag) {
922         Attribute* typeAttribute = token.getAttributeItem(typeAttr);
923         m_tree.reconstructTheActiveFormattingElements();
924         m_tree.insertSelfClosingHTMLElement(token);
925         if (!typeAttribute || !equalIgnoringCase(typeAttribute->value(), "hidden"))
926             m_framesetOk = false;
927         return;
928     }
929     if (token.name() == paramTag
930         || token.name() == sourceTag
931         || token.name() == trackTag) {
932         m_tree.insertSelfClosingHTMLElement(token);
933         return;
934     }
935     if (token.name() == hrTag) {
936         processFakePEndTagIfPInButtonScope();
937         m_tree.insertSelfClosingHTMLElement(token);
938         m_framesetOk = false;
939         return;
940     }
941     if (token.name() == isindexTag) {
942         processIsindexStartTagForInBody(token);
943         return;
944     }
945     if (token.name() == textareaTag) {
946         m_tree.insertHTMLElement(token);
947         m_shouldSkipLeadingNewline = true;
948         m_parser->tokenizer()->setState(HTMLTokenizerState::RCDATAState);
949         m_originalInsertionMode = m_insertionMode;
950         m_framesetOk = false;
951         setInsertionMode(TextMode);
952         return;
953     }
954     if (token.name() == xmpTag) {
955         processFakePEndTagIfPInButtonScope();
956         m_tree.reconstructTheActiveFormattingElements();
957         m_framesetOk = false;
958         processGenericRawTextStartTag(token);
959         return;
960     }
961     if (token.name() == iframeTag) {
962         m_framesetOk = false;
963         processGenericRawTextStartTag(token);
964         return;
965     }
966     if (token.name() == noembedTag && pluginsEnabled(m_document->frame())) {
967         processGenericRawTextStartTag(token);
968         return;
969     }
970     if (token.name() == noscriptTag && scriptEnabled(m_document->frame())) {
971         processGenericRawTextStartTag(token);
972         return;
973     }
974     if (token.name() == selectTag) {
975         m_tree.reconstructTheActiveFormattingElements();
976         m_tree.insertHTMLElement(token);
977         m_framesetOk = false;
978         if (m_insertionMode == InTableMode
979              || m_insertionMode == InCaptionMode
980              || m_insertionMode == InColumnGroupMode
981              || m_insertionMode == InTableBodyMode
982              || m_insertionMode == InRowMode
983              || m_insertionMode == InCellMode)
984             setInsertionMode(InSelectInTableMode);
985         else
986             setInsertionMode(InSelectMode);
987         return;
988     }
989     if (token.name() == optgroupTag || token.name() == optionTag) {
990         if (m_tree.currentNode()->hasTagName(optionTag)) {
991             AtomicHTMLToken endOption(HTMLTokenTypes::EndTag, optionTag.localName());
992             processEndTag(endOption);
993         }
994         m_tree.reconstructTheActiveFormattingElements();
995         m_tree.insertHTMLElement(token);
996         return;
997     }
998     if (token.name() == rpTag || token.name() == rtTag) {
999         if (m_tree.openElements()->inScope(rubyTag.localName())) {
1000             m_tree.generateImpliedEndTags();
1001             if (!m_tree.currentNode()->hasTagName(rubyTag))
1002                 parseError(token);
1003         }
1004         m_tree.insertHTMLElement(token);
1005         return;
1006     }
1007     if (token.name() == MathMLNames::mathTag.localName()) {
1008         m_tree.reconstructTheActiveFormattingElements();
1009         adjustMathMLAttributes(token);
1010         adjustForeignAttributes(token);
1011         m_tree.insertForeignElement(token, MathMLNames::mathmlNamespaceURI);
1012         return;
1013     }
1014     if (token.name() == SVGNames::svgTag.localName()) {
1015         m_tree.reconstructTheActiveFormattingElements();
1016         adjustSVGAttributes(token);
1017         adjustForeignAttributes(token);
1018         m_tree.insertForeignElement(token, SVGNames::svgNamespaceURI);
1019         return;
1020     }
1021     if (isCaptionColOrColgroupTag(token.name())
1022         || token.name() == frameTag
1023         || token.name() == headTag
1024         || isTableBodyContextTag(token.name())
1025         || isTableCellContextTag(token.name())
1026         || token.name() == trTag) {
1027         parseError(token);
1028         return;
1029     }
1030     m_tree.reconstructTheActiveFormattingElements();
1031     m_tree.insertHTMLElement(token);
1032 }
1033
1034 bool HTMLTreeBuilder::processColgroupEndTagForInColumnGroup()
1035 {
1036     if (m_tree.currentNode() == m_tree.openElements()->rootNode()) {
1037         ASSERT(isParsingFragment());
1038         // FIXME: parse error
1039         return false;
1040     }
1041     m_tree.openElements()->pop();
1042     setInsertionMode(InTableMode);
1043     return true;
1044 }
1045
1046 // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#close-the-cell
1047 void HTMLTreeBuilder::closeTheCell()
1048 {
1049     ASSERT(insertionMode() == InCellMode);
1050     if (m_tree.openElements()->inTableScope(tdTag)) {
1051         ASSERT(!m_tree.openElements()->inTableScope(thTag));
1052         processFakeEndTag(tdTag);
1053         return;
1054     }
1055     ASSERT(m_tree.openElements()->inTableScope(thTag));
1056     processFakeEndTag(thTag);
1057     ASSERT(insertionMode() == InRowMode);
1058 }
1059
1060 void HTMLTreeBuilder::processStartTagForInTable(AtomicHTMLToken& token)
1061 {
1062     ASSERT(token.type() == HTMLTokenTypes::StartTag);
1063     if (token.name() == captionTag) {
1064         m_tree.openElements()->popUntilTableScopeMarker();
1065         m_tree.activeFormattingElements()->appendMarker();
1066         m_tree.insertHTMLElement(token);
1067         setInsertionMode(InCaptionMode);
1068         return;
1069     }
1070     if (token.name() == colgroupTag) {
1071         m_tree.openElements()->popUntilTableScopeMarker();
1072         m_tree.insertHTMLElement(token);
1073         setInsertionMode(InColumnGroupMode);
1074         return;
1075     }
1076     if (token.name() == colTag) {
1077         processFakeStartTag(colgroupTag);
1078         ASSERT(InColumnGroupMode);
1079         processStartTag(token);
1080         return;
1081     }
1082     if (isTableBodyContextTag(token.name())) {
1083         m_tree.openElements()->popUntilTableScopeMarker();
1084         m_tree.insertHTMLElement(token);
1085         setInsertionMode(InTableBodyMode);
1086         return;
1087     }
1088     if (isTableCellContextTag(token.name())
1089         || token.name() == trTag) {
1090         processFakeStartTag(tbodyTag);
1091         ASSERT(insertionMode() == InTableBodyMode);
1092         processStartTag(token);
1093         return;
1094     }
1095     if (token.name() == tableTag) {
1096         parseError(token);
1097         if (!processTableEndTagForInTable()) {
1098             ASSERT(isParsingFragment());
1099             return;
1100         }
1101         processStartTag(token);
1102         return;
1103     }
1104     if (token.name() == styleTag || token.name() == scriptTag) {
1105         processStartTagForInHead(token);
1106         return;
1107     }
1108     if (token.name() == inputTag) {
1109         Attribute* typeAttribute = token.getAttributeItem(typeAttr);
1110         if (typeAttribute && equalIgnoringCase(typeAttribute->value(), "hidden")) {
1111             parseError(token);
1112             m_tree.insertSelfClosingHTMLElement(token);
1113             return;
1114         }
1115         // Fall through to "anything else" case.
1116     }
1117     if (token.name() == formTag) {
1118         parseError(token);
1119         if (m_tree.form())
1120             return;
1121         m_tree.insertHTMLFormElement(token, true);
1122         m_tree.openElements()->pop();
1123         return;
1124     }
1125     parseError(token);
1126     HTMLConstructionSite::RedirectToFosterParentGuard redirecter(m_tree);
1127     processStartTagForInBody(token);
1128 }
1129
1130 void HTMLTreeBuilder::processStartTag(AtomicHTMLToken& token)
1131 {
1132     ASSERT(token.type() == HTMLTokenTypes::StartTag);
1133     switch (insertionMode()) {
1134     case InitialMode:
1135         ASSERT(insertionMode() == InitialMode);
1136         defaultForInitial();
1137         // Fall through.
1138     case BeforeHTMLMode:
1139         ASSERT(insertionMode() == BeforeHTMLMode);
1140         if (token.name() == htmlTag) {
1141             m_tree.insertHTMLHtmlStartTagBeforeHTML(token);
1142             setInsertionMode(BeforeHeadMode);
1143             return;
1144         }
1145         defaultForBeforeHTML();
1146         // Fall through.
1147     case BeforeHeadMode:
1148         ASSERT(insertionMode() == BeforeHeadMode);
1149         if (token.name() == htmlTag) {
1150             m_tree.insertHTMLHtmlStartTagInBody(token);
1151             return;
1152         }
1153         if (token.name() == headTag) {
1154             m_tree.insertHTMLHeadElement(token);
1155             setInsertionMode(InHeadMode);
1156             return;
1157         }
1158         defaultForBeforeHead();
1159         // Fall through.
1160     case InHeadMode:
1161         ASSERT(insertionMode() == InHeadMode);
1162         if (processStartTagForInHead(token))
1163             return;
1164         defaultForInHead();
1165         // Fall through.
1166     case AfterHeadMode:
1167         ASSERT(insertionMode() == AfterHeadMode);
1168         if (token.name() == htmlTag) {
1169             m_tree.insertHTMLHtmlStartTagInBody(token);
1170             return;
1171         }
1172         if (token.name() == bodyTag) {
1173             m_framesetOk = false;
1174             m_tree.insertHTMLBodyElement(token);
1175             setInsertionMode(InBodyMode);
1176             return;
1177         }
1178         if (token.name() == framesetTag) {
1179             m_tree.insertHTMLElement(token);
1180             setInsertionMode(InFramesetMode);
1181             return;
1182         }
1183         if (token.name() == baseTag
1184             || token.name() == basefontTag
1185             || token.name() == bgsoundTag
1186             || token.name() == linkTag
1187             || token.name() == metaTag
1188             || token.name() == noframesTag
1189             || token.name() == scriptTag
1190             || token.name() == styleTag
1191             || token.name() == titleTag) {
1192             parseError(token);
1193             ASSERT(m_tree.head());
1194             m_tree.openElements()->pushHTMLHeadElement(m_tree.head());
1195             processStartTagForInHead(token);
1196             m_tree.openElements()->removeHTMLHeadElement(m_tree.head());
1197             return;
1198         }
1199         if (token.name() == headTag) {
1200             parseError(token);
1201             return;
1202         }
1203         defaultForAfterHead();
1204         // Fall through
1205     case InBodyMode:
1206         ASSERT(insertionMode() == InBodyMode);
1207         processStartTagForInBody(token);
1208         break;
1209     case InTableMode:
1210         ASSERT(insertionMode() == InTableMode);
1211         processStartTagForInTable(token);
1212         break;
1213     case InCaptionMode:
1214         ASSERT(insertionMode() == InCaptionMode);
1215         if (isCaptionColOrColgroupTag(token.name())
1216             || isTableBodyContextTag(token.name())
1217             || isTableCellContextTag(token.name())
1218             || token.name() == trTag) {
1219             parseError(token);
1220             if (!processCaptionEndTagForInCaption()) {
1221                 ASSERT(isParsingFragment());
1222                 return;
1223             }
1224             processStartTag(token);
1225             return;
1226         }
1227         processStartTagForInBody(token);
1228         break;
1229     case InColumnGroupMode:
1230         ASSERT(insertionMode() == InColumnGroupMode);
1231         if (token.name() == htmlTag) {
1232             m_tree.insertHTMLHtmlStartTagInBody(token);
1233             return;
1234         }
1235         if (token.name() == colTag) {
1236             m_tree.insertSelfClosingHTMLElement(token);
1237             return;
1238         }
1239         if (!processColgroupEndTagForInColumnGroup()) {
1240             ASSERT(isParsingFragment());
1241             return;
1242         }
1243         processStartTag(token);
1244         break;
1245     case InTableBodyMode:
1246         ASSERT(insertionMode() == InTableBodyMode);
1247         if (token.name() == trTag) {
1248             m_tree.openElements()->popUntilTableBodyScopeMarker(); // How is there ever anything to pop?
1249             m_tree.insertHTMLElement(token);
1250             setInsertionMode(InRowMode);
1251             return;
1252         }
1253         if (isTableCellContextTag(token.name())) {
1254             parseError(token);
1255             processFakeStartTag(trTag);
1256             ASSERT(insertionMode() == InRowMode);
1257             processStartTag(token);
1258             return;
1259         }
1260         if (isCaptionColOrColgroupTag(token.name()) || isTableBodyContextTag(token.name())) {
1261             // FIXME: This is slow.
1262             if (!m_tree.openElements()->inTableScope(tbodyTag.localName()) && !m_tree.openElements()->inTableScope(theadTag.localName()) && !m_tree.openElements()->inTableScope(tfootTag.localName())) {
1263                 ASSERT(isParsingFragment());
1264                 parseError(token);
1265                 return;
1266             }
1267             m_tree.openElements()->popUntilTableBodyScopeMarker();
1268             ASSERT(isTableBodyContextTag(m_tree.currentElement()->localName()));
1269             processFakeEndTag(m_tree.currentElement()->tagQName());
1270             processStartTag(token);
1271             return;
1272         }
1273         processStartTagForInTable(token);
1274         break;
1275     case InRowMode:
1276         ASSERT(insertionMode() == InRowMode);
1277         if (isTableCellContextTag(token.name())) {
1278             m_tree.openElements()->popUntilTableRowScopeMarker();
1279             m_tree.insertHTMLElement(token);
1280             setInsertionMode(InCellMode);
1281             m_tree.activeFormattingElements()->appendMarker();
1282             return;
1283         }
1284         if (token.name() == trTag
1285             || isCaptionColOrColgroupTag(token.name())
1286             || isTableBodyContextTag(token.name())) {
1287             if (!processTrEndTagForInRow()) {
1288                 ASSERT(isParsingFragment());
1289                 return;
1290             }
1291             ASSERT(insertionMode() == InTableBodyMode);
1292             processStartTag(token);
1293             return;
1294         }
1295         processStartTagForInTable(token);
1296         break;
1297     case InCellMode:
1298         ASSERT(insertionMode() == InCellMode);
1299         if (isCaptionColOrColgroupTag(token.name())
1300             || isTableCellContextTag(token.name())
1301             || token.name() == trTag
1302             || isTableBodyContextTag(token.name())) {
1303             // FIXME: This could be more efficient.
1304             if (!m_tree.openElements()->inTableScope(tdTag) && !m_tree.openElements()->inTableScope(thTag)) {
1305                 ASSERT(isParsingFragment());
1306                 parseError(token);
1307                 return;
1308             }
1309             closeTheCell();
1310             processStartTag(token);
1311             return;
1312         }
1313         processStartTagForInBody(token);
1314         break;
1315     case AfterBodyMode:
1316     case AfterAfterBodyMode:
1317         ASSERT(insertionMode() == AfterBodyMode || insertionMode() == AfterAfterBodyMode);
1318         if (token.name() == htmlTag) {
1319             m_tree.insertHTMLHtmlStartTagInBody(token);
1320             return;
1321         }
1322         setInsertionMode(InBodyMode);
1323         processStartTag(token);
1324         break;
1325     case InHeadNoscriptMode:
1326         ASSERT(insertionMode() == InHeadNoscriptMode);
1327         if (token.name() == htmlTag) {
1328             m_tree.insertHTMLHtmlStartTagInBody(token);
1329             return;
1330         }
1331         if (token.name() == basefontTag
1332             || token.name() == bgsoundTag
1333             || token.name() == linkTag
1334             || token.name() == metaTag
1335             || token.name() == noframesTag
1336             || token.name() == styleTag) {
1337             bool didProcess = processStartTagForInHead(token);
1338             ASSERT_UNUSED(didProcess, didProcess);
1339             return;
1340         }
1341         if (token.name() == htmlTag || token.name() == noscriptTag) {
1342             parseError(token);
1343             return;
1344         }
1345         defaultForInHeadNoscript();
1346         processToken(token);
1347         break;
1348     case InFramesetMode:
1349         ASSERT(insertionMode() == InFramesetMode);
1350         if (token.name() == htmlTag) {
1351             m_tree.insertHTMLHtmlStartTagInBody(token);
1352             return;
1353         }
1354         if (token.name() == framesetTag) {
1355             m_tree.insertHTMLElement(token);
1356             return;
1357         }
1358         if (token.name() == frameTag) {
1359             m_tree.insertSelfClosingHTMLElement(token);
1360             return;
1361         }
1362         if (token.name() == noframesTag) {
1363             processStartTagForInHead(token);
1364             return;
1365         }
1366         parseError(token);
1367         break;
1368     case AfterFramesetMode:
1369     case AfterAfterFramesetMode:
1370         ASSERT(insertionMode() == AfterFramesetMode || insertionMode() == AfterAfterFramesetMode);
1371         if (token.name() == htmlTag) {
1372             m_tree.insertHTMLHtmlStartTagInBody(token);
1373             return;
1374         }
1375         if (token.name() == noframesTag) {
1376             processStartTagForInHead(token);
1377             return;
1378         }
1379         parseError(token);
1380         break;
1381     case InSelectInTableMode:
1382         ASSERT(insertionMode() == InSelectInTableMode);
1383         if (token.name() == captionTag
1384             || token.name() == tableTag
1385             || isTableBodyContextTag(token.name())
1386             || token.name() == trTag
1387             || isTableCellContextTag(token.name())) {
1388             parseError(token);
1389             AtomicHTMLToken endSelect(HTMLTokenTypes::EndTag, selectTag.localName());
1390             processEndTag(endSelect);
1391             processStartTag(token);
1392             return;
1393         }
1394         // Fall through
1395     case InSelectMode:
1396         ASSERT(insertionMode() == InSelectMode || insertionMode() == InSelectInTableMode);
1397         if (token.name() == htmlTag) {
1398             m_tree.insertHTMLHtmlStartTagInBody(token);
1399             return;
1400         }
1401         if (token.name() == optionTag) {
1402             if (m_tree.currentNode()->hasTagName(optionTag)) {
1403                 AtomicHTMLToken endOption(HTMLTokenTypes::EndTag, optionTag.localName());
1404                 processEndTag(endOption);
1405             }
1406             m_tree.insertHTMLElement(token);
1407             return;
1408         }
1409         if (token.name() == optgroupTag) {
1410             if (m_tree.currentNode()->hasTagName(optionTag)) {
1411                 AtomicHTMLToken endOption(HTMLTokenTypes::EndTag, optionTag.localName());
1412                 processEndTag(endOption);
1413             }
1414             if (m_tree.currentNode()->hasTagName(optgroupTag)) {
1415                 AtomicHTMLToken endOptgroup(HTMLTokenTypes::EndTag, optgroupTag.localName());
1416                 processEndTag(endOptgroup);
1417             }
1418             m_tree.insertHTMLElement(token);
1419             return;
1420         }
1421         if (token.name() == selectTag) {
1422             parseError(token);
1423             AtomicHTMLToken endSelect(HTMLTokenTypes::EndTag, selectTag.localName());
1424             processEndTag(endSelect);
1425             return;
1426         }
1427         if (token.name() == inputTag
1428             || token.name() == keygenTag
1429             || token.name() == textareaTag) {
1430             parseError(token);
1431             if (!m_tree.openElements()->inSelectScope(selectTag)) {
1432                 ASSERT(isParsingFragment());
1433                 return;
1434             }
1435             AtomicHTMLToken endSelect(HTMLTokenTypes::EndTag, selectTag.localName());
1436             processEndTag(endSelect);
1437             processStartTag(token);
1438             return;
1439         }
1440         if (token.name() == scriptTag) {
1441             bool didProcess = processStartTagForInHead(token);
1442             ASSERT_UNUSED(didProcess, didProcess);
1443             return;
1444         }
1445         break;
1446     case InTableTextMode:
1447         defaultForInTableText();
1448         processStartTag(token);
1449         break;
1450     case TextMode:
1451         ASSERT_NOT_REACHED();
1452         break;
1453     }
1454 }
1455
1456 bool HTMLTreeBuilder::processBodyEndTagForInBody(AtomicHTMLToken& token)
1457 {
1458     ASSERT(token.type() == HTMLTokenTypes::EndTag);
1459     ASSERT(token.name() == bodyTag);
1460     if (!m_tree.openElements()->inScope(bodyTag.localName())) {
1461         parseError(token);
1462         return false;
1463     }
1464     notImplemented(); // Emit a more specific parse error based on stack contents.
1465     setInsertionMode(AfterBodyMode);
1466     return true;
1467 }
1468
1469 void HTMLTreeBuilder::processAnyOtherEndTagForInBody(AtomicHTMLToken& token)
1470 {
1471     ASSERT(token.type() == HTMLTokenTypes::EndTag);
1472     HTMLElementStack::ElementRecord* record = m_tree.openElements()->topRecord();
1473     while (1) {
1474         RefPtr<ContainerNode> node = record->node();
1475         if (node->hasLocalName(token.name())) {
1476             m_tree.generateImpliedEndTags();
1477             // FIXME: The ElementRecord pointed to by record might be deleted by
1478             // the preceding call. Perhaps we should hold a RefPtr so that it
1479             // stays alive for the duration of record's scope.
1480             record = 0;
1481             if (!m_tree.currentNode()->hasLocalName(token.name())) {
1482                 parseError(token);
1483                 // FIXME: This is either a bug in the spec, or a bug in our
1484                 // implementation.  Filed a bug with HTML5:
1485                 // http://www.w3.org/Bugs/Public/show_bug.cgi?id=10080
1486                 // We might have already popped the node for the token in
1487                 // generateImpliedEndTags, just abort.
1488                 if (!m_tree.openElements()->contains(toElement(node.get())))
1489                     return;
1490             }
1491             m_tree.openElements()->popUntilPopped(toElement(node.get()));
1492             return;
1493         }
1494         if (isSpecialNode(node.get())) {
1495             parseError(token);
1496             return;
1497         }
1498         record = record->next();
1499     }
1500 }
1501
1502 // FIXME: This probably belongs on HTMLElementStack.
1503 HTMLElementStack::ElementRecord* HTMLTreeBuilder::furthestBlockForFormattingElement(Element* formattingElement)
1504 {
1505     HTMLElementStack::ElementRecord* furthestBlock = 0;
1506     HTMLElementStack::ElementRecord* record = m_tree.openElements()->topRecord();
1507     for (; record; record = record->next()) {
1508         if (record->element() == formattingElement)
1509             return furthestBlock;
1510         if (isSpecialNode(record->element()))
1511             furthestBlock = record;
1512     }
1513     ASSERT_NOT_REACHED();
1514     return 0;
1515 }
1516
1517 // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#parsing-main-inbody
1518 void HTMLTreeBuilder::callTheAdoptionAgency(AtomicHTMLToken& token)
1519 {
1520     // The adoption agency algorithm is N^2. We limit the number of iterations
1521     // to stop from hanging the whole browser. This limit is specified in the
1522     // adoption agency algorithm: 
1523     // http://www.whatwg.org/specs/web-apps/current-work/multipage/tree-construction.html#parsing-main-inbody
1524     static const int outerIterationLimit = 8;
1525     static const int innerIterationLimit = 3;
1526
1527     for (int i = 0; i < outerIterationLimit; ++i) {
1528         // 1.
1529         Element* formattingElement = m_tree.activeFormattingElements()->closestElementInScopeWithName(token.name());
1530         if (!formattingElement || ((m_tree.openElements()->contains(formattingElement)) && !m_tree.openElements()->inScope(formattingElement))) {
1531             parseError(token);
1532             notImplemented(); // Check the stack of open elements for a more specific parse error.
1533             return;
1534         }
1535         HTMLElementStack::ElementRecord* formattingElementRecord = m_tree.openElements()->find(formattingElement);
1536         if (!formattingElementRecord) {
1537             parseError(token);
1538             m_tree.activeFormattingElements()->remove(formattingElement);
1539             return;
1540         }
1541         if (formattingElement != m_tree.currentElement())
1542             parseError(token);
1543         // 2.
1544         HTMLElementStack::ElementRecord* furthestBlock = furthestBlockForFormattingElement(formattingElement);
1545         // 3.
1546         if (!furthestBlock) {
1547             m_tree.openElements()->popUntilPopped(formattingElement);
1548             m_tree.activeFormattingElements()->remove(formattingElement);
1549             return;
1550         }
1551         // 4.
1552         ASSERT(furthestBlock->isAbove(formattingElementRecord));
1553         RefPtr<ContainerNode> commonAncestor = formattingElementRecord->next()->node();
1554         // 5.
1555         HTMLFormattingElementList::Bookmark bookmark = m_tree.activeFormattingElements()->bookmarkFor(formattingElement);
1556         // 6.
1557         HTMLElementStack::ElementRecord* node = furthestBlock;
1558         HTMLElementStack::ElementRecord* nextNode = node->next();
1559         HTMLElementStack::ElementRecord* lastNode = furthestBlock;
1560         for (int i = 0; i < innerIterationLimit; ++i) {
1561             // 6.1
1562             node = nextNode;
1563             ASSERT(node);
1564             nextNode = node->next(); // Save node->next() for the next iteration in case node is deleted in 6.2.
1565             // 6.2
1566             if (!m_tree.activeFormattingElements()->contains(node->element())) {
1567                 m_tree.openElements()->remove(node->element());
1568                 node = 0;
1569                 continue;
1570             }
1571             // 6.3
1572             if (node == formattingElementRecord)
1573                 break;
1574             // 6.5
1575             RefPtr<Element> newElement = m_tree.createHTMLElementFromElementRecord(node);
1576             HTMLFormattingElementList::Entry* nodeEntry = m_tree.activeFormattingElements()->find(node->element());
1577             nodeEntry->replaceElement(newElement.get());
1578             node->replaceElement(newElement.release());
1579             // 6.4 -- Intentionally out of order to handle the case where node
1580             // was replaced in 6.5.
1581             // http://www.w3.org/Bugs/Public/show_bug.cgi?id=10096
1582             if (lastNode == furthestBlock)
1583                 bookmark.moveToAfter(nodeEntry);
1584             // 6.6
1585             if (ContainerNode* parent = lastNode->element()->parentNode())
1586                 parent->parserRemoveChild(lastNode->element());
1587             node->element()->parserAddChild(lastNode->element());
1588             if (lastNode->element()->parentElement()->attached() && !lastNode->element()->attached())
1589                 lastNode->element()->lazyAttach();
1590             // 6.7
1591             lastNode = node;
1592         }
1593         // 7
1594         const AtomicString& commonAncestorTag = commonAncestor->localName();
1595         if (ContainerNode* parent = lastNode->element()->parentNode())
1596             parent->parserRemoveChild(lastNode->element());
1597         // FIXME: If this moves to HTMLConstructionSite, this check should use
1598         // causesFosterParenting(tagName) instead.
1599         if (commonAncestorTag == tableTag
1600             || commonAncestorTag == trTag
1601             || isTableBodyContextTag(commonAncestorTag))
1602             m_tree.fosterParent(lastNode->element());
1603         else {
1604             commonAncestor->parserAddChild(lastNode->element());
1605             ASSERT(lastNode->node()->isElementNode());
1606             ASSERT(lastNode->element()->parentNode());
1607             if (lastNode->element()->parentNode()->attached() && !lastNode->element()->attached())
1608                 lastNode->element()->lazyAttach();
1609         }
1610         // 8
1611         RefPtr<Element> newElement = m_tree.createHTMLElementFromElementRecord(formattingElementRecord);
1612         // 9
1613         newElement->takeAllChildrenFrom(furthestBlock->element());
1614         // 10
1615         Element* furthestBlockElement = furthestBlock->element();
1616         // FIXME: All this creation / parserAddChild / attach business should
1617         //        be in HTMLConstructionSite.  My guess is that steps 8--12
1618         //        should all be in some HTMLConstructionSite function.
1619         furthestBlockElement->parserAddChild(newElement);
1620         if (furthestBlockElement->attached() && !newElement->attached()) {
1621             // Notice that newElement might already be attached if, for example, one of the reparented
1622             // children is a style element, which attaches itself automatically.
1623             newElement->attach();
1624         }
1625         // 11
1626         m_tree.activeFormattingElements()->swapTo(formattingElement, newElement.get(), bookmark);
1627         // 12
1628         m_tree.openElements()->remove(formattingElement);
1629         m_tree.openElements()->insertAbove(newElement, furthestBlock);
1630     }
1631 }
1632
1633 void HTMLTreeBuilder::resetInsertionModeAppropriately()
1634 {
1635     // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#reset-the-insertion-mode-appropriately
1636     bool last = false;
1637     HTMLElementStack::ElementRecord* nodeRecord = m_tree.openElements()->topRecord();
1638     while (1) {
1639         ContainerNode* node = nodeRecord->node();
1640         if (node == m_tree.openElements()->rootNode()) {
1641             ASSERT(isParsingFragment());
1642             last = true;
1643             node = m_fragmentContext.contextElement();
1644         }
1645         if (node->hasTagName(selectTag)) {
1646             ASSERT(isParsingFragment());
1647             return setInsertionMode(InSelectMode);
1648         }
1649         if (node->hasTagName(tdTag) || node->hasTagName(thTag))
1650             return setInsertionMode(InCellMode);
1651         if (node->hasTagName(trTag))
1652             return setInsertionMode(InRowMode);
1653         if (node->hasTagName(tbodyTag) || node->hasTagName(theadTag) || node->hasTagName(tfootTag))
1654             return setInsertionMode(InTableBodyMode);
1655         if (node->hasTagName(captionTag))
1656             return setInsertionMode(InCaptionMode);
1657         if (node->hasTagName(colgroupTag)) {
1658             ASSERT(isParsingFragment());
1659             return setInsertionMode(InColumnGroupMode);
1660         }
1661         if (node->hasTagName(tableTag))
1662             return setInsertionMode(InTableMode);
1663         if (node->hasTagName(headTag)) {
1664             ASSERT(isParsingFragment());
1665             return setInsertionMode(InBodyMode);
1666         }
1667         if (node->hasTagName(bodyTag))
1668             return setInsertionMode(InBodyMode);
1669         if (node->hasTagName(framesetTag)) {
1670             ASSERT(isParsingFragment());
1671             return setInsertionMode(InFramesetMode);
1672         }
1673         if (node->hasTagName(htmlTag)) {
1674             ASSERT(isParsingFragment());
1675             return setInsertionMode(BeforeHeadMode);
1676         }
1677         if (last) {
1678             ASSERT(isParsingFragment());
1679             return setInsertionMode(InBodyMode);
1680         }
1681         nodeRecord = nodeRecord->next();
1682     }
1683 }
1684
1685 void HTMLTreeBuilder::processEndTagForInTableBody(AtomicHTMLToken& token)
1686 {
1687     ASSERT(token.type() == HTMLTokenTypes::EndTag);
1688     if (isTableBodyContextTag(token.name())) {
1689         if (!m_tree.openElements()->inTableScope(token.name())) {
1690             parseError(token);
1691             return;
1692         }
1693         m_tree.openElements()->popUntilTableBodyScopeMarker();
1694         m_tree.openElements()->pop();
1695         setInsertionMode(InTableMode);
1696         return;
1697     }
1698     if (token.name() == tableTag) {
1699         // FIXME: This is slow.
1700         if (!m_tree.openElements()->inTableScope(tbodyTag.localName()) && !m_tree.openElements()->inTableScope(theadTag.localName()) && !m_tree.openElements()->inTableScope(tfootTag.localName())) {
1701             ASSERT(isParsingFragment());
1702             parseError(token);
1703             return;
1704         }
1705         m_tree.openElements()->popUntilTableBodyScopeMarker();
1706         ASSERT(isTableBodyContextTag(m_tree.currentElement()->localName()));
1707         processFakeEndTag(m_tree.currentElement()->tagQName());
1708         processEndTag(token);
1709         return;
1710     }
1711     if (token.name() == bodyTag
1712         || isCaptionColOrColgroupTag(token.name())
1713         || token.name() == htmlTag
1714         || isTableCellContextTag(token.name())
1715         || token.name() == trTag) {
1716         parseError(token);
1717         return;
1718     }
1719     processEndTagForInTable(token);
1720 }
1721
1722 void HTMLTreeBuilder::processEndTagForInRow(AtomicHTMLToken& token)
1723 {
1724     ASSERT(token.type() == HTMLTokenTypes::EndTag);
1725     if (token.name() == trTag) {
1726         processTrEndTagForInRow();
1727         return;
1728     }
1729     if (token.name() == tableTag) {
1730         if (!processTrEndTagForInRow()) {
1731             ASSERT(isParsingFragment());
1732             return;
1733         }
1734         ASSERT(insertionMode() == InTableBodyMode);
1735         processEndTag(token);
1736         return;
1737     }
1738     if (isTableBodyContextTag(token.name())) {
1739         if (!m_tree.openElements()->inTableScope(token.name())) {
1740             parseError(token);
1741             return;
1742         }
1743         processFakeEndTag(trTag);
1744         ASSERT(insertionMode() == InTableBodyMode);
1745         processEndTag(token);
1746         return;
1747     }
1748     if (token.name() == bodyTag
1749         || isCaptionColOrColgroupTag(token.name())
1750         || token.name() == htmlTag
1751         || isTableCellContextTag(token.name())) {
1752         parseError(token);
1753         return;
1754     }
1755     processEndTagForInTable(token);
1756 }
1757
1758 void HTMLTreeBuilder::processEndTagForInCell(AtomicHTMLToken& token)
1759 {
1760     ASSERT(token.type() == HTMLTokenTypes::EndTag);
1761     if (isTableCellContextTag(token.name())) {
1762         if (!m_tree.openElements()->inTableScope(token.name())) {
1763             parseError(token);
1764             return;
1765         }
1766         m_tree.generateImpliedEndTags();
1767         if (!m_tree.currentNode()->hasLocalName(token.name()))
1768             parseError(token);
1769         m_tree.openElements()->popUntilPopped(token.name());
1770         m_tree.activeFormattingElements()->clearToLastMarker();
1771         setInsertionMode(InRowMode);
1772         return;
1773     }
1774     if (token.name() == bodyTag
1775         || isCaptionColOrColgroupTag(token.name())
1776         || token.name() == htmlTag) {
1777         parseError(token);
1778         return;
1779     }
1780     if (token.name() == tableTag
1781         || token.name() == trTag
1782         || isTableBodyContextTag(token.name())) {
1783         if (!m_tree.openElements()->inTableScope(token.name())) {
1784             ASSERT(isTableBodyContextTag(token.name()) || isParsingFragment());
1785             parseError(token);
1786             return;
1787         }
1788         closeTheCell();
1789         processEndTag(token);
1790         return;
1791     }
1792     processEndTagForInBody(token);
1793 }
1794
1795 void HTMLTreeBuilder::processEndTagForInBody(AtomicHTMLToken& token)
1796 {
1797     ASSERT(token.type() == HTMLTokenTypes::EndTag);
1798     if (token.name() == bodyTag) {
1799         processBodyEndTagForInBody(token);
1800         return;
1801     }
1802     if (token.name() == htmlTag) {
1803         AtomicHTMLToken endBody(HTMLTokenTypes::EndTag, bodyTag.localName());
1804         if (processBodyEndTagForInBody(endBody))
1805             processEndTag(token);
1806         return;
1807     }
1808     if (token.name() == addressTag
1809         || token.name() == articleTag
1810         || token.name() == asideTag
1811         || token.name() == blockquoteTag
1812         || token.name() == buttonTag
1813         || token.name() == centerTag
1814         || token.name() == detailsTag
1815         || token.name() == dirTag
1816         || token.name() == divTag
1817         || token.name() == dlTag
1818         || token.name() == fieldsetTag
1819         || token.name() == figcaptionTag
1820         || token.name() == figureTag
1821         || token.name() == footerTag
1822         || token.name() == headerTag
1823         || token.name() == hgroupTag
1824         || token.name() == listingTag
1825         || token.name() == menuTag
1826         || token.name() == navTag
1827         || token.name() == olTag
1828         || token.name() == preTag
1829         || token.name() == sectionTag
1830         || token.name() == summaryTag
1831         || token.name() == ulTag) {
1832         if (!m_tree.openElements()->inScope(token.name())) {
1833             parseError(token);
1834             return;
1835         }
1836         m_tree.generateImpliedEndTags();
1837         if (!m_tree.currentNode()->hasLocalName(token.name()))
1838             parseError(token);
1839         m_tree.openElements()->popUntilPopped(token.name());
1840         return;
1841     }
1842     if (token.name() == formTag) {
1843         RefPtr<Element> node = m_tree.takeForm();
1844         if (!node || !m_tree.openElements()->inScope(node.get())) {
1845             parseError(token);
1846             return;
1847         }
1848         m_tree.generateImpliedEndTags();
1849         if (m_tree.currentElement() != node.get())
1850             parseError(token);
1851         m_tree.openElements()->remove(node.get());
1852     }
1853     if (token.name() == pTag) {
1854         if (!m_tree.openElements()->inButtonScope(token.name())) {
1855             parseError(token);
1856             processFakeStartTag(pTag);
1857             ASSERT(m_tree.openElements()->inScope(token.name()));
1858             processEndTag(token);
1859             return;
1860         }
1861         m_tree.generateImpliedEndTagsWithExclusion(token.name());
1862         if (!m_tree.currentNode()->hasLocalName(token.name()))
1863             parseError(token);
1864         m_tree.openElements()->popUntilPopped(token.name());
1865         return;
1866     }
1867     if (token.name() == liTag) {
1868         if (!m_tree.openElements()->inListItemScope(token.name())) {
1869             parseError(token);
1870             return;
1871         }
1872         m_tree.generateImpliedEndTagsWithExclusion(token.name());
1873         if (!m_tree.currentNode()->hasLocalName(token.name()))
1874             parseError(token);
1875         m_tree.openElements()->popUntilPopped(token.name());
1876         return;
1877     }
1878     if (token.name() == ddTag
1879         || token.name() == dtTag) {
1880         if (!m_tree.openElements()->inScope(token.name())) {
1881             parseError(token);
1882             return;
1883         }
1884         m_tree.generateImpliedEndTagsWithExclusion(token.name());
1885         if (!m_tree.currentNode()->hasLocalName(token.name()))
1886             parseError(token);
1887         m_tree.openElements()->popUntilPopped(token.name());
1888         return;
1889     }
1890     if (isNumberedHeaderTag(token.name())) {
1891         if (!m_tree.openElements()->hasNumberedHeaderElementInScope()) {
1892             parseError(token);
1893             return;
1894         }
1895         m_tree.generateImpliedEndTags();
1896         if (!m_tree.currentNode()->hasLocalName(token.name()))
1897             parseError(token);
1898         m_tree.openElements()->popUntilNumberedHeaderElementPopped();
1899         return;
1900     }
1901     if (isFormattingTag(token.name())) {
1902         callTheAdoptionAgency(token);
1903         return;
1904     }
1905     if (token.name() == appletTag
1906         || token.name() == marqueeTag
1907         || token.name() == objectTag) {
1908         if (!m_tree.openElements()->inScope(token.name())) {
1909             parseError(token);
1910             return;
1911         }
1912         m_tree.generateImpliedEndTags();
1913         if (!m_tree.currentNode()->hasLocalName(token.name()))
1914             parseError(token);
1915         m_tree.openElements()->popUntilPopped(token.name());
1916         m_tree.activeFormattingElements()->clearToLastMarker();
1917         return;
1918     }
1919     if (token.name() == brTag) {
1920         parseError(token);
1921         processFakeStartTag(brTag);
1922         return;
1923     }
1924     processAnyOtherEndTagForInBody(token);
1925 }
1926
1927 bool HTMLTreeBuilder::processCaptionEndTagForInCaption()
1928 {
1929     if (!m_tree.openElements()->inTableScope(captionTag.localName())) {
1930         ASSERT(isParsingFragment());
1931         // FIXME: parse error
1932         return false;
1933     }
1934     m_tree.generateImpliedEndTags();
1935     // FIXME: parse error if (!m_tree.currentElement()->hasTagName(captionTag))
1936     m_tree.openElements()->popUntilPopped(captionTag.localName());
1937     m_tree.activeFormattingElements()->clearToLastMarker();
1938     setInsertionMode(InTableMode);
1939     return true;
1940 }
1941
1942 bool HTMLTreeBuilder::processTrEndTagForInRow()
1943 {
1944     if (!m_tree.openElements()->inTableScope(trTag.localName())) {
1945         ASSERT(isParsingFragment());
1946         // FIXME: parse error
1947         return false;
1948     }
1949     m_tree.openElements()->popUntilTableRowScopeMarker();
1950     ASSERT(m_tree.currentElement()->hasTagName(trTag));
1951     m_tree.openElements()->pop();
1952     setInsertionMode(InTableBodyMode);
1953     return true;
1954 }
1955
1956 bool HTMLTreeBuilder::processTableEndTagForInTable()
1957 {
1958     if (!m_tree.openElements()->inTableScope(tableTag)) {
1959         ASSERT(isParsingFragment());
1960         // FIXME: parse error.
1961         return false;
1962     }
1963     m_tree.openElements()->popUntilPopped(tableTag.localName());
1964     resetInsertionModeAppropriately();
1965     return true;
1966 }
1967
1968 void HTMLTreeBuilder::processEndTagForInTable(AtomicHTMLToken& token)
1969 {
1970     ASSERT(token.type() == HTMLTokenTypes::EndTag);
1971     if (token.name() == tableTag) {
1972         processTableEndTagForInTable();
1973         return;
1974     }
1975     if (token.name() == bodyTag
1976         || isCaptionColOrColgroupTag(token.name())
1977         || token.name() == htmlTag
1978         || isTableBodyContextTag(token.name())
1979         || isTableCellContextTag(token.name())
1980         || token.name() == trTag) {
1981         parseError(token);
1982         return;
1983     }
1984     // Is this redirection necessary here?
1985     HTMLConstructionSite::RedirectToFosterParentGuard redirecter(m_tree);
1986     processEndTagForInBody(token);
1987 }
1988
1989 void HTMLTreeBuilder::processEndTag(AtomicHTMLToken& token)
1990 {
1991     ASSERT(token.type() == HTMLTokenTypes::EndTag);
1992     switch (insertionMode()) {
1993     case InitialMode:
1994         ASSERT(insertionMode() == InitialMode);
1995         defaultForInitial();
1996         // Fall through.
1997     case BeforeHTMLMode:
1998         ASSERT(insertionMode() == BeforeHTMLMode);
1999         if (token.name() != headTag && token.name() != bodyTag && token.name() != htmlTag && token.name() != brTag) {
2000             parseError(token);
2001             return;
2002         }
2003         defaultForBeforeHTML();
2004         // Fall through.
2005     case BeforeHeadMode:
2006         ASSERT(insertionMode() == BeforeHeadMode);
2007         if (token.name() != headTag && token.name() != bodyTag && token.name() != htmlTag && token.name() != brTag) {
2008             parseError(token);
2009             return;
2010         }
2011         defaultForBeforeHead();
2012         // Fall through.
2013     case InHeadMode:
2014         ASSERT(insertionMode() == InHeadMode);
2015         if (token.name() == headTag) {
2016             m_tree.openElements()->popHTMLHeadElement();
2017             setInsertionMode(AfterHeadMode);
2018             return;
2019         }
2020         if (token.name() != bodyTag && token.name() != htmlTag && token.name() != brTag) {
2021             parseError(token);
2022             return;
2023         }
2024         defaultForInHead();
2025         // Fall through.
2026     case AfterHeadMode:
2027         ASSERT(insertionMode() == AfterHeadMode);
2028         if (token.name() != bodyTag && token.name() != htmlTag && token.name() != brTag) {
2029             parseError(token);
2030             return;
2031         }
2032         defaultForAfterHead();
2033         // Fall through
2034     case InBodyMode:
2035         ASSERT(insertionMode() == InBodyMode);
2036         processEndTagForInBody(token);
2037         break;
2038     case InTableMode:
2039         ASSERT(insertionMode() == InTableMode);
2040         processEndTagForInTable(token);
2041         break;
2042     case InCaptionMode:
2043         ASSERT(insertionMode() == InCaptionMode);
2044         if (token.name() == captionTag) {
2045             processCaptionEndTagForInCaption();
2046             return;
2047         }
2048         if (token.name() == tableTag) {
2049             parseError(token);
2050             if (!processCaptionEndTagForInCaption()) {
2051                 ASSERT(isParsingFragment());
2052                 return;
2053             }
2054             processEndTag(token);
2055             return;
2056         }
2057         if (token.name() == bodyTag
2058             || token.name() == colTag
2059             || token.name() == colgroupTag
2060             || token.name() == htmlTag
2061             || isTableBodyContextTag(token.name())
2062             || isTableCellContextTag(token.name())
2063             || token.name() == trTag) {
2064             parseError(token);
2065             return;
2066         }
2067         processEndTagForInBody(token);
2068         break;
2069     case InColumnGroupMode:
2070         ASSERT(insertionMode() == InColumnGroupMode);
2071         if (token.name() == colgroupTag) {
2072             processColgroupEndTagForInColumnGroup();
2073             return;
2074         }
2075         if (token.name() == colTag) {
2076             parseError(token);
2077             return;
2078         }
2079         if (!processColgroupEndTagForInColumnGroup()) {
2080             ASSERT(isParsingFragment());
2081             return;
2082         }
2083         processEndTag(token);
2084         break;
2085     case InRowMode:
2086         ASSERT(insertionMode() == InRowMode);
2087         processEndTagForInRow(token);
2088         break;
2089     case InCellMode:
2090         ASSERT(insertionMode() == InCellMode);
2091         processEndTagForInCell(token);
2092         break;
2093     case InTableBodyMode:
2094         ASSERT(insertionMode() == InTableBodyMode);
2095         processEndTagForInTableBody(token);
2096         break;
2097     case AfterBodyMode:
2098         ASSERT(insertionMode() == AfterBodyMode);
2099         if (token.name() == htmlTag) {
2100             if (isParsingFragment()) {
2101                 parseError(token);
2102                 return;
2103             }
2104             setInsertionMode(AfterAfterBodyMode);
2105             return;
2106         }
2107         // Fall through.
2108     case AfterAfterBodyMode:
2109         ASSERT(insertionMode() == AfterBodyMode || insertionMode() == AfterAfterBodyMode);
2110         parseError(token);
2111         setInsertionMode(InBodyMode);
2112         processEndTag(token);
2113         break;
2114     case InHeadNoscriptMode:
2115         ASSERT(insertionMode() == InHeadNoscriptMode);
2116         if (token.name() == noscriptTag) {
2117             ASSERT(m_tree.currentElement()->hasTagName(noscriptTag));
2118             m_tree.openElements()->pop();
2119             ASSERT(m_tree.currentElement()->hasTagName(headTag));
2120             setInsertionMode(InHeadMode);
2121             return;
2122         }
2123         if (token.name() != brTag) {
2124             parseError(token);
2125             return;
2126         }
2127         defaultForInHeadNoscript();
2128         processToken(token);
2129         break;
2130     case TextMode:
2131         if (token.name() == scriptTag) {
2132             // Pause ourselves so that parsing stops until the script can be processed by the caller.
2133             m_isPaused = true;
2134             ASSERT(m_tree.currentElement()->hasTagName(scriptTag));
2135             m_scriptToProcess = m_tree.currentElement();
2136             m_scriptToProcessStartPosition = m_lastScriptElementStartPosition;
2137             m_tree.openElements()->pop();
2138             if (isParsingFragment() && m_fragmentContext.scriptingPermission() == FragmentScriptingNotAllowed)
2139                 m_scriptToProcess->removeAllChildren();
2140             setInsertionMode(m_originalInsertionMode);
2141
2142             // This token will not have been created by the tokenizer if a
2143             // self-closing script tag was encountered and pre-HTML5 parser
2144             // quirks are enabled. We must set the tokenizer's state to
2145             // DataState explicitly if the tokenizer didn't have a chance to.
2146             ASSERT(m_parser->tokenizer()->state() == HTMLTokenizerState::DataState || m_usePreHTML5ParserQuirks);
2147             m_parser->tokenizer()->setState(HTMLTokenizerState::DataState);
2148             return;
2149         }
2150         m_tree.openElements()->pop();
2151         setInsertionMode(m_originalInsertionMode);
2152         break;
2153     case InFramesetMode:
2154         ASSERT(insertionMode() == InFramesetMode);
2155         if (token.name() == framesetTag) {
2156             if (m_tree.currentNode() == m_tree.openElements()->rootNode()) {
2157                 parseError(token);
2158                 return;
2159             }
2160             m_tree.openElements()->pop();
2161             if (!isParsingFragment() && !m_tree.currentElement()->hasTagName(framesetTag))
2162                 setInsertionMode(AfterFramesetMode);
2163             return;
2164         }
2165         break;
2166     case AfterFramesetMode:
2167         ASSERT(insertionMode() == AfterFramesetMode);
2168         if (token.name() == htmlTag) {
2169             setInsertionMode(AfterAfterFramesetMode);
2170             return;
2171         }
2172         // Fall through.
2173     case AfterAfterFramesetMode:
2174         ASSERT(insertionMode() == AfterFramesetMode || insertionMode() == AfterAfterFramesetMode);
2175         parseError(token);
2176         break;
2177     case InSelectInTableMode:
2178         ASSERT(insertionMode() == InSelectInTableMode);
2179         if (token.name() == captionTag
2180             || token.name() == tableTag
2181             || isTableBodyContextTag(token.name())
2182             || token.name() == trTag
2183             || isTableCellContextTag(token.name())) {
2184             parseError(token);
2185             if (m_tree.openElements()->inTableScope(token.name())) {
2186                 AtomicHTMLToken endSelect(HTMLTokenTypes::EndTag, selectTag.localName());
2187                 processEndTag(endSelect);
2188                 processEndTag(token);
2189             }
2190             return;
2191         }
2192         // Fall through.
2193     case InSelectMode:
2194         ASSERT(insertionMode() == InSelectMode || insertionMode() == InSelectInTableMode);
2195         if (token.name() == optgroupTag) {
2196             if (m_tree.currentNode()->hasTagName(optionTag) && m_tree.oneBelowTop()->hasTagName(optgroupTag))
2197                 processFakeEndTag(optionTag);
2198             if (m_tree.currentNode()->hasTagName(optgroupTag)) {
2199                 m_tree.openElements()->pop();
2200                 return;
2201             }
2202             parseError(token);
2203             return;
2204         }
2205         if (token.name() == optionTag) {
2206             if (m_tree.currentNode()->hasTagName(optionTag)) {
2207                 m_tree.openElements()->pop();
2208                 return;
2209             }
2210             parseError(token);
2211             return;
2212         }
2213         if (token.name() == selectTag) {
2214             if (!m_tree.openElements()->inSelectScope(token.name())) {
2215                 ASSERT(isParsingFragment());
2216                 parseError(token);
2217                 return;
2218             }
2219             m_tree.openElements()->popUntilPopped(selectTag.localName());
2220             resetInsertionModeAppropriately();
2221             return;
2222         }
2223         break;
2224     case InTableTextMode:
2225         defaultForInTableText();
2226         processEndTag(token);
2227         break;
2228     }
2229 }
2230
2231 void HTMLTreeBuilder::processComment(AtomicHTMLToken& token)
2232 {
2233     ASSERT(token.type() == HTMLTokenTypes::Comment);
2234     if (m_insertionMode == InitialMode
2235         || m_insertionMode == BeforeHTMLMode
2236         || m_insertionMode == AfterAfterBodyMode
2237         || m_insertionMode == AfterAfterFramesetMode) {
2238         m_tree.insertCommentOnDocument(token);
2239         return;
2240     }
2241     if (m_insertionMode == AfterBodyMode) {
2242         m_tree.insertCommentOnHTMLHtmlElement(token);
2243         return;
2244     }
2245     if (m_insertionMode == InTableTextMode) {
2246         defaultForInTableText();
2247         processComment(token);
2248         return;
2249     }
2250     m_tree.insertComment(token);
2251 }
2252
2253 void HTMLTreeBuilder::processCharacter(AtomicHTMLToken& token)
2254 {
2255     ASSERT(token.type() == HTMLTokenTypes::Character);
2256     ExternalCharacterTokenBuffer buffer(token);
2257     processCharacterBuffer(buffer);
2258 }
2259
2260 void HTMLTreeBuilder::processCharacterBuffer(ExternalCharacterTokenBuffer& buffer)
2261 {
2262 ReprocessBuffer:
2263     // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#parsing-main-inbody
2264     // Note that this logic is different than the generic \r\n collapsing
2265     // handled in the input stream preprocessor. This logic is here as an
2266     // "authoring convenience" so folks can write:
2267     //
2268     // <pre>
2269     // lorem ipsum
2270     // lorem ipsum
2271     // </pre>
2272     //
2273     // without getting an extra newline at the start of their <pre> element.
2274     if (m_shouldSkipLeadingNewline) {
2275         m_shouldSkipLeadingNewline = false;
2276         buffer.skipAtMostOneLeadingNewline();
2277         if (buffer.isEmpty())
2278             return;
2279     }
2280
2281     switch (insertionMode()) {
2282     case InitialMode: {
2283         ASSERT(insertionMode() == InitialMode);
2284         buffer.skipLeadingWhitespace();
2285         if (buffer.isEmpty())
2286             return;
2287         defaultForInitial();
2288         // Fall through.
2289     }
2290     case BeforeHTMLMode: {
2291         ASSERT(insertionMode() == BeforeHTMLMode);
2292         buffer.skipLeadingWhitespace();
2293         if (buffer.isEmpty())
2294             return;
2295         defaultForBeforeHTML();
2296         // Fall through.
2297     }
2298     case BeforeHeadMode: {
2299         ASSERT(insertionMode() == BeforeHeadMode);
2300         buffer.skipLeadingWhitespace();
2301         if (buffer.isEmpty())
2302             return;
2303         defaultForBeforeHead();
2304         // Fall through.
2305     }
2306     case InHeadMode: {
2307         ASSERT(insertionMode() == InHeadMode);
2308         String leadingWhitespace = buffer.takeLeadingWhitespace();
2309         if (!leadingWhitespace.isEmpty())
2310             m_tree.insertTextNode(leadingWhitespace, AllWhitespace);
2311         if (buffer.isEmpty())
2312             return;
2313         defaultForInHead();
2314         // Fall through.
2315     }
2316     case AfterHeadMode: {
2317         ASSERT(insertionMode() == AfterHeadMode);
2318         String leadingWhitespace = buffer.takeLeadingWhitespace();
2319         if (!leadingWhitespace.isEmpty())
2320             m_tree.insertTextNode(leadingWhitespace, AllWhitespace);
2321         if (buffer.isEmpty())
2322             return;
2323         defaultForAfterHead();
2324         // Fall through.
2325     }
2326     case InBodyMode:
2327     case InCaptionMode:
2328     case InCellMode: {
2329         ASSERT(insertionMode() == InBodyMode || insertionMode() == InCaptionMode || insertionMode() == InCellMode);
2330         processCharacterBufferForInBody(buffer);
2331         break;
2332     }
2333     case InTableMode:
2334     case InTableBodyMode:
2335     case InRowMode: {
2336         ASSERT(insertionMode() == InTableMode || insertionMode() == InTableBodyMode || insertionMode() == InRowMode);
2337         ASSERT(m_pendingTableCharacters.isEmpty());
2338         if (m_tree.currentNode()->isElementNode()
2339             && (m_tree.currentElement()->hasTagName(HTMLNames::tableTag)
2340                 || m_tree.currentElement()->hasTagName(HTMLNames::tbodyTag)
2341                 || m_tree.currentElement()->hasTagName(HTMLNames::tfootTag)
2342                 || m_tree.currentElement()->hasTagName(HTMLNames::theadTag)
2343                 || m_tree.currentElement()->hasTagName(HTMLNames::trTag))) {
2344             m_originalInsertionMode = m_insertionMode;
2345             setInsertionMode(InTableTextMode);
2346             // Note that we fall through to the InTableTextMode case below.
2347         } else {
2348             HTMLConstructionSite::RedirectToFosterParentGuard redirecter(m_tree);
2349             processCharacterBufferForInBody(buffer);
2350             break;
2351         }
2352         // Fall through.
2353     }
2354     case InTableTextMode: {
2355         buffer.giveRemainingTo(m_pendingTableCharacters);
2356         break;
2357     }
2358     case InColumnGroupMode: {
2359         ASSERT(insertionMode() == InColumnGroupMode);
2360         String leadingWhitespace = buffer.takeLeadingWhitespace();
2361         if (!leadingWhitespace.isEmpty())
2362             m_tree.insertTextNode(leadingWhitespace, AllWhitespace);
2363         if (buffer.isEmpty())
2364             return;
2365         if (!processColgroupEndTagForInColumnGroup()) {
2366             ASSERT(isParsingFragment());
2367             // The spec tells us to drop these characters on the floor.
2368             buffer.skipLeadingNonWhitespace();
2369             if (buffer.isEmpty())
2370                 return;
2371         }
2372         goto ReprocessBuffer;
2373     }
2374     case AfterBodyMode:
2375     case AfterAfterBodyMode: {
2376         ASSERT(insertionMode() == AfterBodyMode || insertionMode() == AfterAfterBodyMode);
2377         // FIXME: parse error
2378         setInsertionMode(InBodyMode);
2379         goto ReprocessBuffer;
2380         break;
2381     }
2382     case TextMode: {
2383         ASSERT(insertionMode() == TextMode);
2384         m_tree.insertTextNode(buffer.takeRemaining());
2385         break;
2386     }
2387     case InHeadNoscriptMode: {
2388         ASSERT(insertionMode() == InHeadNoscriptMode);
2389         String leadingWhitespace = buffer.takeLeadingWhitespace();
2390         if (!leadingWhitespace.isEmpty())
2391             m_tree.insertTextNode(leadingWhitespace, AllWhitespace);
2392         if (buffer.isEmpty())
2393             return;
2394         defaultForInHeadNoscript();
2395         goto ReprocessBuffer;
2396         break;
2397     }
2398     case InFramesetMode:
2399     case AfterFramesetMode: {
2400         ASSERT(insertionMode() == InFramesetMode || insertionMode() == AfterFramesetMode || insertionMode() == AfterAfterFramesetMode);
2401         String leadingWhitespace = buffer.takeRemainingWhitespace();
2402         if (!leadingWhitespace.isEmpty())
2403             m_tree.insertTextNode(leadingWhitespace, AllWhitespace);
2404         // FIXME: We should generate a parse error if we skipped over any
2405         // non-whitespace characters.
2406         break;
2407     }
2408     case InSelectInTableMode:
2409     case InSelectMode: {
2410         ASSERT(insertionMode() == InSelectMode || insertionMode() == InSelectInTableMode);
2411         m_tree.insertTextNode(buffer.takeRemaining());
2412         break;
2413     }
2414     case AfterAfterFramesetMode: {
2415         String leadingWhitespace = buffer.takeRemainingWhitespace();
2416         if (!leadingWhitespace.isEmpty()) {
2417             m_tree.reconstructTheActiveFormattingElements();
2418             m_tree.insertTextNode(leadingWhitespace, AllWhitespace);
2419         }
2420         // FIXME: We should generate a parse error if we skipped over any
2421         // non-whitespace characters.
2422         break;
2423     }
2424     }
2425 }
2426
2427 void HTMLTreeBuilder::processCharacterBufferForInBody(ExternalCharacterTokenBuffer& buffer)
2428 {
2429     m_tree.reconstructTheActiveFormattingElements();
2430     String characters = buffer.takeRemaining();
2431     m_tree.insertTextNode(characters);
2432     if (m_framesetOk && !isAllWhitespaceOrReplacementCharacters(characters))
2433         m_framesetOk = false;
2434 }
2435
2436 void HTMLTreeBuilder::processEndOfFile(AtomicHTMLToken& token)
2437 {
2438     ASSERT(token.type() == HTMLTokenTypes::EndOfFile);
2439     switch (insertionMode()) {
2440     case InitialMode:
2441         ASSERT(insertionMode() == InitialMode);
2442         defaultForInitial();
2443         // Fall through.
2444     case BeforeHTMLMode:
2445         ASSERT(insertionMode() == BeforeHTMLMode);
2446         defaultForBeforeHTML();
2447         // Fall through.
2448     case BeforeHeadMode:
2449         ASSERT(insertionMode() == BeforeHeadMode);
2450         defaultForBeforeHead();
2451         // Fall through.
2452     case InHeadMode:
2453         ASSERT(insertionMode() == InHeadMode);
2454         defaultForInHead();
2455         // Fall through.
2456     case AfterHeadMode:
2457         ASSERT(insertionMode() == AfterHeadMode);
2458         defaultForAfterHead();
2459         // Fall through
2460     case InBodyMode:
2461     case InCellMode:
2462     case InCaptionMode:
2463     case InRowMode:
2464         ASSERT(insertionMode() == InBodyMode || insertionMode() == InCellMode || insertionMode() == InCaptionMode || insertionMode() == InRowMode);
2465         notImplemented(); // Emit parse error based on what elements are still open.
2466         break;
2467     case AfterBodyMode:
2468     case AfterAfterBodyMode:
2469         ASSERT(insertionMode() == AfterBodyMode || insertionMode() == AfterAfterBodyMode);
2470         break;
2471     case InHeadNoscriptMode:
2472         ASSERT(insertionMode() == InHeadNoscriptMode);
2473         defaultForInHeadNoscript();
2474         processEndOfFile(token);
2475         return;
2476     case AfterFramesetMode:
2477     case AfterAfterFramesetMode:
2478         ASSERT(insertionMode() == AfterFramesetMode || insertionMode() == AfterAfterFramesetMode);
2479         break;
2480     case InFramesetMode:
2481     case InTableMode:
2482     case InTableBodyMode:
2483     case InSelectInTableMode:
2484     case InSelectMode:
2485         ASSERT(insertionMode() == InSelectMode || insertionMode() == InSelectInTableMode || insertionMode() == InTableMode || insertionMode() == InFramesetMode || insertionMode() == InTableBodyMode);
2486         if (m_tree.currentNode() != m_tree.openElements()->rootNode())
2487             parseError(token);
2488         break;
2489     case InColumnGroupMode:
2490         if (m_tree.currentNode() == m_tree.openElements()->rootNode()) {
2491             ASSERT(isParsingFragment());
2492             return; // FIXME: Should we break here instead of returning?
2493         }
2494         if (!processColgroupEndTagForInColumnGroup()) {
2495             ASSERT(isParsingFragment());
2496             return; // FIXME: Should we break here instead of returning?
2497         }
2498         processEndOfFile(token);
2499         return;
2500     case InTableTextMode:
2501         defaultForInTableText();
2502         processEndOfFile(token);
2503         return;
2504     case TextMode:
2505         parseError(token);
2506         if (m_tree.currentNode()->hasTagName(scriptTag))
2507             notImplemented(); // mark the script element as "already started".
2508         m_tree.openElements()->pop();
2509         ASSERT(m_originalInsertionMode != TextMode);
2510         setInsertionMode(m_originalInsertionMode);
2511         processEndOfFile(token);
2512         return;
2513     }
2514     ASSERT(m_tree.currentNode());
2515     m_tree.openElements()->popAll();
2516 }
2517
2518 void HTMLTreeBuilder::defaultForInitial()
2519 {
2520     notImplemented();
2521     if (!m_fragmentContext.fragment() && !m_document->isSrcdocDocument())
2522         m_document->setCompatibilityMode(Document::QuirksMode);
2523     // FIXME: parse error
2524     setInsertionMode(BeforeHTMLMode);
2525 }
2526
2527 void HTMLTreeBuilder::defaultForBeforeHTML()
2528 {
2529     AtomicHTMLToken startHTML(HTMLTokenTypes::StartTag, htmlTag.localName());
2530     m_tree.insertHTMLHtmlStartTagBeforeHTML(startHTML);
2531     setInsertionMode(BeforeHeadMode);
2532 }
2533
2534 void HTMLTreeBuilder::defaultForBeforeHead()
2535 {
2536     AtomicHTMLToken startHead(HTMLTokenTypes::StartTag, headTag.localName());
2537     processStartTag(startHead);
2538 }
2539
2540 void HTMLTreeBuilder::defaultForInHead()
2541 {
2542     AtomicHTMLToken endHead(HTMLTokenTypes::EndTag, headTag.localName());
2543     processEndTag(endHead);
2544 }
2545
2546 void HTMLTreeBuilder::defaultForInHeadNoscript()
2547 {
2548     AtomicHTMLToken endNoscript(HTMLTokenTypes::EndTag, noscriptTag.localName());
2549     processEndTag(endNoscript);
2550 }
2551
2552 void HTMLTreeBuilder::defaultForAfterHead()
2553 {
2554     AtomicHTMLToken startBody(HTMLTokenTypes::StartTag, bodyTag.localName());
2555     processStartTag(startBody);
2556     m_framesetOk = true;
2557 }
2558
2559 void HTMLTreeBuilder::defaultForInTableText()
2560 {
2561     String characters = m_pendingTableCharacters.toString();
2562     m_pendingTableCharacters.clear();
2563     if (!isAllWhitespace(characters)) {
2564         // FIXME: parse error
2565         HTMLConstructionSite::RedirectToFosterParentGuard redirecter(m_tree);
2566         m_tree.reconstructTheActiveFormattingElements();
2567         m_tree.insertTextNode(characters, NotAllWhitespace);
2568         m_framesetOk = false;
2569         setInsertionMode(m_originalInsertionMode);
2570         return;
2571     }
2572     m_tree.insertTextNode(characters);
2573     setInsertionMode(m_originalInsertionMode);
2574 }
2575
2576 bool HTMLTreeBuilder::processStartTagForInHead(AtomicHTMLToken& token)
2577 {
2578     ASSERT(token.type() == HTMLTokenTypes::StartTag);
2579     if (token.name() == htmlTag) {
2580         m_tree.insertHTMLHtmlStartTagInBody(token);
2581         return true;
2582     }
2583     if (token.name() == baseTag
2584         || token.name() == basefontTag
2585         || token.name() == bgsoundTag
2586         || token.name() == commandTag
2587         || token.name() == linkTag
2588         || token.name() == metaTag) {
2589         m_tree.insertSelfClosingHTMLElement(token);
2590         // Note: The custom processing for the <meta> tag is done in HTMLMetaElement::process().
2591         return true;
2592     }
2593     if (token.name() == titleTag) {
2594         processGenericRCDATAStartTag(token);
2595         return true;
2596     }
2597     if (token.name() == noscriptTag) {
2598         if (scriptEnabled(m_document->frame())) {
2599             processGenericRawTextStartTag(token);
2600             return true;
2601         }
2602         m_tree.insertHTMLElement(token);
2603         setInsertionMode(InHeadNoscriptMode);
2604         return true;
2605     }
2606     if (token.name() == noframesTag || token.name() == styleTag) {
2607         processGenericRawTextStartTag(token);
2608         return true;
2609     }
2610     if (token.name() == scriptTag) {
2611         processScriptStartTag(token);
2612         if (m_usePreHTML5ParserQuirks && token.selfClosing())
2613             processFakeEndTag(scriptTag);
2614         return true;
2615     }
2616     if (token.name() == headTag) {
2617         parseError(token);
2618         return true;
2619     }
2620     return false;
2621 }
2622
2623 void HTMLTreeBuilder::processGenericRCDATAStartTag(AtomicHTMLToken& token)
2624 {
2625     ASSERT(token.type() == HTMLTokenTypes::StartTag);
2626     m_tree.insertHTMLElement(token);
2627     m_parser->tokenizer()->setState(HTMLTokenizerState::RCDATAState);
2628     m_originalInsertionMode = m_insertionMode;
2629     setInsertionMode(TextMode);
2630 }
2631
2632 void HTMLTreeBuilder::processGenericRawTextStartTag(AtomicHTMLToken& token)
2633 {
2634     ASSERT(token.type() == HTMLTokenTypes::StartTag);
2635     m_tree.insertHTMLElement(token);
2636     m_parser->tokenizer()->setState(HTMLTokenizerState::RAWTEXTState);
2637     m_originalInsertionMode = m_insertionMode;
2638     setInsertionMode(TextMode);
2639 }
2640
2641 void HTMLTreeBuilder::processScriptStartTag(AtomicHTMLToken& token)
2642 {
2643     ASSERT(token.type() == HTMLTokenTypes::StartTag);
2644     m_tree.insertScriptElement(token);
2645     m_parser->tokenizer()->setState(HTMLTokenizerState::ScriptDataState);
2646     m_originalInsertionMode = m_insertionMode;
2647
2648     TextPosition position = m_parser->textPosition();
2649
2650     ASSERT(position.m_line == m_parser->tokenizer()->lineNumber());
2651
2652     m_lastScriptElementStartPosition = position;
2653
2654     setInsertionMode(TextMode);
2655 }
2656
2657 // http://www.whatwg.org/specs/web-apps/current-work/multipage/tree-construction.html#tree-construction
2658 bool HTMLTreeBuilder::shouldProcessTokenInForeignContent(AtomicHTMLToken& token)
2659 {
2660     if (m_tree.isEmpty())
2661         return false;
2662     ContainerNode* node = m_tree.currentNode();
2663     if (isInHTMLNamespace(node))
2664         return false;
2665     if (HTMLElementStack::isMathMLTextIntegrationPoint(node)) {
2666         if (token.type() == HTMLTokenTypes::StartTag
2667             && token.name() != MathMLNames::mglyphTag
2668             && token.name() != MathMLNames::malignmarkTag)
2669             return false;
2670         if (token.type() == HTMLTokenTypes::Character)
2671             return false;
2672     }
2673     if (node->hasTagName(MathMLNames::annotation_xmlTag)
2674         && token.type() == HTMLTokenTypes::StartTag
2675         && token.name() == SVGNames::svgTag)
2676         return false;
2677     if (HTMLElementStack::isHTMLIntegrationPoint(node)) {
2678         if (token.type() == HTMLTokenTypes::StartTag)
2679             return false;
2680         if (token.type() == HTMLTokenTypes::Character)
2681             return false;
2682     }
2683     if (token.type() == HTMLTokenTypes::EndOfFile)
2684         return false;
2685     return true;
2686 }
2687
2688 void HTMLTreeBuilder::processTokenInForeignContent(AtomicHTMLToken& token)
2689 {
2690     switch (token.type()) {
2691     case HTMLTokenTypes::Uninitialized:
2692         ASSERT_NOT_REACHED();
2693         break;
2694     case HTMLTokenTypes::DOCTYPE:
2695         parseError(token);
2696         break;
2697     case HTMLTokenTypes::StartTag: {
2698         if (token.name() == bTag
2699             || token.name() == bigTag
2700             || token.name() == blockquoteTag
2701             || token.name() == bodyTag
2702             || token.name() == brTag
2703             || token.name() == centerTag
2704             || token.name() == codeTag
2705             || token.name() == ddTag
2706             || token.name() == divTag
2707             || token.name() == dlTag
2708             || token.name() == dtTag
2709             || token.name() == emTag
2710             || token.name() == embedTag
2711             || isNumberedHeaderTag(token.name())
2712             || token.name() == headTag
2713             || token.name() == hrTag
2714             || token.name() == iTag
2715             || token.name() == imgTag
2716             || token.name() == liTag
2717             || token.name() == listingTag
2718             || token.name() == menuTag
2719             || token.name() == metaTag
2720             || token.name() == nobrTag
2721             || token.name() == olTag
2722             || token.name() == pTag
2723             || token.name() == preTag
2724             || token.name() == rubyTag
2725             || token.name() == sTag
2726             || token.name() == smallTag
2727             || token.name() == spanTag
2728             || token.name() == strongTag
2729             || token.name() == strikeTag
2730             || token.name() == subTag
2731             || token.name() == supTag
2732             || token.name() == tableTag
2733             || token.name() == ttTag
2734             || token.name() == uTag
2735             || token.name() == ulTag
2736             || token.name() == varTag
2737             || (token.name() == fontTag && (token.getAttributeItem(colorAttr) || token.getAttributeItem(faceAttr) || token.getAttributeItem(sizeAttr)))) {
2738             parseError(token);
2739             m_tree.openElements()->popUntilForeignContentScopeMarker();
2740             processStartTag(token);
2741             return;
2742         }
2743         const AtomicString& currentNamespace = m_tree.currentElement()->namespaceURI();
2744         if (currentNamespace == MathMLNames::mathmlNamespaceURI)
2745             adjustMathMLAttributes(token);
2746         if (currentNamespace == SVGNames::svgNamespaceURI) {
2747             adjustSVGTagNameCase(token);
2748             adjustSVGAttributes(token);
2749         }
2750         adjustForeignAttributes(token);
2751         m_tree.insertForeignElement(token, currentNamespace);
2752         break;
2753     }
2754     case HTMLTokenTypes::EndTag: {
2755         if (m_tree.currentNode()->namespaceURI() == SVGNames::svgNamespaceURI)
2756             adjustSVGTagNameCase(token);
2757
2758         if (token.name() == SVGNames::scriptTag && m_tree.currentNode()->hasTagName(SVGNames::scriptTag)) {
2759             m_isPaused = true;
2760             m_scriptToProcess = m_tree.currentElement();
2761             m_tree.openElements()->pop();
2762             return;
2763         }
2764         if (!isInHTMLNamespace(m_tree.currentNode())) {
2765             // FIXME: This code just wants an Element* iterator, instead of an ElementRecord*
2766             HTMLElementStack::ElementRecord* nodeRecord = m_tree.openElements()->topRecord();
2767             if (!nodeRecord->node()->hasLocalName(token.name()))
2768                 parseError(token);
2769             while (1) {
2770                 if (nodeRecord->node()->hasLocalName(token.name())) {
2771                     m_tree.openElements()->popUntilPopped(nodeRecord->element());
2772                     return;
2773                 }
2774                 nodeRecord = nodeRecord->next();
2775
2776                 if (isInHTMLNamespace(nodeRecord->node()))
2777                     break;
2778             }
2779         }
2780         // Otherwise, process the token according to the rules given in the section corresponding to the current insertion mode in HTML content.
2781         processEndTag(token);
2782         break;
2783     }
2784     case HTMLTokenTypes::Comment:
2785         m_tree.insertComment(token);
2786         return;
2787     case HTMLTokenTypes::Character: {
2788         String characters = String(token.characters().data(), token.characters().size());
2789         m_tree.insertTextNode(characters);
2790         if (m_framesetOk && !isAllWhitespaceOrReplacementCharacters(characters))
2791             m_framesetOk = false;
2792         break;
2793     }
2794     case HTMLTokenTypes::EndOfFile:
2795         ASSERT_NOT_REACHED();
2796         break;
2797     }
2798 }
2799
2800 void HTMLTreeBuilder::finished()
2801 {
2802     if (isParsingFragment())
2803         return;
2804     
2805     ASSERT(m_document);
2806     // Warning, this may detach the parser. Do not do anything else after this.
2807     m_document->finishedParsing();
2808 }
2809
2810 void HTMLTreeBuilder::parseError(AtomicHTMLToken&)
2811 {
2812 }
2813
2814 bool HTMLTreeBuilder::scriptEnabled(Frame* frame)
2815 {
2816     if (!frame)
2817         return false;
2818     return frame->script()->canExecuteScripts(NotAboutToExecuteScript);
2819 }
2820
2821 bool HTMLTreeBuilder::pluginsEnabled(Frame* frame)
2822 {
2823     if (!frame)
2824         return false;
2825     return frame->loader()->subframeLoader()->allowPlugins(NotAboutToInstantiatePlugin);
2826 }
2827
2828 }