9d839120dbac77c0d20b3a9b1c3ab91105672e68
[WebKit-https.git] / Source / WebCore / html / parser / HTMLTreeBuilder.cpp
1 /*
2  * Copyright (C) 2010 Google, Inc. All Rights Reserved.
3  * Copyright (C) 2011 Apple Inc. All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY GOOGLE INC. ``AS IS'' AND ANY
15  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL GOOGLE INC. OR
18  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  */
26
27 #include "config.h"
28 #include "HTMLTreeBuilder.h"
29
30 #include "Comment.h"
31 #include "DOMWindow.h"
32 #include "DocumentFragment.h"
33 #include "DocumentType.h"
34 #include "Frame.h"
35 #include "HTMLDocument.h"
36 #include "HTMLDocumentParser.h"
37 #include "HTMLElementFactory.h"
38 #include "HTMLFormElement.h"
39 #include "HTMLHtmlElement.h"
40 #include "HTMLNames.h"
41 #include "HTMLParserIdioms.h"
42 #include "HTMLScriptElement.h"
43 #include "HTMLToken.h"
44 #include "HTMLTokenizer.h"
45 #include "LocalizedStrings.h"
46 #include "MathMLNames.h"
47 #include "NotImplemented.h"
48 #include "SVGNames.h"
49 #include "Text.h"
50 #include "XLinkNames.h"
51 #include "XMLNSNames.h"
52 #include "XMLNames.h"
53 #include <wtf/unicode/CharacterNames.h>
54
55 namespace WebCore {
56
57 using namespace HTMLNames;
58
59 static const int uninitializedLineNumberValue = -1;
60
61 static TextPosition uninitializedPositionValue1()
62 {
63     return TextPosition(OrdinalNumber::fromOneBasedInt(-1), OrdinalNumber::first());
64 }
65
66 namespace {
67
68 inline bool isHTMLSpaceOrReplacementCharacter(UChar character)
69 {
70     return isHTMLSpace(character) || character == replacementCharacter;
71 }
72
73 inline bool isAllWhitespace(const String& string)
74 {
75     return string.isAllSpecialCharacters<isHTMLSpace>();
76 }
77
78 inline bool isAllWhitespaceOrReplacementCharacters(const String& string)
79 {
80     return string.isAllSpecialCharacters<isHTMLSpaceOrReplacementCharacter>();
81 }
82
83 bool isNumberedHeaderTag(const AtomicString& tagName)
84 {
85     return tagName == h1Tag
86         || tagName == h2Tag
87         || tagName == h3Tag
88         || tagName == h4Tag
89         || tagName == h5Tag
90         || tagName == h6Tag;
91 }
92
93 bool isCaptionColOrColgroupTag(const AtomicString& tagName)
94 {
95     return tagName == captionTag
96         || tagName == colTag
97         || tagName == colgroupTag;
98 }
99
100 bool isTableCellContextTag(const AtomicString& tagName)
101 {
102     return tagName == thTag || tagName == tdTag;
103 }
104
105 bool isTableBodyContextTag(const AtomicString& tagName)
106 {
107     return tagName == tbodyTag
108         || tagName == tfootTag
109         || tagName == theadTag;
110 }
111
112 // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#special
113 bool isSpecialNode(Node* node)
114 {
115     if (node->hasTagName(MathMLNames::miTag)
116         || node->hasTagName(MathMLNames::moTag)
117         || node->hasTagName(MathMLNames::mnTag)
118         || node->hasTagName(MathMLNames::msTag)
119         || node->hasTagName(MathMLNames::mtextTag)
120         || node->hasTagName(MathMLNames::annotation_xmlTag)
121         || node->hasTagName(SVGNames::foreignObjectTag)
122         || node->hasTagName(SVGNames::descTag)
123         || node->hasTagName(SVGNames::titleTag))
124         return true;
125     if (node->nodeType() == Node::DOCUMENT_FRAGMENT_NODE)
126         return true;
127     if (!isInHTMLNamespace(node))
128         return false;
129     const AtomicString& tagName = node->localName();
130     return tagName == addressTag
131         || tagName == appletTag
132         || tagName == areaTag
133         || tagName == articleTag
134         || tagName == asideTag
135         || tagName == baseTag
136         || tagName == basefontTag
137         || tagName == bgsoundTag
138         || tagName == blockquoteTag
139         || tagName == bodyTag
140         || tagName == brTag
141         || tagName == buttonTag
142         || tagName == captionTag
143         || tagName == centerTag
144         || tagName == colTag
145         || tagName == colgroupTag
146         || tagName == commandTag
147         || tagName == ddTag
148         || tagName == detailsTag
149         || tagName == dirTag
150         || tagName == divTag
151         || tagName == dlTag
152         || tagName == dtTag
153         || tagName == embedTag
154         || tagName == fieldsetTag
155         || tagName == figcaptionTag
156         || tagName == figureTag
157         || tagName == footerTag
158         || tagName == formTag
159         || tagName == frameTag
160         || tagName == framesetTag
161         || isNumberedHeaderTag(tagName)
162         || tagName == headTag
163         || tagName == headerTag
164         || tagName == hgroupTag
165         || tagName == hrTag
166         || tagName == htmlTag
167         || tagName == iframeTag
168         || tagName == imgTag
169         || tagName == inputTag
170         || tagName == isindexTag
171         || tagName == liTag
172         || tagName == linkTag
173         || tagName == listingTag
174         || tagName == marqueeTag
175         || tagName == menuTag
176         || tagName == metaTag
177         || tagName == navTag
178         || tagName == noembedTag
179         || tagName == noframesTag
180         || tagName == noscriptTag
181         || tagName == objectTag
182         || tagName == olTag
183         || tagName == pTag
184         || tagName == paramTag
185         || tagName == plaintextTag
186         || tagName == preTag
187         || tagName == scriptTag
188         || tagName == sectionTag
189         || tagName == selectTag
190         || tagName == styleTag
191         || tagName == summaryTag
192         || tagName == tableTag
193         || isTableBodyContextTag(tagName)
194         || tagName == tdTag
195         || tagName == textareaTag
196         || tagName == thTag
197         || tagName == titleTag
198         || tagName == trTag
199         || tagName == ulTag
200         || tagName == wbrTag
201         || tagName == xmpTag;
202 }
203
204 bool isNonAnchorNonNobrFormattingTag(const AtomicString& tagName)
205 {
206     return tagName == bTag
207         || tagName == bigTag
208         || tagName == codeTag
209         || tagName == emTag
210         || tagName == fontTag
211         || tagName == iTag
212         || tagName == sTag
213         || tagName == smallTag
214         || tagName == strikeTag
215         || tagName == strongTag
216         || tagName == ttTag
217         || tagName == uTag;
218 }
219
220 bool isNonAnchorFormattingTag(const AtomicString& tagName)
221 {
222     return tagName == nobrTag
223         || isNonAnchorNonNobrFormattingTag(tagName);
224 }
225
226 // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#formatting
227 bool isFormattingTag(const AtomicString& tagName)
228 {
229     return tagName == aTag || isNonAnchorFormattingTag(tagName);
230 }
231
232 HTMLFormElement* closestFormAncestor(Element* element)
233 {
234     while (element) {
235         if (element->hasTagName(formTag))
236             return static_cast<HTMLFormElement*>(element);
237         ContainerNode* parent = element->parentNode();
238         if (!parent || !parent->isElementNode())
239             return 0;
240         element = static_cast<Element*>(parent);
241     }
242     return 0;
243 }
244
245 } // namespace
246
247 class HTMLTreeBuilder::ExternalCharacterTokenBuffer {
248     WTF_MAKE_NONCOPYABLE(ExternalCharacterTokenBuffer);
249 public:
250     explicit ExternalCharacterTokenBuffer(AtomicHTMLToken& token)
251         : m_current(token.characters().data())
252         , m_end(m_current + token.characters().size())
253     {
254         ASSERT(!isEmpty());
255     }
256
257     explicit ExternalCharacterTokenBuffer(const String& string)
258         : m_current(string.characters())
259         , m_end(m_current + string.length())
260     {
261         ASSERT(!isEmpty());
262     }
263
264     ~ExternalCharacterTokenBuffer()
265     {
266         ASSERT(isEmpty());
267     }
268
269     bool isEmpty() const { return m_current == m_end; }
270
271     void skipAtMostOneLeadingNewline()
272     {
273         ASSERT(!isEmpty());
274         if (*m_current == '\n')
275             ++m_current;
276     }
277
278     void skipLeadingWhitespace()
279     {
280         skipLeading<isHTMLSpace>();
281     }
282
283     String takeLeadingWhitespace()
284     {
285         return takeLeading<isHTMLSpace>();
286     }
287
288     void skipLeadingNonWhitespace()
289     {
290         skipLeading<isNotHTMLSpace>();
291     }
292
293     String takeRemaining()
294     {
295         ASSERT(!isEmpty());
296         const UChar* start = m_current;
297         m_current = m_end;
298         return String(start, m_current - start);
299     }
300
301     void giveRemainingTo(StringBuilder& recipient)
302     {
303         recipient.append(m_current, m_end - m_current);
304         m_current = m_end;
305     }
306
307     String takeRemainingWhitespace()
308     {
309         ASSERT(!isEmpty());
310         Vector<UChar> whitespace;
311         do {
312             UChar cc = *m_current++;
313             if (isHTMLSpace(cc))
314                 whitespace.append(cc);
315         } while (m_current < m_end);
316         // Returning the null string when there aren't any whitespace
317         // characters is slightly cleaner semantically because we don't want
318         // to insert a text node (as opposed to inserting an empty text node).
319         if (whitespace.isEmpty())
320             return String();
321         return String::adopt(whitespace);
322     }
323
324 private:
325     template<bool characterPredicate(UChar)>
326     void skipLeading()
327     {
328         ASSERT(!isEmpty());
329         while (characterPredicate(*m_current)) {
330             if (++m_current == m_end)
331                 return;
332         }
333     }
334
335     template<bool characterPredicate(UChar)>
336     String takeLeading()
337     {
338         ASSERT(!isEmpty());
339         const UChar* start = m_current;
340         skipLeading<characterPredicate>();
341         if (start == m_current)
342             return String();
343         return String(start, m_current - start);
344     }
345
346     const UChar* m_current;
347     const UChar* m_end;
348 };
349
350
351 HTMLTreeBuilder::HTMLTreeBuilder(HTMLDocumentParser* parser, HTMLDocument* document, bool reportErrors, bool usePreHTML5ParserQuirks, unsigned maximumDOMTreeDepth)
352     : m_framesetOk(true)
353     , m_document(document)
354     , m_tree(document, maximumDOMTreeDepth)
355     , m_reportErrors(reportErrors)
356     , m_isPaused(false)
357     , m_insertionMode(InitialMode)
358     , m_originalInsertionMode(InitialMode)
359     , m_shouldSkipLeadingNewline(false)
360     , m_parser(parser)
361     , m_scriptToProcessStartPosition(uninitializedPositionValue1())
362     , m_lastScriptElementStartPosition(TextPosition::belowRangePosition())
363     , m_usePreHTML5ParserQuirks(usePreHTML5ParserQuirks)
364 {
365 }
366
367 // FIXME: Member variables should be grouped into self-initializing structs to
368 // minimize code duplication between these constructors.
369 HTMLTreeBuilder::HTMLTreeBuilder(HTMLDocumentParser* parser, DocumentFragment* fragment, Element* contextElement, FragmentScriptingPermission scriptingPermission, bool usePreHTML5ParserQuirks, unsigned maximumDOMTreeDepth)
370     : m_framesetOk(true)
371     , m_fragmentContext(fragment, contextElement, scriptingPermission)
372     , m_document(fragment->document())
373     , m_tree(fragment, scriptingPermission, maximumDOMTreeDepth)
374     , m_reportErrors(false) // FIXME: Why not report errors in fragments?
375     , m_isPaused(false)
376     , m_insertionMode(InitialMode)
377     , m_originalInsertionMode(InitialMode)
378     , m_shouldSkipLeadingNewline(false)
379     , m_parser(parser)
380     , m_scriptToProcessStartPosition(uninitializedPositionValue1())
381     , m_lastScriptElementStartPosition(TextPosition::belowRangePosition())
382     , m_usePreHTML5ParserQuirks(usePreHTML5ParserQuirks)
383 {
384     // FIXME: This assertion will become invalid if <http://webkit.org/b/60316> is fixed.
385     ASSERT(contextElement);
386     if (contextElement) {
387         // Steps 4.2-4.6 of the HTML5 Fragment Case parsing algorithm:
388         // http://www.whatwg.org/specs/web-apps/current-work/multipage/the-end.html#fragment-case
389         // For efficiency, we skip step 4.2 ("Let root be a new html element with no attributes")
390         // and instead use the DocumentFragment as a root node.
391         m_tree.openElements()->pushRootNode(fragment);
392         resetInsertionModeAppropriately();
393         m_tree.setForm(closestFormAncestor(contextElement));
394     }
395 }
396
397 HTMLTreeBuilder::~HTMLTreeBuilder()
398 {
399 }
400
401 void HTMLTreeBuilder::detach()
402 {
403     // This call makes little sense in fragment mode, but for consistency
404     // DocumentParser expects detach() to always be called before it's destroyed.
405     m_document = 0;
406     // HTMLConstructionSite might be on the callstack when detach() is called
407     // otherwise we'd just call m_tree.clear() here instead.
408     m_tree.detach();
409 }
410
411 HTMLTreeBuilder::FragmentParsingContext::FragmentParsingContext()
412     : m_fragment(0)
413     , m_contextElement(0)
414     , m_scriptingPermission(FragmentScriptingAllowed)
415 {
416 }
417
418 HTMLTreeBuilder::FragmentParsingContext::FragmentParsingContext(DocumentFragment* fragment, Element* contextElement, FragmentScriptingPermission scriptingPermission)
419     : m_fragment(fragment)
420     , m_contextElement(contextElement)
421     , m_scriptingPermission(scriptingPermission)
422 {
423     ASSERT(!fragment->hasChildNodes());
424 }
425
426 HTMLTreeBuilder::FragmentParsingContext::~FragmentParsingContext()
427 {
428 }
429
430 PassRefPtr<Element> HTMLTreeBuilder::takeScriptToProcess(TextPosition& scriptStartPosition)
431 {
432     // Unpause ourselves, callers may pause us again when processing the script.
433     // The HTML5 spec is written as though scripts are executed inside the tree
434     // builder.  We pause the parser to exit the tree builder, and then resume
435     // before running scripts.
436     m_isPaused = false;
437     scriptStartPosition = m_scriptToProcessStartPosition;
438     m_scriptToProcessStartPosition = uninitializedPositionValue1();
439     return m_scriptToProcess.release();
440 }
441
442 void HTMLTreeBuilder::constructTreeFromToken(HTMLToken& rawToken)
443 {
444     AtomicHTMLToken token(rawToken);
445
446     // We clear the rawToken in case constructTreeFromAtomicToken
447     // synchronously re-enters the parser. We don't clear the token immedately
448     // for Character tokens because the AtomicHTMLToken avoids copying the
449     // characters by keeping a pointer to the underlying buffer in the
450     // HTMLToken. Fortuantely, Character tokens can't cause use to re-enter
451     // the parser.
452     //
453     // FIXME: Stop clearing the rawToken once we start running the parser off
454     // the main thread or once we stop allowing synchronous JavaScript
455     // execution from parseAttribute.
456     if (rawToken.type() != HTMLTokenTypes::Character)
457         rawToken.clear();
458
459     constructTreeFromAtomicToken(token);
460
461     if (!rawToken.isUninitialized()) {
462         ASSERT(rawToken.type() == HTMLTokenTypes::Character);
463         rawToken.clear();
464     }
465 }
466
467 void HTMLTreeBuilder::constructTreeFromAtomicToken(AtomicHTMLToken& token)
468 {
469     if (shouldProcessTokenInForeignContent(token))
470         processTokenInForeignContent(token);
471     else
472         processToken(token);
473
474     bool inForeignContent = !m_tree.isEmpty()
475         && !isInHTMLNamespace(m_tree.currentNode())
476         && !HTMLElementStack::isHTMLIntegrationPoint(m_tree.currentNode())
477         && !HTMLElementStack::isMathMLTextIntegrationPoint(m_tree.currentNode());
478
479     m_parser->tokenizer()->setForceNullCharacterReplacement(m_insertionMode == TextMode || inForeignContent);
480     m_parser->tokenizer()->setShouldAllowCDATA(inForeignContent);
481
482     m_tree.executeQueuedTasks();
483     // We might be detached now.
484 }
485
486 void HTMLTreeBuilder::processToken(AtomicHTMLToken& token)
487 {
488     switch (token.type()) {
489     case HTMLTokenTypes::Uninitialized:
490         ASSERT_NOT_REACHED();
491         break;
492     case HTMLTokenTypes::DOCTYPE:
493         m_shouldSkipLeadingNewline = false;
494         processDoctypeToken(token);
495         break;
496     case HTMLTokenTypes::StartTag:
497         m_shouldSkipLeadingNewline = false;
498         processStartTag(token);
499         break;
500     case HTMLTokenTypes::EndTag:
501         m_shouldSkipLeadingNewline = false;
502         processEndTag(token);
503         break;
504     case HTMLTokenTypes::Comment:
505         m_shouldSkipLeadingNewline = false;
506         processComment(token);
507         return;
508     case HTMLTokenTypes::Character:
509         processCharacter(token);
510         break;
511     case HTMLTokenTypes::EndOfFile:
512         m_shouldSkipLeadingNewline = false;
513         processEndOfFile(token);
514         break;
515     }
516 }
517
518 void HTMLTreeBuilder::processDoctypeToken(AtomicHTMLToken& token)
519 {
520     ASSERT(token.type() == HTMLTokenTypes::DOCTYPE);
521     if (m_insertionMode == InitialMode) {
522         m_tree.insertDoctype(token);
523         setInsertionMode(BeforeHTMLMode);
524         return;
525     }
526     if (m_insertionMode == InTableTextMode) {
527         defaultForInTableText();
528         processDoctypeToken(token);
529         return;
530     }
531     parseError(token);
532 }
533
534 void HTMLTreeBuilder::processFakeStartTag(const QualifiedName& tagName, PassOwnPtr<NamedNodeMap> attributes)
535 {
536     // FIXME: We'll need a fancier conversion than just "localName" for SVG/MathML tags.
537     AtomicHTMLToken fakeToken(HTMLTokenTypes::StartTag, tagName.localName(), attributes);
538     processStartTag(fakeToken);
539 }
540
541 void HTMLTreeBuilder::processFakeEndTag(const QualifiedName& tagName)
542 {
543     // FIXME: We'll need a fancier conversion than just "localName" for SVG/MathML tags.
544     AtomicHTMLToken fakeToken(HTMLTokenTypes::EndTag, tagName.localName());
545     processEndTag(fakeToken);
546 }
547
548 void HTMLTreeBuilder::processFakeCharacters(const String& characters)
549 {
550     ASSERT(!characters.isEmpty());
551     ExternalCharacterTokenBuffer buffer(characters);
552     processCharacterBuffer(buffer);
553 }
554
555 void HTMLTreeBuilder::processFakePEndTagIfPInButtonScope()
556 {
557     if (!m_tree.openElements()->inButtonScope(pTag.localName()))
558         return;
559     AtomicHTMLToken endP(HTMLTokenTypes::EndTag, pTag.localName());
560     processEndTag(endP);
561 }
562
563 PassOwnPtr<NamedNodeMap> HTMLTreeBuilder::attributesForIsindexInput(AtomicHTMLToken& token)
564 {
565     OwnPtr<NamedNodeMap> attributes = token.takeAttributes();
566     if (!attributes)
567         attributes = NamedNodeMap::create();
568     else {
569         attributes->removeAttribute(nameAttr);
570         attributes->removeAttribute(actionAttr);
571         attributes->removeAttribute(promptAttr);
572     }
573
574     RefPtr<Attribute> mappedAttribute = Attribute::create(nameAttr, isindexTag.localName());
575     attributes->insertAttribute(mappedAttribute.release(), false);
576     return attributes.release();
577 }
578
579 void HTMLTreeBuilder::processIsindexStartTagForInBody(AtomicHTMLToken& token)
580 {
581     ASSERT(token.type() == HTMLTokenTypes::StartTag);
582     ASSERT(token.name() == isindexTag);
583     parseError(token);
584     if (m_tree.form())
585         return;
586     notImplemented(); // Acknowledge self-closing flag
587     processFakeStartTag(formTag);
588     RefPtr<Attribute> actionAttribute = token.getAttributeItem(actionAttr);
589     if (actionAttribute) {
590         ASSERT(m_tree.currentElement()->hasTagName(formTag));
591         m_tree.currentElement()->setAttribute(actionAttr, actionAttribute->value());
592     }
593     processFakeStartTag(hrTag);
594     processFakeStartTag(labelTag);
595     RefPtr<Attribute> promptAttribute = token.getAttributeItem(promptAttr);
596     if (promptAttribute)
597         processFakeCharacters(promptAttribute->value());
598     else
599         processFakeCharacters(searchableIndexIntroduction());
600     processFakeStartTag(inputTag, attributesForIsindexInput(token));
601     notImplemented(); // This second set of characters may be needed by non-english locales.
602     processFakeEndTag(labelTag);
603     processFakeStartTag(hrTag);
604     processFakeEndTag(formTag);
605 }
606
607 namespace {
608
609 bool isLi(const ContainerNode* element)
610 {
611     return element->hasTagName(liTag);
612 }
613
614 bool isDdOrDt(const ContainerNode* element)
615 {
616     return element->hasTagName(ddTag)
617         || element->hasTagName(dtTag);
618 }
619
620 }
621
622 template <bool shouldClose(const ContainerNode*)>
623 void HTMLTreeBuilder::processCloseWhenNestedTag(AtomicHTMLToken& token)
624 {
625     m_framesetOk = false;
626     HTMLElementStack::ElementRecord* nodeRecord = m_tree.openElements()->topRecord();
627     while (1) {
628         RefPtr<ContainerNode> node = nodeRecord->node();
629         if (shouldClose(node.get())) {
630             ASSERT(node->isElementNode());
631             processFakeEndTag(toElement(node.get())->tagQName());
632             break;
633         }
634         if (isSpecialNode(node.get()) && !node->hasTagName(addressTag) && !node->hasTagName(divTag) && !node->hasTagName(pTag))
635             break;
636         nodeRecord = nodeRecord->next();
637     }
638     processFakePEndTagIfPInButtonScope();
639     m_tree.insertHTMLElement(token);
640 }
641
642 namespace {
643
644 typedef HashMap<AtomicString, QualifiedName> PrefixedNameToQualifiedNameMap;
645
646 void mapLoweredLocalNameToName(PrefixedNameToQualifiedNameMap* map, QualifiedName** names, size_t length)
647 {
648     for (size_t i = 0; i < length; ++i) {
649         const QualifiedName& name = *names[i];
650         const AtomicString& localName = name.localName();
651         AtomicString loweredLocalName = localName.lower();
652         if (loweredLocalName != localName)
653             map->add(loweredLocalName, name);
654     }
655 }
656
657 void adjustSVGTagNameCase(AtomicHTMLToken& token)
658 {
659     static PrefixedNameToQualifiedNameMap* caseMap = 0;
660     if (!caseMap) {
661         caseMap = new PrefixedNameToQualifiedNameMap;
662         size_t length = 0;
663         QualifiedName** svgTags = SVGNames::getSVGTags(&length);
664         mapLoweredLocalNameToName(caseMap, svgTags, length);
665     }
666
667     const QualifiedName& casedName = caseMap->get(token.name());
668     if (casedName.localName().isNull())
669         return;
670     token.setName(casedName.localName());
671 }
672
673 template<QualifiedName** getAttrs(size_t* length)>
674 void adjustAttributes(AtomicHTMLToken& token)
675 {
676     static PrefixedNameToQualifiedNameMap* caseMap = 0;
677     if (!caseMap) {
678         caseMap = new PrefixedNameToQualifiedNameMap;
679         size_t length = 0;
680         QualifiedName** attrs = getAttrs(&length);
681         mapLoweredLocalNameToName(caseMap, attrs, length);
682     }
683
684     NamedNodeMap* attributes = token.attributes();
685     if (!attributes)
686         return;
687
688     for (unsigned x = 0; x < attributes->length(); ++x) {
689         Attribute* attribute = attributes->attributeItem(x);
690         const QualifiedName& casedName = caseMap->get(attribute->localName());
691         if (!casedName.localName().isNull())
692             attribute->parserSetName(casedName);
693     }
694 }
695
696 void adjustSVGAttributes(AtomicHTMLToken& token)
697 {
698     adjustAttributes<SVGNames::getSVGAttrs>(token);
699 }
700
701 void adjustMathMLAttributes(AtomicHTMLToken& token)
702 {
703     adjustAttributes<MathMLNames::getMathMLAttrs>(token);
704 }
705
706 void addNamesWithPrefix(PrefixedNameToQualifiedNameMap* map, const AtomicString& prefix, QualifiedName** names, size_t length)
707 {
708     for (size_t i = 0; i < length; ++i) {
709         QualifiedName* name = names[i];
710         const AtomicString& localName = name->localName();
711         AtomicString prefixColonLocalName = prefix + ':' + localName;
712         QualifiedName nameWithPrefix(prefix, localName, name->namespaceURI());
713         map->add(prefixColonLocalName, nameWithPrefix);
714     }
715 }
716
717 void adjustForeignAttributes(AtomicHTMLToken& token)
718 {
719     static PrefixedNameToQualifiedNameMap* map = 0;
720     if (!map) {
721         map = new PrefixedNameToQualifiedNameMap;
722         size_t length = 0;
723         QualifiedName** attrs = XLinkNames::getXLinkAttrs(&length);
724         addNamesWithPrefix(map, "xlink", attrs, length);
725
726         attrs = XMLNames::getXMLAttrs(&length);
727         addNamesWithPrefix(map, "xml", attrs, length);
728
729         map->add("xmlns", XMLNSNames::xmlnsAttr);
730         map->add("xmlns:xlink", QualifiedName("xmlns", "xlink", XMLNSNames::xmlnsNamespaceURI));
731     }
732
733     NamedNodeMap* attributes = token.attributes();
734     if (!attributes)
735         return;
736
737     for (unsigned x = 0; x < attributes->length(); ++x) {
738         Attribute* attribute = attributes->attributeItem(x);
739         const QualifiedName& name = map->get(attribute->localName());
740         if (!name.localName().isNull())
741             attribute->parserSetName(name);
742     }
743 }
744
745 }
746
747 void HTMLTreeBuilder::processStartTagForInBody(AtomicHTMLToken& token)
748 {
749     ASSERT(token.type() == HTMLTokenTypes::StartTag);
750     if (token.name() == htmlTag) {
751         m_tree.insertHTMLHtmlStartTagInBody(token);
752         return;
753     }
754     if (token.name() == baseTag
755         || token.name() == basefontTag
756         || token.name() == bgsoundTag
757         || token.name() == commandTag
758         || token.name() == linkTag
759         || token.name() == metaTag
760         || token.name() == noframesTag
761         || token.name() == scriptTag
762         || token.name() == styleTag
763         || token.name() == titleTag) {
764         bool didProcess = processStartTagForInHead(token);
765         ASSERT_UNUSED(didProcess, didProcess);
766         return;
767     }
768     if (token.name() == bodyTag) {
769         if (!m_tree.openElements()->secondElementIsHTMLBodyElement() || m_tree.openElements()->hasOnlyOneElement()) {
770             ASSERT(isParsingFragment());
771             return;
772         }
773         m_framesetOk = false;
774         m_tree.insertHTMLBodyStartTagInBody(token);
775         return;
776     }
777     if (token.name() == framesetTag) {
778         parseError(token);
779         if (!m_tree.openElements()->secondElementIsHTMLBodyElement() || m_tree.openElements()->hasOnlyOneElement()) {
780             ASSERT(isParsingFragment());
781             return;
782         }
783         if (!m_framesetOk)
784             return;
785         ExceptionCode ec = 0;
786         m_tree.openElements()->bodyElement()->remove(ec);
787         ASSERT(!ec);
788         m_tree.openElements()->popUntil(m_tree.openElements()->bodyElement());
789         m_tree.openElements()->popHTMLBodyElement();
790         ASSERT(m_tree.openElements()->top() == m_tree.openElements()->htmlElement());
791         m_tree.insertHTMLElement(token);
792         setInsertionMode(InFramesetMode);
793         return;
794     }
795     if (token.name() == addressTag
796         || token.name() == articleTag
797         || token.name() == asideTag
798         || token.name() == blockquoteTag
799         || token.name() == centerTag
800         || token.name() == detailsTag
801         || token.name() == dirTag
802         || token.name() == divTag
803         || token.name() == dlTag
804         || token.name() == fieldsetTag
805         || token.name() == figcaptionTag
806         || token.name() == figureTag
807         || token.name() == footerTag
808         || token.name() == headerTag
809         || token.name() == hgroupTag
810         || token.name() == menuTag
811         || token.name() == navTag
812         || token.name() == olTag
813         || token.name() == pTag
814         || token.name() == sectionTag
815         || token.name() == summaryTag
816         || token.name() == ulTag) {
817         processFakePEndTagIfPInButtonScope();
818         m_tree.insertHTMLElement(token);
819         return;
820     }
821     if (isNumberedHeaderTag(token.name())) {
822         processFakePEndTagIfPInButtonScope();
823         if (isNumberedHeaderTag(m_tree.currentNode()->localName())) {
824             parseError(token);
825             m_tree.openElements()->pop();
826         }
827         m_tree.insertHTMLElement(token);
828         return;
829     }
830     if (token.name() == preTag || token.name() == listingTag) {
831         processFakePEndTagIfPInButtonScope();
832         m_tree.insertHTMLElement(token);
833         m_shouldSkipLeadingNewline = true;
834         m_framesetOk = false;
835         return;
836     }
837     if (token.name() == formTag) {
838         if (m_tree.form()) {
839             parseError(token);
840             return;
841         }
842         processFakePEndTagIfPInButtonScope();
843         m_tree.insertHTMLFormElement(token);
844         return;
845     }
846     if (token.name() == liTag) {
847         processCloseWhenNestedTag<isLi>(token);
848         return;
849     }
850     if (token.name() == ddTag || token.name() == dtTag) {
851         processCloseWhenNestedTag<isDdOrDt>(token);
852         return;
853     }
854     if (token.name() == plaintextTag) {
855         processFakePEndTagIfPInButtonScope();
856         m_tree.insertHTMLElement(token);
857         m_parser->tokenizer()->setState(HTMLTokenizerState::PLAINTEXTState);
858         return;
859     }
860     if (token.name() == buttonTag) {
861         if (m_tree.openElements()->inScope(buttonTag)) {
862             parseError(token);
863             processFakeEndTag(buttonTag);
864             processStartTag(token); // FIXME: Could we just fall through here?
865             return;
866         }
867         m_tree.reconstructTheActiveFormattingElements();
868         m_tree.insertHTMLElement(token);
869         m_framesetOk = false;
870         return;
871     }
872     if (token.name() == aTag) {
873         Element* activeATag = m_tree.activeFormattingElements()->closestElementInScopeWithName(aTag.localName());
874         if (activeATag) {
875             parseError(token);
876             processFakeEndTag(aTag);
877             m_tree.activeFormattingElements()->remove(activeATag);
878             if (m_tree.openElements()->contains(activeATag))
879                 m_tree.openElements()->remove(activeATag);
880         }
881         m_tree.reconstructTheActiveFormattingElements();
882         m_tree.insertFormattingElement(token);
883         return;
884     }
885     if (isNonAnchorNonNobrFormattingTag(token.name())) {
886         m_tree.reconstructTheActiveFormattingElements();
887         m_tree.insertFormattingElement(token);
888         return;
889     }
890     if (token.name() == nobrTag) {
891         m_tree.reconstructTheActiveFormattingElements();
892         if (m_tree.openElements()->inScope(nobrTag)) {
893             parseError(token);
894             processFakeEndTag(nobrTag);
895             m_tree.reconstructTheActiveFormattingElements();
896         }
897         m_tree.insertFormattingElement(token);
898         return;
899     }
900     if (token.name() == appletTag
901         || token.name() == marqueeTag
902         || token.name() == objectTag) {
903         m_tree.reconstructTheActiveFormattingElements();
904         m_tree.insertHTMLElement(token);
905         m_tree.activeFormattingElements()->appendMarker();
906         m_framesetOk = false;
907         return;
908     }
909     if (token.name() == tableTag) {
910         if (!m_document->inQuirksMode() && m_tree.openElements()->inButtonScope(pTag))
911             processFakeEndTag(pTag);
912         m_tree.insertHTMLElement(token);
913         m_framesetOk = false;
914         setInsertionMode(InTableMode);
915         return;
916     }
917     if (token.name() == imageTag) {
918         parseError(token);
919         // Apparently we're not supposed to ask.
920         token.setName(imgTag.localName());
921         // Note the fall through to the imgTag handling below!
922     }
923     if (token.name() == areaTag
924         || token.name() == brTag
925         || token.name() == embedTag
926         || token.name() == imgTag
927         || token.name() == keygenTag
928         || token.name() == wbrTag) {
929         m_tree.reconstructTheActiveFormattingElements();
930         m_tree.insertSelfClosingHTMLElement(token);
931         m_framesetOk = false;
932         return;
933     }
934     if (token.name() == inputTag) {
935         RefPtr<Attribute> typeAttribute = token.getAttributeItem(typeAttr);
936         m_tree.reconstructTheActiveFormattingElements();
937         m_tree.insertSelfClosingHTMLElement(token);
938         if (!typeAttribute || !equalIgnoringCase(typeAttribute->value(), "hidden"))
939             m_framesetOk = false;
940         return;
941     }
942     if (token.name() == paramTag
943         || token.name() == sourceTag
944         || token.name() == trackTag) {
945         m_tree.insertSelfClosingHTMLElement(token);
946         return;
947     }
948     if (token.name() == hrTag) {
949         processFakePEndTagIfPInButtonScope();
950         m_tree.insertSelfClosingHTMLElement(token);
951         m_framesetOk = false;
952         return;
953     }
954     if (token.name() == isindexTag) {
955         processIsindexStartTagForInBody(token);
956         return;
957     }
958     if (token.name() == textareaTag) {
959         m_tree.insertHTMLElement(token);
960         m_shouldSkipLeadingNewline = true;
961         m_parser->tokenizer()->setState(HTMLTokenizerState::RCDATAState);
962         m_originalInsertionMode = m_insertionMode;
963         m_framesetOk = false;
964         setInsertionMode(TextMode);
965         return;
966     }
967     if (token.name() == xmpTag) {
968         processFakePEndTagIfPInButtonScope();
969         m_tree.reconstructTheActiveFormattingElements();
970         m_framesetOk = false;
971         processGenericRawTextStartTag(token);
972         return;
973     }
974     if (token.name() == iframeTag) {
975         m_framesetOk = false;
976         processGenericRawTextStartTag(token);
977         return;
978     }
979     if (token.name() == noembedTag && pluginsEnabled(m_document->frame())) {
980         processGenericRawTextStartTag(token);
981         return;
982     }
983     if (token.name() == noscriptTag && scriptEnabled(m_document->frame())) {
984         processGenericRawTextStartTag(token);
985         return;
986     }
987     if (token.name() == selectTag) {
988         m_tree.reconstructTheActiveFormattingElements();
989         m_tree.insertHTMLElement(token);
990         m_framesetOk = false;
991         if (m_insertionMode == InTableMode
992              || m_insertionMode == InCaptionMode
993              || m_insertionMode == InColumnGroupMode
994              || m_insertionMode == InTableBodyMode
995              || m_insertionMode == InRowMode
996              || m_insertionMode == InCellMode)
997             setInsertionMode(InSelectInTableMode);
998         else
999             setInsertionMode(InSelectMode);
1000         return;
1001     }
1002     if (token.name() == optgroupTag || token.name() == optionTag) {
1003         if (m_tree.currentNode()->hasTagName(optionTag)) {
1004             AtomicHTMLToken endOption(HTMLTokenTypes::EndTag, optionTag.localName());
1005             processEndTag(endOption);
1006         }
1007         m_tree.reconstructTheActiveFormattingElements();
1008         m_tree.insertHTMLElement(token);
1009         return;
1010     }
1011     if (token.name() == rpTag || token.name() == rtTag) {
1012         if (m_tree.openElements()->inScope(rubyTag.localName())) {
1013             m_tree.generateImpliedEndTags();
1014             if (!m_tree.currentNode()->hasTagName(rubyTag))
1015                 parseError(token);
1016         }
1017         m_tree.insertHTMLElement(token);
1018         return;
1019     }
1020     if (token.name() == MathMLNames::mathTag.localName()) {
1021         m_tree.reconstructTheActiveFormattingElements();
1022         adjustMathMLAttributes(token);
1023         adjustForeignAttributes(token);
1024         m_tree.insertForeignElement(token, MathMLNames::mathmlNamespaceURI);
1025         return;
1026     }
1027     if (token.name() == SVGNames::svgTag.localName()) {
1028         m_tree.reconstructTheActiveFormattingElements();
1029         adjustSVGAttributes(token);
1030         adjustForeignAttributes(token);
1031         m_tree.insertForeignElement(token, SVGNames::svgNamespaceURI);
1032         return;
1033     }
1034     if (isCaptionColOrColgroupTag(token.name())
1035         || token.name() == frameTag
1036         || token.name() == headTag
1037         || isTableBodyContextTag(token.name())
1038         || isTableCellContextTag(token.name())
1039         || token.name() == trTag) {
1040         parseError(token);
1041         return;
1042     }
1043     m_tree.reconstructTheActiveFormattingElements();
1044     m_tree.insertHTMLElement(token);
1045 }
1046
1047 bool HTMLTreeBuilder::processColgroupEndTagForInColumnGroup()
1048 {
1049     if (m_tree.currentNode() == m_tree.openElements()->rootNode()) {
1050         ASSERT(isParsingFragment());
1051         // FIXME: parse error
1052         return false;
1053     }
1054     m_tree.openElements()->pop();
1055     setInsertionMode(InTableMode);
1056     return true;
1057 }
1058
1059 // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#close-the-cell
1060 void HTMLTreeBuilder::closeTheCell()
1061 {
1062     ASSERT(insertionMode() == InCellMode);
1063     if (m_tree.openElements()->inTableScope(tdTag)) {
1064         ASSERT(!m_tree.openElements()->inTableScope(thTag));
1065         processFakeEndTag(tdTag);
1066         return;
1067     }
1068     ASSERT(m_tree.openElements()->inTableScope(thTag));
1069     processFakeEndTag(thTag);
1070     ASSERT(insertionMode() == InRowMode);
1071 }
1072
1073 void HTMLTreeBuilder::processStartTagForInTable(AtomicHTMLToken& token)
1074 {
1075     ASSERT(token.type() == HTMLTokenTypes::StartTag);
1076     if (token.name() == captionTag) {
1077         m_tree.openElements()->popUntilTableScopeMarker();
1078         m_tree.activeFormattingElements()->appendMarker();
1079         m_tree.insertHTMLElement(token);
1080         setInsertionMode(InCaptionMode);
1081         return;
1082     }
1083     if (token.name() == colgroupTag) {
1084         m_tree.openElements()->popUntilTableScopeMarker();
1085         m_tree.insertHTMLElement(token);
1086         setInsertionMode(InColumnGroupMode);
1087         return;
1088     }
1089     if (token.name() == colTag) {
1090         processFakeStartTag(colgroupTag);
1091         ASSERT(InColumnGroupMode);
1092         processStartTag(token);
1093         return;
1094     }
1095     if (isTableBodyContextTag(token.name())) {
1096         m_tree.openElements()->popUntilTableScopeMarker();
1097         m_tree.insertHTMLElement(token);
1098         setInsertionMode(InTableBodyMode);
1099         return;
1100     }
1101     if (isTableCellContextTag(token.name())
1102         || token.name() == trTag) {
1103         processFakeStartTag(tbodyTag);
1104         ASSERT(insertionMode() == InTableBodyMode);
1105         processStartTag(token);
1106         return;
1107     }
1108     if (token.name() == tableTag) {
1109         parseError(token);
1110         if (!processTableEndTagForInTable()) {
1111             ASSERT(isParsingFragment());
1112             return;
1113         }
1114         processStartTag(token);
1115         return;
1116     }
1117     if (token.name() == styleTag || token.name() == scriptTag) {
1118         processStartTagForInHead(token);
1119         return;
1120     }
1121     if (token.name() == inputTag) {
1122         Attribute* typeAttribute = token.getAttributeItem(typeAttr);
1123         if (typeAttribute && equalIgnoringCase(typeAttribute->value(), "hidden")) {
1124             parseError(token);
1125             m_tree.insertSelfClosingHTMLElement(token);
1126             return;
1127         }
1128         // Fall through to "anything else" case.
1129     }
1130     if (token.name() == formTag) {
1131         parseError(token);
1132         if (m_tree.form())
1133             return;
1134         m_tree.insertHTMLFormElement(token, true);
1135         m_tree.openElements()->pop();
1136         return;
1137     }
1138     parseError(token);
1139     HTMLConstructionSite::RedirectToFosterParentGuard redirecter(m_tree);
1140     processStartTagForInBody(token);
1141 }
1142
1143 void HTMLTreeBuilder::processStartTag(AtomicHTMLToken& token)
1144 {
1145     ASSERT(token.type() == HTMLTokenTypes::StartTag);
1146     switch (insertionMode()) {
1147     case InitialMode:
1148         ASSERT(insertionMode() == InitialMode);
1149         defaultForInitial();
1150         // Fall through.
1151     case BeforeHTMLMode:
1152         ASSERT(insertionMode() == BeforeHTMLMode);
1153         if (token.name() == htmlTag) {
1154             m_tree.insertHTMLHtmlStartTagBeforeHTML(token);
1155             setInsertionMode(BeforeHeadMode);
1156             return;
1157         }
1158         defaultForBeforeHTML();
1159         // Fall through.
1160     case BeforeHeadMode:
1161         ASSERT(insertionMode() == BeforeHeadMode);
1162         if (token.name() == htmlTag) {
1163             m_tree.insertHTMLHtmlStartTagInBody(token);
1164             return;
1165         }
1166         if (token.name() == headTag) {
1167             m_tree.insertHTMLHeadElement(token);
1168             setInsertionMode(InHeadMode);
1169             return;
1170         }
1171         defaultForBeforeHead();
1172         // Fall through.
1173     case InHeadMode:
1174         ASSERT(insertionMode() == InHeadMode);
1175         if (processStartTagForInHead(token))
1176             return;
1177         defaultForInHead();
1178         // Fall through.
1179     case AfterHeadMode:
1180         ASSERT(insertionMode() == AfterHeadMode);
1181         if (token.name() == htmlTag) {
1182             m_tree.insertHTMLHtmlStartTagInBody(token);
1183             return;
1184         }
1185         if (token.name() == bodyTag) {
1186             m_framesetOk = false;
1187             m_tree.insertHTMLBodyElement(token);
1188             setInsertionMode(InBodyMode);
1189             return;
1190         }
1191         if (token.name() == framesetTag) {
1192             m_tree.insertHTMLElement(token);
1193             setInsertionMode(InFramesetMode);
1194             return;
1195         }
1196         if (token.name() == baseTag
1197             || token.name() == basefontTag
1198             || token.name() == bgsoundTag
1199             || token.name() == linkTag
1200             || token.name() == metaTag
1201             || token.name() == noframesTag
1202             || token.name() == scriptTag
1203             || token.name() == styleTag
1204             || token.name() == titleTag) {
1205             parseError(token);
1206             ASSERT(m_tree.head());
1207             m_tree.openElements()->pushHTMLHeadElement(m_tree.head());
1208             processStartTagForInHead(token);
1209             m_tree.openElements()->removeHTMLHeadElement(m_tree.head());
1210             return;
1211         }
1212         if (token.name() == headTag) {
1213             parseError(token);
1214             return;
1215         }
1216         defaultForAfterHead();
1217         // Fall through
1218     case InBodyMode:
1219         ASSERT(insertionMode() == InBodyMode);
1220         processStartTagForInBody(token);
1221         break;
1222     case InTableMode:
1223         ASSERT(insertionMode() == InTableMode);
1224         processStartTagForInTable(token);
1225         break;
1226     case InCaptionMode:
1227         ASSERT(insertionMode() == InCaptionMode);
1228         if (isCaptionColOrColgroupTag(token.name())
1229             || isTableBodyContextTag(token.name())
1230             || isTableCellContextTag(token.name())
1231             || token.name() == trTag) {
1232             parseError(token);
1233             if (!processCaptionEndTagForInCaption()) {
1234                 ASSERT(isParsingFragment());
1235                 return;
1236             }
1237             processStartTag(token);
1238             return;
1239         }
1240         processStartTagForInBody(token);
1241         break;
1242     case InColumnGroupMode:
1243         ASSERT(insertionMode() == InColumnGroupMode);
1244         if (token.name() == htmlTag) {
1245             m_tree.insertHTMLHtmlStartTagInBody(token);
1246             return;
1247         }
1248         if (token.name() == colTag) {
1249             m_tree.insertSelfClosingHTMLElement(token);
1250             return;
1251         }
1252         if (!processColgroupEndTagForInColumnGroup()) {
1253             ASSERT(isParsingFragment());
1254             return;
1255         }
1256         processStartTag(token);
1257         break;
1258     case InTableBodyMode:
1259         ASSERT(insertionMode() == InTableBodyMode);
1260         if (token.name() == trTag) {
1261             m_tree.openElements()->popUntilTableBodyScopeMarker(); // How is there ever anything to pop?
1262             m_tree.insertHTMLElement(token);
1263             setInsertionMode(InRowMode);
1264             return;
1265         }
1266         if (isTableCellContextTag(token.name())) {
1267             parseError(token);
1268             processFakeStartTag(trTag);
1269             ASSERT(insertionMode() == InRowMode);
1270             processStartTag(token);
1271             return;
1272         }
1273         if (isCaptionColOrColgroupTag(token.name()) || isTableBodyContextTag(token.name())) {
1274             // FIXME: This is slow.
1275             if (!m_tree.openElements()->inTableScope(tbodyTag.localName()) && !m_tree.openElements()->inTableScope(theadTag.localName()) && !m_tree.openElements()->inTableScope(tfootTag.localName())) {
1276                 ASSERT(isParsingFragment());
1277                 parseError(token);
1278                 return;
1279             }
1280             m_tree.openElements()->popUntilTableBodyScopeMarker();
1281             ASSERT(isTableBodyContextTag(m_tree.currentElement()->localName()));
1282             processFakeEndTag(m_tree.currentElement()->tagQName());
1283             processStartTag(token);
1284             return;
1285         }
1286         processStartTagForInTable(token);
1287         break;
1288     case InRowMode:
1289         ASSERT(insertionMode() == InRowMode);
1290         if (isTableCellContextTag(token.name())) {
1291             m_tree.openElements()->popUntilTableRowScopeMarker();
1292             m_tree.insertHTMLElement(token);
1293             setInsertionMode(InCellMode);
1294             m_tree.activeFormattingElements()->appendMarker();
1295             return;
1296         }
1297         if (token.name() == trTag
1298             || isCaptionColOrColgroupTag(token.name())
1299             || isTableBodyContextTag(token.name())) {
1300             if (!processTrEndTagForInRow()) {
1301                 ASSERT(isParsingFragment());
1302                 return;
1303             }
1304             ASSERT(insertionMode() == InTableBodyMode);
1305             processStartTag(token);
1306             return;
1307         }
1308         processStartTagForInTable(token);
1309         break;
1310     case InCellMode:
1311         ASSERT(insertionMode() == InCellMode);
1312         if (isCaptionColOrColgroupTag(token.name())
1313             || isTableCellContextTag(token.name())
1314             || token.name() == trTag
1315             || isTableBodyContextTag(token.name())) {
1316             // FIXME: This could be more efficient.
1317             if (!m_tree.openElements()->inTableScope(tdTag) && !m_tree.openElements()->inTableScope(thTag)) {
1318                 ASSERT(isParsingFragment());
1319                 parseError(token);
1320                 return;
1321             }
1322             closeTheCell();
1323             processStartTag(token);
1324             return;
1325         }
1326         processStartTagForInBody(token);
1327         break;
1328     case AfterBodyMode:
1329     case AfterAfterBodyMode:
1330         ASSERT(insertionMode() == AfterBodyMode || insertionMode() == AfterAfterBodyMode);
1331         if (token.name() == htmlTag) {
1332             m_tree.insertHTMLHtmlStartTagInBody(token);
1333             return;
1334         }
1335         setInsertionMode(InBodyMode);
1336         processStartTag(token);
1337         break;
1338     case InHeadNoscriptMode:
1339         ASSERT(insertionMode() == InHeadNoscriptMode);
1340         if (token.name() == htmlTag) {
1341             m_tree.insertHTMLHtmlStartTagInBody(token);
1342             return;
1343         }
1344         if (token.name() == basefontTag
1345             || token.name() == bgsoundTag
1346             || token.name() == linkTag
1347             || token.name() == metaTag
1348             || token.name() == noframesTag
1349             || token.name() == styleTag) {
1350             bool didProcess = processStartTagForInHead(token);
1351             ASSERT_UNUSED(didProcess, didProcess);
1352             return;
1353         }
1354         if (token.name() == htmlTag || token.name() == noscriptTag) {
1355             parseError(token);
1356             return;
1357         }
1358         defaultForInHeadNoscript();
1359         processToken(token);
1360         break;
1361     case InFramesetMode:
1362         ASSERT(insertionMode() == InFramesetMode);
1363         if (token.name() == htmlTag) {
1364             m_tree.insertHTMLHtmlStartTagInBody(token);
1365             return;
1366         }
1367         if (token.name() == framesetTag) {
1368             m_tree.insertHTMLElement(token);
1369             return;
1370         }
1371         if (token.name() == frameTag) {
1372             m_tree.insertSelfClosingHTMLElement(token);
1373             return;
1374         }
1375         if (token.name() == noframesTag) {
1376             processStartTagForInHead(token);
1377             return;
1378         }
1379         parseError(token);
1380         break;
1381     case AfterFramesetMode:
1382     case AfterAfterFramesetMode:
1383         ASSERT(insertionMode() == AfterFramesetMode || insertionMode() == AfterAfterFramesetMode);
1384         if (token.name() == htmlTag) {
1385             m_tree.insertHTMLHtmlStartTagInBody(token);
1386             return;
1387         }
1388         if (token.name() == noframesTag) {
1389             processStartTagForInHead(token);
1390             return;
1391         }
1392         parseError(token);
1393         break;
1394     case InSelectInTableMode:
1395         ASSERT(insertionMode() == InSelectInTableMode);
1396         if (token.name() == captionTag
1397             || token.name() == tableTag
1398             || isTableBodyContextTag(token.name())
1399             || token.name() == trTag
1400             || isTableCellContextTag(token.name())) {
1401             parseError(token);
1402             AtomicHTMLToken endSelect(HTMLTokenTypes::EndTag, selectTag.localName());
1403             processEndTag(endSelect);
1404             processStartTag(token);
1405             return;
1406         }
1407         // Fall through
1408     case InSelectMode:
1409         ASSERT(insertionMode() == InSelectMode || insertionMode() == InSelectInTableMode);
1410         if (token.name() == htmlTag) {
1411             m_tree.insertHTMLHtmlStartTagInBody(token);
1412             return;
1413         }
1414         if (token.name() == optionTag) {
1415             if (m_tree.currentNode()->hasTagName(optionTag)) {
1416                 AtomicHTMLToken endOption(HTMLTokenTypes::EndTag, optionTag.localName());
1417                 processEndTag(endOption);
1418             }
1419             m_tree.insertHTMLElement(token);
1420             return;
1421         }
1422         if (token.name() == optgroupTag) {
1423             if (m_tree.currentNode()->hasTagName(optionTag)) {
1424                 AtomicHTMLToken endOption(HTMLTokenTypes::EndTag, optionTag.localName());
1425                 processEndTag(endOption);
1426             }
1427             if (m_tree.currentNode()->hasTagName(optgroupTag)) {
1428                 AtomicHTMLToken endOptgroup(HTMLTokenTypes::EndTag, optgroupTag.localName());
1429                 processEndTag(endOptgroup);
1430             }
1431             m_tree.insertHTMLElement(token);
1432             return;
1433         }
1434         if (token.name() == selectTag) {
1435             parseError(token);
1436             AtomicHTMLToken endSelect(HTMLTokenTypes::EndTag, selectTag.localName());
1437             processEndTag(endSelect);
1438             return;
1439         }
1440         if (token.name() == inputTag
1441             || token.name() == keygenTag
1442             || token.name() == textareaTag) {
1443             parseError(token);
1444             if (!m_tree.openElements()->inSelectScope(selectTag)) {
1445                 ASSERT(isParsingFragment());
1446                 return;
1447             }
1448             AtomicHTMLToken endSelect(HTMLTokenTypes::EndTag, selectTag.localName());
1449             processEndTag(endSelect);
1450             processStartTag(token);
1451             return;
1452         }
1453         if (token.name() == scriptTag) {
1454             bool didProcess = processStartTagForInHead(token);
1455             ASSERT_UNUSED(didProcess, didProcess);
1456             return;
1457         }
1458         break;
1459     case InTableTextMode:
1460         defaultForInTableText();
1461         processStartTag(token);
1462         break;
1463     case TextMode:
1464         ASSERT_NOT_REACHED();
1465         break;
1466     }
1467 }
1468
1469 bool HTMLTreeBuilder::processBodyEndTagForInBody(AtomicHTMLToken& token)
1470 {
1471     ASSERT(token.type() == HTMLTokenTypes::EndTag);
1472     ASSERT(token.name() == bodyTag);
1473     if (!m_tree.openElements()->inScope(bodyTag.localName())) {
1474         parseError(token);
1475         return false;
1476     }
1477     notImplemented(); // Emit a more specific parse error based on stack contents.
1478     setInsertionMode(AfterBodyMode);
1479     return true;
1480 }
1481
1482 void HTMLTreeBuilder::processAnyOtherEndTagForInBody(AtomicHTMLToken& token)
1483 {
1484     ASSERT(token.type() == HTMLTokenTypes::EndTag);
1485     HTMLElementStack::ElementRecord* record = m_tree.openElements()->topRecord();
1486     while (1) {
1487         RefPtr<ContainerNode> node = record->node();
1488         if (node->hasLocalName(token.name())) {
1489             m_tree.generateImpliedEndTags();
1490             // FIXME: The ElementRecord pointed to by record might be deleted by
1491             // the preceding call. Perhaps we should hold a RefPtr so that it
1492             // stays alive for the duration of record's scope.
1493             record = 0;
1494             if (!m_tree.currentNode()->hasLocalName(token.name())) {
1495                 parseError(token);
1496                 // FIXME: This is either a bug in the spec, or a bug in our
1497                 // implementation.  Filed a bug with HTML5:
1498                 // http://www.w3.org/Bugs/Public/show_bug.cgi?id=10080
1499                 // We might have already popped the node for the token in
1500                 // generateImpliedEndTags, just abort.
1501                 if (!m_tree.openElements()->contains(toElement(node.get())))
1502                     return;
1503             }
1504             m_tree.openElements()->popUntilPopped(toElement(node.get()));
1505             return;
1506         }
1507         if (isSpecialNode(node.get())) {
1508             parseError(token);
1509             return;
1510         }
1511         record = record->next();
1512     }
1513 }
1514
1515 // FIXME: This probably belongs on HTMLElementStack.
1516 HTMLElementStack::ElementRecord* HTMLTreeBuilder::furthestBlockForFormattingElement(Element* formattingElement)
1517 {
1518     HTMLElementStack::ElementRecord* furthestBlock = 0;
1519     HTMLElementStack::ElementRecord* record = m_tree.openElements()->topRecord();
1520     for (; record; record = record->next()) {
1521         if (record->element() == formattingElement)
1522             return furthestBlock;
1523         if (isSpecialNode(record->element()))
1524             furthestBlock = record;
1525     }
1526     ASSERT_NOT_REACHED();
1527     return 0;
1528 }
1529
1530 // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#parsing-main-inbody
     //
     // Runs the adoption agency algorithm for |token|. The token's name is used
     // to look up the closest matching entry in the list of active formatting
     // elements; that element and the special node found by
     // furthestBlockForFormattingElement() are then used to re-parent nodes and
     // to replace the formatting element with a freshly created copy. The
     // numbered comments below appear to mirror the step numbers of the spec
     // algorithm linked above. The function returns early as soon as a pass
     // finds nothing to re-parent; otherwise it stops after
     // outerIterationLimit passes.
1531 void HTMLTreeBuilder::callTheAdoptionAgency(AtomicHTMLToken& token)
1532 {
1533     // The adoption agency algorithm is N^2. We limit the number of iterations
1534     // to stop from hanging the whole browser. This limit is specified in the
1535     // adoption agency algorithm: 
1536     // http://www.whatwg.org/specs/web-apps/current-work/multipage/tree-construction.html#parsing-main-inbody
1537     static const int outerIterationLimit = 8;
1538     static const int innerIterationLimit = 3;
1539
1540     for (int i = 0; i < outerIterationLimit; ++i) {
1541         // 1.
1542         Element* formattingElement = m_tree.activeFormattingElements()->closestElementInScopeWithName(token.name());
             // Nothing to do if there is no such formatting element, or if it is
             // on the stack of open elements but not in scope.
1543         if (!formattingElement || ((m_tree.openElements()->contains(formattingElement)) && !m_tree.openElements()->inScope(formattingElement))) {
1544             parseError(token);
1545             notImplemented(); // Check the stack of open elements for a more specific parse error.
1546             return;
1547         }
1548         HTMLElementStack::ElementRecord* formattingElementRecord = m_tree.openElements()->find(formattingElement);
             // The element is in the active formatting list but no longer on the
             // stack of open elements: just drop it from the list.
1549         if (!formattingElementRecord) {
1550             parseError(token);
1551             m_tree.activeFormattingElements()->remove(formattingElement);
1552             return;
1553         }
1554         if (formattingElement != m_tree.currentElement())
1555             parseError(token);
1556         // 2.
1557         HTMLElementStack::ElementRecord* furthestBlock = furthestBlockForFormattingElement(formattingElement);
1558         // 3.
             // No furthest block: pop the stack through the formatting element,
             // remove it from the active formatting list, and finish.
1559         if (!furthestBlock) {
1560             m_tree.openElements()->popUntilPopped(formattingElement);
1561             m_tree.activeFormattingElements()->remove(formattingElement);
1562             return;
1563         }
1564         // 4.
1565         ASSERT(furthestBlock->isAbove(formattingElementRecord));
             // The common ancestor is the node of the record that follows the
             // formatting element's record (via next()); a reference is held so
             // it stays valid while the stack is modified below.
1566         RefPtr<ContainerNode> commonAncestor = formattingElementRecord->next()->node();
1567         // 5.
1568         HTMLFormattingElementList::Bookmark bookmark = m_tree.activeFormattingElements()->bookmarkFor(formattingElement);
1569         // 6.
1570         HTMLElementStack::ElementRecord* node = furthestBlock;
1571         HTMLElementStack::ElementRecord* nextNode = node->next();
1572         HTMLElementStack::ElementRecord* lastNode = furthestBlock;
             // NOTE: this loop counter shadows the outer loop's |i|; the two
             // counters are independent.
1573         for (int i = 0; i < innerIterationLimit; ++i) {
1574             // 6.1
1575             node = nextNode;
1576             ASSERT(node);
1577             nextNode = node->next(); // Save node->next() for the next iteration in case node is deleted in 6.2.
1578             // 6.2
                 // Records whose element is not in the active formatting list are
                 // removed from the stack of open elements and skipped.
1579             if (!m_tree.activeFormattingElements()->contains(node->element())) {
1580                 m_tree.openElements()->remove(node->element());
1581                 node = 0;
1582                 continue;
1583             }
1584             // 6.3
1585             if (node == formattingElementRecord)
1586                 break;
1587             // 6.5
                 // Replace node's element, both in the active formatting list entry
                 // and in the stack record, with a newly created element.
1588             RefPtr<Element> newElement = m_tree.createHTMLElementFromElementRecord(node);
1589             HTMLFormattingElementList::Entry* nodeEntry = m_tree.activeFormattingElements()->find(node->element());
1590             nodeEntry->replaceElement(newElement.get());
1591             node->replaceElement(newElement.release());
1592             // 6.4 -- Intentionally out of order to handle the case where node
1593             // was replaced in 6.5.
1594             // http://www.w3.org/Bugs/Public/show_bug.cgi?id=10096
1595             if (lastNode == furthestBlock)
1596                 bookmark.moveToAfter(nodeEntry);
1597             // 6.6
                 // Detach lastNode's element from its current parent (if any) and
                 // append it to node's (replaced) element, attaching it lazily when
                 // the new parent is attached and it is not.
1598             if (ContainerNode* parent = lastNode->element()->parentNode())
1599                 parent->parserRemoveChild(lastNode->element());
1600             node->element()->parserAddChild(lastNode->element());
1601             if (lastNode->element()->parentElement()->attached() && !lastNode->element()->attached())
1602                 lastNode->element()->lazyAttach();
1603             // 6.7
1604             lastNode = node;
1605         }
1606         // 7
             // Move lastNode's element under the common ancestor. When the common
             // ancestor is a table, a tr, or a table body context tag, the element
             // is foster-parented instead of being appended directly.
1607         const AtomicString& commonAncestorTag = commonAncestor->localName();
1608         if (ContainerNode* parent = lastNode->element()->parentNode())
1609             parent->parserRemoveChild(lastNode->element());
1610         // FIXME: If this moves to HTMLConstructionSite, this check should use
1611         // causesFosterParenting(tagName) instead.
1612         if (commonAncestorTag == tableTag
1613             || commonAncestorTag == trTag
1614             || isTableBodyContextTag(commonAncestorTag))
1615             m_tree.fosterParent(lastNode->element());
1616         else {
1617             commonAncestor->parserAddChild(lastNode->element());
1618             ASSERT(lastNode->node()->isElementNode());
1619             ASSERT(lastNode->element()->parentNode());
1620             if (lastNode->element()->parentNode()->attached() && !lastNode->element()->attached())
1621                 lastNode->element()->lazyAttach();
1622         }
1623         // 8
             // Create the replacement for the formatting element.
1624         RefPtr<Element> newElement = m_tree.createHTMLElementFromElementRecord(formattingElementRecord);
1625         // 9
             // The replacement takes over all children of the furthest block.
1626         newElement->takeAllChildrenFrom(furthestBlock->element());
1627         // 10
             // The replacement then becomes a child of the furthest block.
1628         Element* furthestBlockElement = furthestBlock->element();
1629         // FIXME: All this creation / parserAddChild / attach business should
1630         //        be in HTMLConstructionSite.  My guess is that steps 8--12
1631         //        should all be in some HTMLConstructionSite function.
1632         furthestBlockElement->parserAddChild(newElement);
1633         if (furthestBlockElement->attached() && !newElement->attached()) {
1634             // Notice that newElement might already be attached if, for example, one of the reparented
1635             // children is a style element, which attaches itself automatically.
1636             newElement->attach();
1637         }
1638         // 11
             // In the active formatting list, swap the old formatting element for
             // the replacement at the bookmarked position.
1639         m_tree.activeFormattingElements()->swapTo(formattingElement, newElement.get(), bookmark);
1640         // 12
             // On the stack of open elements, remove the old formatting element
             // and insert the replacement above the furthest block.
1641         m_tree.openElements()->remove(formattingElement);
1642         m_tree.openElements()->insertAbove(newElement, furthestBlock);
1643     }
1644 }
1645
1646 void HTMLTreeBuilder::resetInsertionModeAppropriately()
1647 {
1648     // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#reset-the-insertion-mode-appropriately
1649     bool last = false;
1650     HTMLElementStack::ElementRecord* nodeRecord = m_tree.openElements()->topRecord();
1651     while (1) {
1652         ContainerNode* node = nodeRecord->node();
1653         if (node == m_tree.openElements()->rootNode()) {
1654             ASSERT(isParsingFragment());
1655             last = true;
1656             node = m_fragmentContext.contextElement();
1657         }
1658         if (node->hasTagName(selectTag)) {
1659             ASSERT(isParsingFragment());
1660             return setInsertionMode(InSelectMode);
1661         }
1662         if (node->hasTagName(tdTag) || node->hasTagName(thTag))
1663             return setInsertionMode(InCellMode);
1664         if (node->hasTagName(trTag))
1665             return setInsertionMode(InRowMode);
1666         if (node->hasTagName(tbodyTag) || node->hasTagName(theadTag) || node->hasTagName(tfootTag))
1667             return setInsertionMode(InTableBodyMode);
1668         if (node->hasTagName(captionTag))
1669             return setInsertionMode(InCaptionMode);
1670         if (node->hasTagName(colgroupTag)) {
1671             ASSERT(isParsingFragment());
1672             return setInsertionMode(InColumnGroupMode);
1673         }
1674         if (node->hasTagName(tableTag))
1675             return setInsertionMode(InTableMode);
1676         if (node->hasTagName(headTag)) {
1677             ASSERT(isParsingFragment());
1678             return setInsertionMode(InBodyMode);
1679         }
1680         if (node->hasTagName(bodyTag))
1681             return setInsertionMode(InBodyMode);
1682         if (node->hasTagName(framesetTag)) {
1683             ASSERT(isParsingFragment());
1684             return setInsertionMode(InFramesetMode);
1685         }
1686         if (node->hasTagName(htmlTag)) {
1687             ASSERT(isParsingFragment());
1688             return setInsertionMode(BeforeHeadMode);
1689         }
1690         if (last) {
1691             ASSERT(isParsingFragment());
1692             return setInsertionMode(InBodyMode);
1693         }
1694         nodeRecord = nodeRecord->next();
1695     }
1696 }
1697
1698 void HTMLTreeBuilder::processEndTagForInTableBody(AtomicHTMLToken& token)
1699 {
1700     ASSERT(token.type() == HTMLTokenTypes::EndTag);
1701     if (isTableBodyContextTag(token.name())) {
1702         if (!m_tree.openElements()->inTableScope(token.name())) {
1703             parseError(token);
1704             return;
1705         }
1706         m_tree.openElements()->popUntilTableBodyScopeMarker();
1707         m_tree.openElements()->pop();
1708         setInsertionMode(InTableMode);
1709         return;
1710     }
1711     if (token.name() == tableTag) {
1712         // FIXME: This is slow.
1713         if (!m_tree.openElements()->inTableScope(tbodyTag.localName()) && !m_tree.openElements()->inTableScope(theadTag.localName()) && !m_tree.openElements()->inTableScope(tfootTag.localName())) {
1714             ASSERT(isParsingFragment());
1715             parseError(token);
1716             return;
1717         }
1718         m_tree.openElements()->popUntilTableBodyScopeMarker();
1719         ASSERT(isTableBodyContextTag(m_tree.currentElement()->localName()));
1720         processFakeEndTag(m_tree.currentElement()->tagQName());
1721         processEndTag(token);
1722         return;
1723     }
1724     if (token.name() == bodyTag
1725         || isCaptionColOrColgroupTag(token.name())
1726         || token.name() == htmlTag
1727         || isTableCellContextTag(token.name())
1728         || token.name() == trTag) {
1729         parseError(token);
1730         return;
1731     }
1732     processEndTagForInTable(token);
1733 }
1734
1735 void HTMLTreeBuilder::processEndTagForInRow(AtomicHTMLToken& token)
1736 {
1737     ASSERT(token.type() == HTMLTokenTypes::EndTag);
1738     if (token.name() == trTag) {
1739         processTrEndTagForInRow();
1740         return;
1741     }
1742     if (token.name() == tableTag) {
1743         if (!processTrEndTagForInRow()) {
1744             ASSERT(isParsingFragment());
1745             return;
1746         }
1747         ASSERT(insertionMode() == InTableBodyMode);
1748         processEndTag(token);
1749         return;
1750     }
1751     if (isTableBodyContextTag(token.name())) {
1752         if (!m_tree.openElements()->inTableScope(token.name())) {
1753             parseError(token);
1754             return;
1755         }
1756         processFakeEndTag(trTag);
1757         ASSERT(insertionMode() == InTableBodyMode);
1758         processEndTag(token);
1759         return;
1760     }
1761     if (token.name() == bodyTag
1762         || isCaptionColOrColgroupTag(token.name())
1763         || token.name() == htmlTag
1764         || isTableCellContextTag(token.name())) {
1765         parseError(token);
1766         return;
1767     }
1768     processEndTagForInTable(token);
1769 }
1770
1771 void HTMLTreeBuilder::processEndTagForInCell(AtomicHTMLToken& token)
1772 {
1773     ASSERT(token.type() == HTMLTokenTypes::EndTag);
1774     if (isTableCellContextTag(token.name())) {
1775         if (!m_tree.openElements()->inTableScope(token.name())) {
1776             parseError(token);
1777             return;
1778         }
1779         m_tree.generateImpliedEndTags();
1780         if (!m_tree.currentNode()->hasLocalName(token.name()))
1781             parseError(token);
1782         m_tree.openElements()->popUntilPopped(token.name());
1783         m_tree.activeFormattingElements()->clearToLastMarker();
1784         setInsertionMode(InRowMode);
1785         return;
1786     }
1787     if (token.name() == bodyTag
1788         || isCaptionColOrColgroupTag(token.name())
1789         || token.name() == htmlTag) {
1790         parseError(token);
1791         return;
1792     }
1793     if (token.name() == tableTag
1794         || token.name() == trTag
1795         || isTableBodyContextTag(token.name())) {
1796         if (!m_tree.openElements()->inTableScope(token.name())) {
1797             ASSERT(isTableBodyContextTag(token.name()) || isParsingFragment());
1798             parseError(token);
1799             return;
1800         }
1801         closeTheCell();
1802         processEndTag(token);
1803         return;
1804     }
1805     processEndTagForInBody(token);
1806 }
1807
1808 void HTMLTreeBuilder::processEndTagForInBody(AtomicHTMLToken& token)
1809 {
1810     ASSERT(token.type() == HTMLTokenTypes::EndTag);
1811     if (token.name() == bodyTag) {
1812         processBodyEndTagForInBody(token);
1813         return;
1814     }
1815     if (token.name() == htmlTag) {
1816         AtomicHTMLToken endBody(HTMLTokenTypes::EndTag, bodyTag.localName());
1817         if (processBodyEndTagForInBody(endBody))
1818             processEndTag(token);
1819         return;
1820     }
1821     if (token.name() == addressTag
1822         || token.name() == articleTag
1823         || token.name() == asideTag
1824         || token.name() == blockquoteTag
1825         || token.name() == buttonTag
1826         || token.name() == centerTag
1827         || token.name() == detailsTag
1828         || token.name() == dirTag
1829         || token.name() == divTag
1830         || token.name() == dlTag
1831         || token.name() == fieldsetTag
1832         || token.name() == figcaptionTag
1833         || token.name() == figureTag
1834         || token.name() == footerTag
1835         || token.name() == headerTag
1836         || token.name() == hgroupTag
1837         || token.name() == listingTag
1838         || token.name() == menuTag
1839         || token.name() == navTag
1840         || token.name() == olTag
1841         || token.name() == preTag
1842         || token.name() == sectionTag
1843         || token.name() == summaryTag
1844         || token.name() == ulTag) {
1845         if (!m_tree.openElements()->inScope(token.name())) {
1846             parseError(token);
1847             return;
1848         }
1849         m_tree.generateImpliedEndTags();
1850         if (!m_tree.currentNode()->hasLocalName(token.name()))
1851             parseError(token);
1852         m_tree.openElements()->popUntilPopped(token.name());
1853         return;
1854     }
1855     if (token.name() == formTag) {
1856         RefPtr<Element> node = m_tree.takeForm();
1857         if (!node || !m_tree.openElements()->inScope(node.get())) {
1858             parseError(token);
1859             return;
1860         }
1861         m_tree.generateImpliedEndTags();
1862         if (m_tree.currentElement() != node.get())
1863             parseError(token);
1864         m_tree.openElements()->remove(node.get());
1865     }
1866     if (token.name() == pTag) {
1867         if (!m_tree.openElements()->inButtonScope(token.name())) {
1868             parseError(token);
1869             processFakeStartTag(pTag);
1870             ASSERT(m_tree.openElements()->inScope(token.name()));
1871             processEndTag(token);
1872             return;
1873         }
1874         m_tree.generateImpliedEndTagsWithExclusion(token.name());
1875         if (!m_tree.currentNode()->hasLocalName(token.name()))
1876             parseError(token);
1877         m_tree.openElements()->popUntilPopped(token.name());
1878         return;
1879     }
1880     if (token.name() == liTag) {
1881         if (!m_tree.openElements()->inListItemScope(token.name())) {
1882             parseError(token);
1883             return;
1884         }
1885         m_tree.generateImpliedEndTagsWithExclusion(token.name());
1886         if (!m_tree.currentNode()->hasLocalName(token.name()))
1887             parseError(token);
1888         m_tree.openElements()->popUntilPopped(token.name());
1889         return;
1890     }
1891     if (token.name() == ddTag
1892         || token.name() == dtTag) {
1893         if (!m_tree.openElements()->inScope(token.name())) {
1894             parseError(token);
1895             return;
1896         }
1897         m_tree.generateImpliedEndTagsWithExclusion(token.name());
1898         if (!m_tree.currentNode()->hasLocalName(token.name()))
1899             parseError(token);
1900         m_tree.openElements()->popUntilPopped(token.name());
1901         return;
1902     }
1903     if (isNumberedHeaderTag(token.name())) {
1904         if (!m_tree.openElements()->hasNumberedHeaderElementInScope()) {
1905             parseError(token);
1906             return;
1907         }
1908         m_tree.generateImpliedEndTags();
1909         if (!m_tree.currentNode()->hasLocalName(token.name()))
1910             parseError(token);
1911         m_tree.openElements()->popUntilNumberedHeaderElementPopped();
1912         return;
1913     }
1914     if (isFormattingTag(token.name())) {
1915         callTheAdoptionAgency(token);
1916         return;
1917     }
1918     if (token.name() == appletTag
1919         || token.name() == marqueeTag
1920         || token.name() == objectTag) {
1921         if (!m_tree.openElements()->inScope(token.name())) {
1922             parseError(token);
1923             return;
1924         }
1925         m_tree.generateImpliedEndTags();
1926         if (!m_tree.currentNode()->hasLocalName(token.name()))
1927             parseError(token);
1928         m_tree.openElements()->popUntilPopped(token.name());
1929         m_tree.activeFormattingElements()->clearToLastMarker();
1930         return;
1931     }
1932     if (token.name() == brTag) {
1933         parseError(token);
1934         processFakeStartTag(brTag);
1935         return;
1936     }
1937     processAnyOtherEndTagForInBody(token);
1938 }
1939
1940 bool HTMLTreeBuilder::processCaptionEndTagForInCaption()
1941 {
1942     if (!m_tree.openElements()->inTableScope(captionTag.localName())) {
1943         ASSERT(isParsingFragment());
1944         // FIXME: parse error
1945         return false;
1946     }
1947     m_tree.generateImpliedEndTags();
1948     // FIXME: parse error if (!m_tree.currentElement()->hasTagName(captionTag))
1949     m_tree.openElements()->popUntilPopped(captionTag.localName());
1950     m_tree.activeFormattingElements()->clearToLastMarker();
1951     setInsertionMode(InTableMode);
1952     return true;
1953 }
1954
1955 bool HTMLTreeBuilder::processTrEndTagForInRow()
1956 {
1957     if (!m_tree.openElements()->inTableScope(trTag.localName())) {
1958         ASSERT(isParsingFragment());
1959         // FIXME: parse error
1960         return false;
1961     }
1962     m_tree.openElements()->popUntilTableRowScopeMarker();
1963     ASSERT(m_tree.currentElement()->hasTagName(trTag));
1964     m_tree.openElements()->pop();
1965     setInsertionMode(InTableBodyMode);
1966     return true;
1967 }
1968
1969 bool HTMLTreeBuilder::processTableEndTagForInTable()
1970 {
1971     if (!m_tree.openElements()->inTableScope(tableTag)) {
1972         ASSERT(isParsingFragment());
1973         // FIXME: parse error.
1974         return false;
1975     }
1976     m_tree.openElements()->popUntilPopped(tableTag.localName());
1977     resetInsertionModeAppropriately();
1978     return true;
1979 }
1980
1981 void HTMLTreeBuilder::processEndTagForInTable(AtomicHTMLToken& token)
1982 {
1983     ASSERT(token.type() == HTMLTokenTypes::EndTag);
1984     if (token.name() == tableTag) {
1985         processTableEndTagForInTable();
1986         return;
1987     }
1988     if (token.name() == bodyTag
1989         || isCaptionColOrColgroupTag(token.name())
1990         || token.name() == htmlTag
1991         || isTableBodyContextTag(token.name())
1992         || isTableCellContextTag(token.name())
1993         || token.name() == trTag) {
1994         parseError(token);
1995         return;
1996     }
1997     // Is this redirection necessary here?
1998     HTMLConstructionSite::RedirectToFosterParentGuard redirecter(m_tree);
1999     processEndTagForInBody(token);
2000 }
2001
// Dispatches an end tag token on the current insertion mode.
//
// The cases from InitialMode down to InBodyMode form a chain of deliberate
// fall-throughs: each one either consumes the token and returns, or calls its
// defaultFor*() helper and continues in the next case. The ASSERT at the top
// of every case states the insertion mode expected on entry to that case,
// including when it is reached by falling through.
//
// Several cases reprocess the token by calling processEndTag() again after
// changing the insertion mode.
2002 void HTMLTreeBuilder::processEndTag(AtomicHTMLToken& token)
2003 {
2004     ASSERT(token.type() == HTMLTokenTypes::EndTag);
2005     switch (insertionMode()) {
2006     case InitialMode:
2007         ASSERT(insertionMode() == InitialMode);
2008         defaultForInitial();
2009         // Fall through.
2010     case BeforeHTMLMode:
2011         ASSERT(insertionMode() == BeforeHTMLMode);
              // Only </head>, </body>, </html> and </br> continue past this point;
              // every other end tag is a parse error and is dropped.
2012         if (token.name() != headTag && token.name() != bodyTag && token.name() != htmlTag && token.name() != brTag) {
2013             parseError(token);
2014             return;
2015         }
2016         defaultForBeforeHTML();
2017         // Fall through.
2018     case BeforeHeadMode:
2019         ASSERT(insertionMode() == BeforeHeadMode);
2020         if (token.name() != headTag && token.name() != bodyTag && token.name() != htmlTag && token.name() != brTag) {
2021             parseError(token);
2022             return;
2023         }
2024         defaultForBeforeHead();
2025         // Fall through.
2026     case InHeadMode:
2027         ASSERT(insertionMode() == InHeadMode);
2028         if (token.name() == headTag) {
2029             m_tree.openElements()->popHTMLHeadElement();
2030             setInsertionMode(AfterHeadMode);
2031             return;
2032         }
2033         if (token.name() != bodyTag && token.name() != htmlTag && token.name() != brTag) {
2034             parseError(token);
2035             return;
2036         }
2037         defaultForInHead();
2038         // Fall through.
2039     case AfterHeadMode:
2040         ASSERT(insertionMode() == AfterHeadMode);
2041         if (token.name() != bodyTag && token.name() != htmlTag && token.name() != brTag) {
2042             parseError(token);
2043             return;
2044         }
2045         defaultForAfterHead();
2046         // Fall through
2047     case InBodyMode:
2048         ASSERT(insertionMode() == InBodyMode);
2049         processEndTagForInBody(token);
2050         break;
2051     case InTableMode:
2052         ASSERT(insertionMode() == InTableMode);
2053         processEndTagForInTable(token);
2054         break;
2055     case InCaptionMode:
2056         ASSERT(insertionMode() == InCaptionMode);
2057         if (token.name() == captionTag) {
2058             processCaptionEndTagForInCaption();
2059             return;
2060         }
2061         if (token.name() == tableTag) {
                  // </table> inside a caption: close the caption first, then
                  // reprocess the token in the mode that leaves us in.
2062             parseError(token);
2063             if (!processCaptionEndTagForInCaption()) {
2064                 ASSERT(isParsingFragment());
2065                 return;
2066             }
2067             processEndTag(token);
2068             return;
2069         }
2070         if (token.name() == bodyTag
2071             || token.name() == colTag
2072             || token.name() == colgroupTag
2073             || token.name() == htmlTag
2074             || isTableBodyContextTag(token.name())
2075             || isTableCellContextTag(token.name())
2076             || token.name() == trTag) {
2077             parseError(token);
2078             return;
2079         }
2080         processEndTagForInBody(token);
2081         break;
2082     case InColumnGroupMode:
2083         ASSERT(insertionMode() == InColumnGroupMode);
2084         if (token.name() == colgroupTag) {
2085             processColgroupEndTagForInColumnGroup();
2086             return;
2087         }
2088         if (token.name() == colTag) {
2089             parseError(token);
2090             return;
2091         }
              // Any other end tag: act as if </colgroup> had been seen and, if that
              // was accepted, reprocess the token.
2092         if (!processColgroupEndTagForInColumnGroup()) {
2093             ASSERT(isParsingFragment());
2094             return;
2095         }
2096         processEndTag(token);
2097         break;
2098     case InRowMode:
2099         ASSERT(insertionMode() == InRowMode);
2100         processEndTagForInRow(token);
2101         break;
2102     case InCellMode:
2103         ASSERT(insertionMode() == InCellMode);
2104         processEndTagForInCell(token);
2105         break;
2106     case InTableBodyMode:
2107         ASSERT(insertionMode() == InTableBodyMode);
2108         processEndTagForInTableBody(token);
2109         break;
2110     case AfterBodyMode:
2111         ASSERT(insertionMode() == AfterBodyMode);
2112         if (token.name() == htmlTag) {
2113             if (isParsingFragment()) {
2114                 parseError(token);
2115                 return;
2116             }
2117             setInsertionMode(AfterAfterBodyMode);
2118             return;
2119         }
2120         // Fall through.
2121     case AfterAfterBodyMode:
2122         ASSERT(insertionMode() == AfterBodyMode || insertionMode() == AfterAfterBodyMode);
              // Parse error: switch back to InBodyMode and reprocess the token there.
2123         parseError(token);
2124         setInsertionMode(InBodyMode);
2125         processEndTag(token);
2126         break;
2127     case InHeadNoscriptMode:
2128         ASSERT(insertionMode() == InHeadNoscriptMode);
2129         if (token.name() == noscriptTag) {
2130             ASSERT(m_tree.currentElement()->hasTagName(noscriptTag));
2131             m_tree.openElements()->pop();
2132             ASSERT(m_tree.currentElement()->hasTagName(headTag));
2133             setInsertionMode(InHeadMode);
2134             return;
2135         }
              // Apart from </noscript>, only </br> is processed further.
2136         if (token.name() != brTag) {
2137             parseError(token);
2138             return;
2139         }
2140         defaultForInHeadNoscript();
2141         processToken(token);
2142         break;
2143     case TextMode:
2144         if (token.name() == scriptTag) {
2145             // Pause ourselves so that parsing stops until the script can be processed by the caller.
2146             m_isPaused = true;
2147             ASSERT(m_tree.currentElement()->hasTagName(scriptTag));
2148             m_scriptToProcess = m_tree.currentElement();
2149             m_scriptToProcessStartPosition = m_lastScriptElementStartPosition;
2150             m_tree.openElements()->pop();
                  // When the fragment context does not allow scripting, the script
                  // element's children are removed.
2151             if (isParsingFragment() && m_fragmentContext.scriptingPermission() == FragmentScriptingNotAllowed)
2152                 m_scriptToProcess->removeAllChildren();
2153             setInsertionMode(m_originalInsertionMode);
2154
2155             // This token will not have been created by the tokenizer if a
2156             // self-closing script tag was encountered and pre-HTML5 parser
2157             // quirks are enabled. We must set the tokenizer's state to
2158             // DataState explicitly if the tokenizer didn't have a chance to.
2159             ASSERT(m_parser->tokenizer()->state() == HTMLTokenizerState::DataState || m_usePreHTML5ParserQuirks);
2160             m_parser->tokenizer()->setState(HTMLTokenizerState::DataState);
2161             return;
2162         }
              // Any other end tag: pop the current element and restore the insertion
              // mode saved in m_originalInsertionMode.
2163         m_tree.openElements()->pop();
2164         setInsertionMode(m_originalInsertionMode);
2165         break;
2166     case InFramesetMode:
2167         ASSERT(insertionMode() == InFramesetMode);
2168         if (token.name() == framesetTag) {
                  // The root node of the stack is never popped by </frameset>.
2169             if (m_tree.currentNode() == m_tree.openElements()->rootNode()) {
2170                 parseError(token);
2171                 return;
2172             }
2173             m_tree.openElements()->pop();
                  // Outside of fragment parsing, leaving the outermost frameset
                  // moves on to AfterFramesetMode.
2174             if (!isParsingFragment() && !m_tree.currentElement()->hasTagName(framesetTag))
2175                 setInsertionMode(AfterFramesetMode);
2176             return;
2177         }
              // Other end tags are dropped here without calling parseError().
2178         break;
2179     case AfterFramesetMode:
2180         ASSERT(insertionMode() == AfterFramesetMode);
2181         if (token.name() == htmlTag) {
2182             setInsertionMode(AfterAfterFramesetMode);
2183             return;
2184         }
2185         // Fall through.
2186     case AfterAfterFramesetMode:
2187         ASSERT(insertionMode() == AfterFramesetMode || insertionMode() == AfterAfterFramesetMode);
2188         parseError(token);
2189         break;
2190     case InSelectInTableMode:
2191         ASSERT(insertionMode() == InSelectInTableMode);
2192         if (token.name() == captionTag
2193             || token.name() == tableTag
2194             || isTableBodyContextTag(token.name())
2195             || token.name() == trTag
2196             || isTableCellContextTag(token.name())) {
                  // A table-related end tag: always a parse error. If a matching
                  // element is in table scope, close the select with a synthesized
                  // </select> and then reprocess the token.
2197             parseError(token);
2198             if (m_tree.openElements()->inTableScope(token.name())) {
2199                 AtomicHTMLToken endSelect(HTMLTokenTypes::EndTag, selectTag.localName());
2200                 processEndTag(endSelect);
2201                 processEndTag(token);
2202             }
2203             return;
2204         }
2205         // Fall through.
2206     case InSelectMode:
2207         ASSERT(insertionMode() == InSelectMode || insertionMode() == InSelectInTableMode);
2208         if (token.name() == optgroupTag) {
                  // An option directly inside an optgroup is closed first so that
                  // the optgroup becomes the current node.
2209             if (m_tree.currentNode()->hasTagName(optionTag) && m_tree.oneBelowTop()->hasTagName(optgroupTag))
2210                 processFakeEndTag(optionTag);
2211             if (m_tree.currentNode()->hasTagName(optgroupTag)) {
2212                 m_tree.openElements()->pop();
2213                 return;
2214             }
2215             parseError(token);
2216             return;
2217         }
2218         if (token.name() == optionTag) {
2219             if (m_tree.currentNode()->hasTagName(optionTag)) {
2220                 m_tree.openElements()->pop();
2221                 return;
2222             }
2223             parseError(token);
2224             return;
2225         }
2226         if (token.name() == selectTag) {
2227             if (!m_tree.openElements()->inSelectScope(token.name())) {
2228                 ASSERT(isParsingFragment());
2229                 parseError(token);
2230                 return;
2231             }
2232             m_tree.openElements()->popUntilPopped(selectTag.localName());
2233             resetInsertionModeAppropriately();
2234             return;
2235         }
              // Other end tags are dropped here without calling parseError().
2236         break;
2237     case InTableTextMode:
              // NOTE(review): defaultForInTableText() presumably leaves
              // InTableTextMode, otherwise this recursion would not terminate --
              // confirm against its definition.
2238         defaultForInTableText();
2239         processEndTag(token);
2240         break;
2241     }
2242 }
2243
2244 void HTMLTreeBuilder::processComment(AtomicHTMLToken& token)
2245 {
2246     ASSERT(token.type() == HTMLTokenTypes::Comment);
2247     if (m_insertionMode == InitialMode
2248         || m_insertionMode == BeforeHTMLMode
2249         || m_insertionMode == AfterAfterBodyMode
2250         || m_insertionMode == AfterAfterFramesetMode) {
2251         m_tree.insertCommentOnDocument(token);
2252         return;
2253     }
2254     if (m_insertionMode == AfterBodyMode) {
2255         m_tree.insertCommentOnHTMLHtmlElement(token);
2256         return;
2257     }
2258     if (m_insertionMode == InTableTextMode) {
2259         defaultForInTableText();
2260         processComment(token);
2261         return;
2262     }
2263     m_tree.insertComment(token);
2264 }
2265
2266 void HTMLTreeBuilder::processCharacter(AtomicHTMLToken& token)
2267 {
2268     ASSERT(token.type() == HTMLTokenTypes::Character);
2269     ExternalCharacterTokenBuffer buffer(token);
2270     processCharacterBuffer(buffer);
2271 }
2272
// Processes buffered character data according to the current insertion mode.
//
// Characters are consumed from |buffer| as they are handled. The cases from
// InitialMode down to AfterHeadMode deliberately fall through into each other
// once their leading whitespace has been dealt with. Cases that change the
// insertion mode while characters remain jump back to ReprocessBuffer so the
// rest of the buffer is handled under the new mode.
2273 void HTMLTreeBuilder::processCharacterBuffer(ExternalCharacterTokenBuffer& buffer)
2274 {
2275 ReprocessBuffer:
2276     // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#parsing-main-inbody
2277     // Note that this logic is different than the generic \r\n collapsing
2278     // handled in the input stream preprocessor. This logic is here as an
2279     // "authoring convenience" so folks can write:
2280     //
2281     // <pre>
2282     // lorem ipsum
2283     // lorem ipsum
2284     // </pre>
2285     //
2286     // without getting an extra newline at the start of their <pre> element.
2287     if (m_shouldSkipLeadingNewline) {
              // The flag is one-shot: it is cleared before the newline is skipped.
2288         m_shouldSkipLeadingNewline = false;
2289         buffer.skipAtMostOneLeadingNewline();
2290         if (buffer.isEmpty())
2291             return;
2292     }
2293
2294     switch (insertionMode()) {
2295     case InitialMode: {
2296         ASSERT(insertionMode() == InitialMode);
              // Leading whitespace is discarded in this mode.
2297         buffer.skipLeadingWhitespace();
2298         if (buffer.isEmpty())
2299             return;
2300         defaultForInitial();
2301         // Fall through.
2302     }
2303     case BeforeHTMLMode: {
2304         ASSERT(insertionMode() == BeforeHTMLMode);
2305         buffer.skipLeadingWhitespace();
2306         if (buffer.isEmpty())
2307             return;
2308         defaultForBeforeHTML();
2309         // Fall through.
2310     }
2311     case BeforeHeadMode: {
2312         ASSERT(insertionMode() == BeforeHeadMode);
2313         buffer.skipLeadingWhitespace();
2314         if (buffer.isEmpty())
2315             return;
2316         defaultForBeforeHead();
2317         // Fall through.
2318     }
2319     case InHeadMode: {
2320         ASSERT(insertionMode() == InHeadMode);
              // From here on leading whitespace is kept: it is inserted as a text
              // node before the rest of the buffer is considered.
2321         String leadingWhitespace = buffer.takeLeadingWhitespace();
2322         if (!leadingWhitespace.isEmpty())
2323             m_tree.insertTextNode(leadingWhitespace, AllWhitespace);
2324         if (buffer.isEmpty())
2325             return;
2326         defaultForInHead();
2327         // Fall through.
2328     }
2329     case AfterHeadMode: {
2330         ASSERT(insertionMode() == AfterHeadMode);
2331         String leadingWhitespace = buffer.takeLeadingWhitespace();
2332         if (!leadingWhitespace.isEmpty())
2333             m_tree.insertTextNode(leadingWhitespace, AllWhitespace);
2334         if (buffer.isEmpty())
2335             return;
2336         defaultForAfterHead();
2337         // Fall through.
2338     }
2339     case InBodyMode:
2340     case InCaptionMode:
2341     case InCellMode: {
2342         ASSERT(insertionMode() == InBodyMode || insertionMode() == InCaptionMode || insertionMode() == InCellMode);
2343         processCharacterBufferForInBody(buffer);
2344         break;
2345     }
2346     case InTableMode:
2347     case InTableBodyMode:
2348     case InRowMode: {
2349         ASSERT(insertionMode() == InTableMode || insertionMode() == InTableBodyMode || insertionMode() == InRowMode);
2350         ASSERT(m_pendingTableCharacters.isEmpty());
              // When the current node is a table, tbody, tfoot, thead or tr element,
              // remember the current mode and switch to InTableTextMode so the
              // characters are collected in m_pendingTableCharacters below.
              // Otherwise they are processed with the in-body rules while the
              // foster-parent redirection guard is alive.
2351         if (m_tree.currentNode()->isElementNode()
2352             && (m_tree.currentElement()->hasTagName(HTMLNames::tableTag)
2353                 || m_tree.currentElement()->hasTagName(HTMLNames::tbodyTag)
2354                 || m_tree.currentElement()->hasTagName(HTMLNames::tfootTag)
2355                 || m_tree.currentElement()->hasTagName(HTMLNames::theadTag)
2356                 || m_tree.currentElement()->hasTagName(HTMLNames::trTag))) {
2357             m_originalInsertionMode = m_insertionMode;
2358             setInsertionMode(InTableTextMode);
2359             // Note that we fall through to the InTableTextMode case below.
2360         } else {
2361             HTMLConstructionSite::RedirectToFosterParentGuard redirecter(m_tree);
2362             processCharacterBufferForInBody(buffer);
2363             break;
2364         }
2365         // Fall through.
2366     }
2367     case InTableTextMode: {
              // Nothing is inserted into the tree here; the remaining characters are
              // handed over to m_pendingTableCharacters.
2368         buffer.giveRemainingTo(m_pendingTableCharacters);
2369         break;
2370     }
2371     case InColumnGroupMode: {
2372         ASSERT(insertionMode() == InColumnGroupMode);
2373         String leadingWhitespace = buffer.takeLeadingWhitespace();
2374         if (!leadingWhitespace.isEmpty())
2375             m_tree.insertTextNode(leadingWhitespace, AllWhitespace);
2376         if (buffer.isEmpty())
2377             return;
              // Non-whitespace characters: act as if </colgroup> had been seen, then
              // reprocess what is left of the buffer.
2378         if (!processColgroupEndTagForInColumnGroup()) {
2379             ASSERT(isParsingFragment());
2380             // The spec tells us to drop these characters on the floor.
2381             buffer.skipLeadingNonWhitespace();
2382             if (buffer.isEmpty())
2383                 return;
2384         }
2385         goto ReprocessBuffer;
2386     }
2387     case AfterBodyMode:
2388     case AfterAfterBodyMode: {
2389         ASSERT(insertionMode() == AfterBodyMode || insertionMode() == AfterAfterBodyMode);
2390         // FIXME: parse error
2391         setInsertionMode(InBodyMode);
2392         goto ReprocessBuffer;
              // Not reached: the goto above always leaves this case.
2393         break;
2394     }
2395     case TextMode: {
2396         ASSERT(insertionMode() == TextMode);
2397         m_tree.insertTextNode(buffer.takeRemaining());
2398         break;
2399     }
2400     case InHeadNoscriptMode: {
2401         ASSERT(insertionMode() == InHeadNoscriptMode);
2402         String leadingWhitespace = buffer.takeLeadingWhitespace();
2403         if (!leadingWhitespace.isEmpty())
2404             m_tree.insertTextNode(leadingWhitespace, AllWhitespace);
2405         if (buffer.isEmpty())
2406             return;
2407         defaultForInHeadNoscript();
2408         goto ReprocessBuffer;
              // Not reached: the goto above always leaves this case.
2409         break;
2410     }
2411     case InFramesetMode:
2412     case AfterFramesetMode: {
2413         ASSERT(insertionMode() == InFramesetMode || insertionMode() == AfterFramesetMode || insertionMode() == AfterAfterFramesetMode);
              // Only the whitespace returned by takeRemainingWhitespace() is inserted.
2414         String leadingWhitespace = buffer.takeRemainingWhitespace();
2415         if (!leadingWhitespace.isEmpty())
2416             m_tree.insertTextNode(leadingWhitespace, AllWhitespace);
2417         // FIXME: We should generate a parse error if we skipped over any
2418         // non-whitespace characters.
2419         break;
2420     }
2421     case InSelectInTableMode:
2422     case InSelectMode: {
2423         ASSERT(insertionMode() == InSelectMode || insertionMode() == InSelectInTableMode);
2424         m_tree.insertTextNode(buffer.takeRemaining());
2425         break;
2426     }
2427     case AfterAfterFramesetMode: {
              // Same as the frameset cases above, except that the active formatting
              // elements are reconstructed before the whitespace is inserted.
2428         String leadingWhitespace = buffer.takeRemainingWhitespace();
2429         if (!leadingWhitespace.isEmpty()) {
2430             m_tree.reconstructTheActiveFormattingElements();
2431             m_tree.insertTextNode(leadingWhitespace, AllWhitespace);
2432         }
2433         // FIXME: We should generate a parse error if we skipped over any
2434         // non-whitespace characters.
2435         break;
2436     }
2437     }
2438 }
2439
2440 void HTMLTreeBuilder::processCharacterBufferForInBody(ExternalCharacterTokenBuffer& buffer)
2441 {
2442     m_tree.reconstructTheActiveFormattingElements();
2443     String characters = buffer.takeRemaining();
2444     m_tree.insertTextNode(characters);
2445     if (m_framesetOk && !isAllWhitespaceOrReplacementCharacters(characters))
2446         m_framesetOk = false;
2447 }
2448
// Handles an end-of-file token by dispatching on the current insertion mode.
// Cases that 'break' fall out of the switch and pop every element off the
// stack of open elements; cases that need the token reprocessed in a different
// mode call processEndOfFile() again and 'return', leaving that final cleanup
// to the nested call.
2449 void HTMLTreeBuilder::processEndOfFile(AtomicHTMLToken& token)
2450 {
2451     ASSERT(token.type() == HTMLTokenTypes::EndOfFile);
2452     switch (insertionMode()) {
         // The modes that precede the body cascade into one another: each case
         // runs its default action and falls through to the next case, whose
         // ASSERT expects that action to have advanced the insertion mode.
2453     case InitialMode:
2454         ASSERT(insertionMode() == InitialMode);
2455         defaultForInitial();
2456         // Fall through.
2457     case BeforeHTMLMode:
2458         ASSERT(insertionMode() == BeforeHTMLMode);
2459         defaultForBeforeHTML();
2460         // Fall through.
2461     case BeforeHeadMode:
2462         ASSERT(insertionMode() == BeforeHeadMode);
2463         defaultForBeforeHead();
2464         // Fall through.
2465     case InHeadMode:
2466         ASSERT(insertionMode() == InHeadMode);
2467         defaultForInHead();
2468         // Fall through.
2469     case AfterHeadMode:
2470         ASSERT(insertionMode() == AfterHeadMode);
2471         defaultForAfterHead();
2472         // Fall through
2473     case InBodyMode:
2474     case InCellMode:
2475     case InCaptionMode:
2476     case InRowMode:
2477         ASSERT(insertionMode() == InBodyMode || insertionMode() == InCellMode || insertionMode() == InCaptionMode || insertionMode() == InRowMode);
2478         notImplemented(); // Emit parse error based on what elements are still open.
2479         break;
2480     case AfterBodyMode:
2481     case AfterAfterBodyMode:
2482         ASSERT(insertionMode() == AfterBodyMode || insertionMode() == AfterAfterBodyMode);
2483         break;
2484     case InHeadNoscriptMode:
2485         ASSERT(insertionMode() == InHeadNoscriptMode);
             // Act as if </noscript> had been seen, then reprocess the EOF token.
2486         defaultForInHeadNoscript();
2487         processEndOfFile(token);
2488         return;
2489     case AfterFramesetMode:
2490     case AfterAfterFramesetMode:
2491         ASSERT(insertionMode() == AfterFramesetMode || insertionMode() == AfterAfterFramesetMode);
2492         break;
2493     case InFramesetMode:
2494     case InTableMode:
2495     case InTableBodyMode:
2496     case InSelectInTableMode:
2497     case InSelectMode:
2498         ASSERT(insertionMode() == InSelectMode || insertionMode() == InSelectInTableMode || insertionMode() == InTableMode || insertionMode() == InFramesetMode || insertionMode() == InTableBodyMode);
             // In these modes EOF is a parse error unless the current node is the
             // root of the open-element stack.
2499         if (m_tree.currentNode() != m_tree.openElements()->rootNode())
2500             parseError(token);
2501         break;
2502     case InColumnGroupMode:
             // Both early returns below are expected only while parsing a fragment
             // (see the ASSERTs); they skip the popAll() at the end of the function.
2503         if (m_tree.currentNode() == m_tree.openElements()->rootNode()) {
2504             ASSERT(isParsingFragment());
2505             return; // FIXME: Should we break here instead of returning?
2506         }
2507         if (!processColgroupEndTagForInColumnGroup()) {
2508             ASSERT(isParsingFragment());
2509             return; // FIXME: Should we break here instead of returning?
2510         }
             // The colgroup end tag was processed; reprocess the EOF token.
2511         processEndOfFile(token);
2512         return;
2513     case InTableTextMode:
             // Flush the pending table characters (which also restores the original
             // insertion mode), then reprocess the EOF token.
2514         defaultForInTableText();
2515         processEndOfFile(token);
2516         return;
2517     case TextMode:
             // EOF inside a text-mode element: report a parse error, pop the current
             // node, go back to the insertion mode that was saved when text mode was
             // entered, and reprocess the EOF token there.
2518         parseError(token);
2519         if (m_tree.currentNode()->hasTagName(scriptTag))
2520             notImplemented(); // mark the script element as "already started".
2521         m_tree.openElements()->pop();
2522         ASSERT(m_originalInsertionMode != TextMode);
2523         setInsertionMode(m_originalInsertionMode);
2524         processEndOfFile(token);
2525         return;
2526     }
         // Reached only by the cases above that 'break'.
2527     ASSERT(m_tree.currentNode());
2528     m_tree.openElements()->popAll();
2529 }
2530
2531 void HTMLTreeBuilder::defaultForInitial()
2532 {
2533     notImplemented();
2534     if (!m_fragmentContext.fragment())
2535         m_document->setCompatibilityMode(Document::QuirksMode);
2536     // FIXME: parse error
2537     setInsertionMode(BeforeHTMLMode);
2538 }
2539
2540 void HTMLTreeBuilder::defaultForBeforeHTML()
2541 {
2542     AtomicHTMLToken startHTML(HTMLTokenTypes::StartTag, htmlTag.localName());
2543     m_tree.insertHTMLHtmlStartTagBeforeHTML(startHTML);
2544     setInsertionMode(BeforeHeadMode);
2545 }
2546
2547 void HTMLTreeBuilder::defaultForBeforeHead()
2548 {
2549     AtomicHTMLToken startHead(HTMLTokenTypes::StartTag, headTag.localName());
2550     processStartTag(startHead);
2551 }
2552
2553 void HTMLTreeBuilder::defaultForInHead()
2554 {
2555     AtomicHTMLToken endHead(HTMLTokenTypes::EndTag, headTag.localName());
2556     processEndTag(endHead);
2557 }
2558
2559 void HTMLTreeBuilder::defaultForInHeadNoscript()
2560 {
2561     AtomicHTMLToken endNoscript(HTMLTokenTypes::EndTag, noscriptTag.localName());
2562     processEndTag(endNoscript);
2563 }
2564
2565 void HTMLTreeBuilder::defaultForAfterHead()
2566 {
2567     AtomicHTMLToken startBody(HTMLTokenTypes::StartTag, bodyTag.localName());
2568     processStartTag(startBody);
2569     m_framesetOk = true;
2570 }
2571
2572 void HTMLTreeBuilder::defaultForInTableText()
2573 {
2574     String characters = m_pendingTableCharacters.toString();
2575     m_pendingTableCharacters.clear();
2576     if (!isAllWhitespace(characters)) {
2577         // FIXME: parse error
2578         HTMLConstructionSite::RedirectToFosterParentGuard redirecter(m_tree);
2579         m_tree.reconstructTheActiveFormattingElements();
2580         m_tree.insertTextNode(characters, NotAllWhitespace);
2581         m_framesetOk = false;
2582         setInsertionMode(m_originalInsertionMode);
2583         return;
2584     }
2585     m_tree.insertTextNode(characters);
2586     setInsertionMode(m_originalInsertionMode);
2587 }
2588
2589 bool HTMLTreeBuilder::processStartTagForInHead(AtomicHTMLToken& token)
2590 {
2591     ASSERT(token.type() == HTMLTokenTypes::StartTag);
2592     if (token.name() == htmlTag) {
2593         m_tree.insertHTMLHtmlStartTagInBody(token);
2594         return true;
2595     }
2596     if (token.name() == baseTag
2597         || token.name() == basefontTag
2598         || token.name() == bgsoundTag
2599         || token.name() == commandTag
2600         || token.name() == linkTag
2601         || token.name() == metaTag) {
2602         m_tree.insertSelfClosingHTMLElement(token);
2603         // Note: The custom processing for the <meta> tag is done in HTMLMetaElement::process().
2604         return true;
2605     }
2606     if (token.name() == titleTag) {
2607         processGenericRCDATAStartTag(token);
2608         return true;
2609     }
2610     if (token.name() == noscriptTag) {
2611         if (scriptEnabled(m_document->frame())) {
2612             processGenericRawTextStartTag(token);
2613             return true;
2614         }
2615         m_tree.insertHTMLElement(token);
2616         setInsertionMode(InHeadNoscriptMode);
2617         return true;
2618     }
2619     if (token.name() == noframesTag || token.name() == styleTag) {
2620         processGenericRawTextStartTag(token);
2621         return true;
2622     }
2623     if (token.name() == scriptTag) {
2624         processScriptStartTag(token);
2625         if (m_usePreHTML5ParserQuirks && token.selfClosing())
2626             processFakeEndTag(scriptTag);
2627         return true;
2628     }
2629     if (token.name() == headTag) {
2630         parseError(token);
2631         return true;
2632     }
2633     return false;
2634 }
2635
// Handles a start tag whose content is RCDATA: inserts the element, switches
// the tokenizer to RCDATAState, and enters TextMode after remembering the
// current insertion mode so that it can be restored later.
2636 void HTMLTreeBuilder::processGenericRCDATAStartTag(AtomicHTMLToken& token)
2637 {
2638     ASSERT(token.type() == HTMLTokenTypes::StartTag);
2639     m_tree.insertHTMLElement(token);
2640     m_parser->tokenizer()->setState(HTMLTokenizerState::RCDATAState);
         // Must be saved before the mode is overwritten on the next line.
2641     m_originalInsertionMode = m_insertionMode;
2642     setInsertionMode(TextMode);
2643 }
2644
// Handles a start tag whose content is raw text: inserts the element, switches
// the tokenizer to RAWTEXTState, and enters TextMode after remembering the
// current insertion mode so that it can be restored later.
2645 void HTMLTreeBuilder::processGenericRawTextStartTag(AtomicHTMLToken& token)
2646 {
2647     ASSERT(token.type() == HTMLTokenTypes::StartTag);
2648     m_tree.insertHTMLElement(token);
2649     m_parser->tokenizer()->setState(HTMLTokenizerState::RAWTEXTState);
         // Must be saved before the mode is overwritten on the next line.
2650     m_originalInsertionMode = m_insertionMode;
2651     setInsertionMode(TextMode);
2652 }
2653
2654 void HTMLTreeBuilder::processScriptStartTag(AtomicHTMLToken& token)
2655 {
2656     ASSERT(token.type() == HTMLTokenTypes::StartTag);
2657     m_tree.insertScriptElement(token);
2658     m_parser->tokenizer()->setState(HTMLTokenizerState::ScriptDataState);
2659     m_originalInsertionMode = m_insertionMode;
2660
2661     TextPosition position = m_parser->textPosition();
2662
2663     ASSERT(position.m_line == m_parser->tokenizer()->lineNumber());
2664
2665     m_lastScriptElementStartPosition = position;
2666
2667     setInsertionMode(TextMode);
2668 }
2669
2670 // http://www.whatwg.org/specs/web-apps/current-work/multipage/tree-construction.html#tree-construction
// Decides whether |token| should be handled by processTokenInForeignContent().
// Returns false when there is no current node, when the current node is in the
// HTML namespace, when the current node is an integration point for this kind
// of token, or when the token is end-of-file; returns true otherwise.
2671 bool HTMLTreeBuilder::shouldProcessTokenInForeignContent(AtomicHTMLToken& token)
2672 {
         // Must be checked first: currentNode() is only fetched for a non-empty tree.
2673     if (m_tree.isEmpty())
2674         return false;
2675     ContainerNode* node = m_tree.currentNode();
2676     if (isInHTMLNamespace(node))
2677         return false;
         // At a MathML text integration point, character tokens and every start
         // tag other than <mglyph> and <malignmark> are not treated as foreign.
2678     if (HTMLElementStack::isMathMLTextIntegrationPoint(node)) {
2679         if (token.type() == HTMLTokenTypes::StartTag
2680             && token.name() != MathMLNames::mglyphTag
2681             && token.name() != MathMLNames::malignmarkTag)
2682             return false;
2683         if (token.type() == HTMLTokenTypes::Character)
2684             return false;
2685     }
         // An <svg> start tag directly inside MathML <annotation-xml> is not
         // treated as foreign.
2686     if (node->hasTagName(MathMLNames::annotation_xmlTag)
2687         && token.type() == HTMLTokenTypes::StartTag
2688         && token.name() == SVGNames::svgTag)
2689         return false;
         // At an HTML integration point, start tags and character tokens are not
         // treated as foreign.
2690     if (HTMLElementStack::isHTMLIntegrationPoint(node)) {
2691         if (token.type() == HTMLTokenTypes::StartTag)
2692             return false;
2693         if (token.type() == HTMLTokenTypes::Character)
2694             return false;
2695     }
         // End-of-file is never treated as foreign.
2696     if (token.type() == HTMLTokenTypes::EndOfFile)
2697         return false;
2698     return true;
2699 }
2700
// Handles |token| under the foreign-content rules, dispatching on the token
// type. Presumably called only after shouldProcessTokenInForeignContent() has
// returned true for the token (which would explain why EndOfFile is asserted
// unreachable here) -- TODO confirm against the caller.
2701 void HTMLTreeBuilder::processTokenInForeignContent(AtomicHTMLToken& token)
2702 {
2703     switch (token.type()) {
2704     case HTMLTokenTypes::Uninitialized:
2705         ASSERT_NOT_REACHED();
2706         break;
2707     case HTMLTokenTypes::DOCTYPE:
2708         parseError(token);
2709         break;
2710     case HTMLTokenTypes::StartTag: {
             // The HTML start tags listed here (plus <font> carrying a color, face
             // or size attribute) end the foreign content: report a parse error,
             // pop back to the foreign-content scope marker, and reprocess the
             // token as an ordinary start tag.
2711         if (token.name() == bTag
2712             || token.name() == bigTag
2713             || token.name() == blockquoteTag
2714             || token.name() == bodyTag
2715             || token.name() == brTag
2716             || token.name() == centerTag
2717             || token.name() == codeTag
2718             || token.name() == ddTag
2719             || token.name() == divTag
2720             || token.name() == dlTag
2721             || token.name() == dtTag
2722             || token.name() == emTag
2723             || token.name() == embedTag
2724             || isNumberedHeaderTag(token.name())
2725             || token.name() == headTag
2726             || token.name() == hrTag
2727             || token.name() == iTag
2728             || token.name() == imgTag
2729             || token.name() == liTag
2730             || token.name() == listingTag
2731             || token.name() == menuTag
2732             || token.name() == metaTag
2733             || token.name() == nobrTag
2734             || token.name() == olTag
2735             || token.name() == pTag
2736             || token.name() == preTag
2737             || token.name() == rubyTag
2738             || token.name() == sTag
2739             || token.name() == smallTag
2740             || token.name() == spanTag
2741             || token.name() == strongTag
2742             || token.name() == strikeTag
2743             || token.name() == subTag
2744             || token.name() == supTag
2745             || token.name() == tableTag
2746             || token.name() == ttTag
2747             || token.name() == uTag
2748             || token.name() == ulTag
2749             || token.name() == varTag
2750             || (token.name() == fontTag && (token.getAttributeItem(colorAttr) || token.getAttributeItem(faceAttr) || token.getAttributeItem(sizeAttr)))) {
2751             parseError(token);
2752             m_tree.openElements()->popUntilForeignContentScopeMarker();
2753             processStartTag(token);
2754             return;
2755         }
             // Any other start tag becomes an element in the current element's
             // namespace, after the namespace-specific name/attribute adjustments.
2756         const AtomicString& currentNamespace = m_tree.currentElement()->namespaceURI();
2757         if (currentNamespace == MathMLNames::mathmlNamespaceURI)
2758             adjustMathMLAttributes(token);
2759         if (currentNamespace == SVGNames::svgNamespaceURI) {
2760             adjustSVGTagNameCase(token);
2761             adjustSVGAttributes(token);
2762         }
2763         adjustForeignAttributes(token);
2764         m_tree.insertForeignElement(token, currentNamespace);
2765         break;
2766     }
2767     case HTMLTokenTypes::EndTag: {
2768         if (m_tree.currentNode()->namespaceURI() == SVGNames::svgNamespaceURI)
2769             adjustSVGTagNameCase(token);
2770 
             // Closing an SVG <script>: pause the tree builder and remember the
             // element in m_scriptToProcess before popping it.
2771         if (token.name() == SVGNames::scriptTag && m_tree.currentNode()->hasTagName(SVGNames::scriptTag)) {
2772             m_isPaused = true;
2773             m_scriptToProcess = m_tree.currentElement();
2774             m_tree.openElements()->pop();
2775             return;
2776         }
2777         if (!isInHTMLNamespace(m_tree.currentNode())) {
2778             // FIXME: This code just wants an Element* iterator, instead of an ElementRecord*
2779             HTMLElementStack::ElementRecord* nodeRecord = m_tree.openElements()->topRecord();
                 // It is a parse error if the end tag does not match the current node.
2780             if (!nodeRecord->node()->hasLocalName(token.name()))
2781                 parseError(token);
                 // Walk down the stack from the top: pop through the first element
                 // whose local name matches the token, or stop at the first element
                 // in the HTML namespace and use the regular end-tag handling below.
                 // NOTE(review): nodeRecord->next() is not null-checked; the loop
                 // relies on reaching an HTML-namespace element before the stack is
                 // exhausted -- confirm that invariant holds.
2782             while (1) {
2783                 if (nodeRecord->node()->hasLocalName(token.name())) {
2784                     m_tree.openElements()->popUntilPopped(nodeRecord->element());
2785                     return;
2786                 }
2787                 nodeRecord = nodeRecord->next();
2788 
2789                 if (isInHTMLNamespace(nodeRecord->node()))
2790                     break;
2791             }
2792         }
2793         // Otherwise, process the token according to the rules given in the section corresponding to the current insertion mode in HTML content.
2794         processEndTag(token);
2795         break;
2796     }
2797     case HTMLTokenTypes::Comment:
2798         m_tree.insertComment(token);
2799         return;
2800     case HTMLTokenTypes::Character: {
             // Insert the token's characters as text; anything other than
             // whitespace or replacement characters clears the frameset-ok flag.
2801         String characters = String(token.characters().data(), token.characters().size());
2802         m_tree.insertTextNode(characters);
2803         if (m_framesetOk && !isAllWhitespaceOrReplacementCharacters(characters))
2804             m_framesetOk = false;
2805         break;
2806     }
2807     case HTMLTokenTypes::EndOfFile:
2808         ASSERT_NOT_REACHED();
2809         break;
2810     }
2811 }
2812
2813 void HTMLTreeBuilder::finished()
2814 {
2815     if (isParsingFragment())
2816         return;
2817     
2818     ASSERT(m_document);
2819     // Warning, this may detach the parser. Do not do anything else after this.
2820     m_document->finishedParsing();
2821 }
2822
// Hook invoked at the places where the tree builder detects a parse error.
// The body is intentionally empty: the token is ignored and nothing is reported.
2823 void HTMLTreeBuilder::parseError(AtomicHTMLToken&)
2824 {
2825 }
2826
2827 bool HTMLTreeBuilder::scriptEnabled(Frame* frame)
2828 {
2829     if (!frame)
2830         return false;
2831     return frame->script()->canExecuteScripts(NotAboutToExecuteScript);
2832 }
2833
2834 bool HTMLTreeBuilder::pluginsEnabled(Frame* frame)
2835 {
2836     if (!frame)
2837         return false;
2838     return frame->loader()->subframeLoader()->allowPlugins(NotAboutToInstantiatePlugin);
2839 }
2840
2841 }