ebf24ad16d0a767e657a64d8f3c1130b8c79e6ea
[WebKit-https.git] / WebCore / html / HTMLTreeBuilder.cpp
1 /*
2  * Copyright (C) 2010 Google, Inc. All Rights Reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  * 1. Redistributions of source code must retain the above copyright
8  *    notice, this list of conditions and the following disclaimer.
9  * 2. Redistributions in binary form must reproduce the above copyright
10  *    notice, this list of conditions and the following disclaimer in the
11  *    documentation and/or other materials provided with the distribution.
12  *
13  * THIS SOFTWARE IS PROVIDED BY GOOGLE INC. ``AS IS'' AND ANY
14  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL GOOGLE INC. OR
17  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
20  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
21  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24  */
25
26 #include "config.h"
27 #include "HTMLTreeBuilder.h"
28
29 #include "Comment.h"
30 #include "DocumentFragment.h"
31 #include "DocumentType.h"
32 #include "Element.h"
33 #include "Frame.h"
34 #include "HTMLDocument.h"
35 #include "HTMLElementFactory.h"
36 #include "HTMLHtmlElement.h"
37 #include "HTMLNames.h"
38 #include "HTMLScriptElement.h"
39 #include "HTMLToken.h"
40 #include "HTMLTokenizer.h"
41 #include "LegacyHTMLDocumentParser.h"
42 #include "LegacyHTMLTreeBuilder.h"
43 #include "LocalizedStrings.h"
44 #if ENABLE(MATHML)
45 #include "MathMLNames.h"
46 #endif
47 #include "NotImplemented.h"
48 #if ENABLE(SVG)
49 #include "SVGNames.h"
50 #endif
51 #include "ScriptController.h"
52 #include "Settings.h"
53 #include "Text.h"
54 #include <wtf/UnusedParam.h>
55
56 namespace WebCore {
57
58 using namespace HTMLNames;
59
// Sentinel stored in the script start-line members while no line number has been recorded.
static const int uninitializedLineNumberValue = -1;
61
62 namespace {
63
64 inline bool isTreeBuilderWhitepace(UChar cc)
65 {
66     return cc == '\t' || cc == '\x0A' || cc == '\x0C' || cc == '\x0D' || cc == ' ';
67 }
68
69 inline bool hasNonWhitespace(const String& string)
70 {
71     const UChar* characters = string.characters();
72     const unsigned length = string.length();
73     for (unsigned i = 0; i < length; ++i) {
74         if (!isTreeBuilderWhitepace(characters[i]))
75             return true;
76     }
77     return false;
78 }
79
80 bool shouldUseLegacyTreeBuilder(Document* document)
81 {
82     return !document->settings() || !document->settings()->html5TreeBuilderEnabled();
83 }
84
85 bool isNumberedHeaderTag(const AtomicString& tagName)
86 {
87     return tagName == h1Tag
88         || tagName == h2Tag
89         || tagName == h3Tag
90         || tagName == h4Tag
91         || tagName == h5Tag
92         || tagName == h6Tag;
93 }
94
95 bool isCaptionColOrColgroupTag(const AtomicString& tagName)
96 {
97     return tagName == captionTag
98         || tagName == colTag
99         || tagName == colgroupTag;
100 }
101
102 bool isTableCellContextTag(const AtomicString& tagName)
103 {
104     return tagName == thTag || tagName == tdTag;
105 }
106
107 bool isTableBodyContextTag(const AtomicString& tagName)
108 {
109     return tagName == tbodyTag
110         || tagName == tfootTag
111         || tagName == theadTag;
112 }
113
114 // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#special
115 bool isSpecialTag(const AtomicString& tagName)
116 {
117     return tagName == addressTag
118         || tagName == articleTag
119         || tagName == asideTag
120         || tagName == baseTag
121         || tagName == basefontTag
122         || tagName == "bgsound"
123         || tagName == blockquoteTag
124         || tagName == bodyTag
125         || tagName == brTag
126         || tagName == buttonTag
127         || tagName == centerTag
128         || tagName == colTag
129         || tagName == colgroupTag
130         || tagName == "command"
131         || tagName == ddTag
132         || tagName == "details"
133         || tagName == dirTag
134         || tagName == divTag
135         || tagName == dlTag
136         || tagName == dtTag
137         || tagName == embedTag
138         || tagName == fieldsetTag
139         || tagName == "figure"
140         || tagName == footerTag
141         || tagName == formTag
142         || tagName == frameTag
143         || tagName == framesetTag
144         || isNumberedHeaderTag(tagName)
145         || tagName == headTag
146         || tagName == headerTag
147         || tagName == hgroupTag
148         || tagName == hrTag
149         || tagName == iframeTag
150         || tagName == imgTag
151         || tagName == inputTag
152         || tagName == isindexTag
153         || tagName == liTag
154         || tagName == linkTag
155         || tagName == listingTag
156         || tagName == menuTag
157         || tagName == metaTag
158         || tagName == navTag
159         || tagName == noembedTag
160         || tagName == noframesTag
161         || tagName == noscriptTag
162         || tagName == olTag
163         || tagName == pTag
164         || tagName == paramTag
165         || tagName == plaintextTag
166         || tagName == preTag
167         || tagName == scriptTag
168         || tagName == sectionTag
169         || tagName == selectTag
170         || tagName == styleTag
171         || isTableBodyContextTag(tagName)
172         || tagName == textareaTag
173         || tagName == titleTag
174         || tagName == trTag
175         || tagName == ulTag
176         || tagName == wbrTag
177         || tagName == xmpTag;
178 }
179
180 // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#scoping
181 // Same as isScopingTag in LegacyHTMLTreeBuilder.cpp
182 // and isScopeMarker in HTMLElementStack.cpp
183 bool isScopingTag(const AtomicString& tagName)
184 {
185     return tagName == appletTag
186         || tagName == buttonTag
187         || tagName == captionTag
188 #if ENABLE(SVG_FOREIGN_OBJECT)
189         || tagName == SVGNames::foreignObjectTag
190 #endif
191         || tagName == htmlTag
192         || tagName == marqueeTag
193         || tagName == objectTag
194         || tagName == tableTag
195         || isTableCellContextTag(tagName);
196 }
197
198 bool isNonAnchorNonNobrFormattingTag(const AtomicString& tagName)
199 {
200     return tagName == bTag
201         || tagName == bigTag
202         || tagName == codeTag
203         || tagName == emTag
204         || tagName == fontTag
205         || tagName == iTag
206         || tagName == sTag
207         || tagName == smallTag
208         || tagName == strikeTag
209         || tagName == strongTag
210         || tagName == ttTag
211         || tagName == uTag;
212 }
213
214 bool isNonAnchorFormattingTag(const AtomicString& tagName)
215 {
216     return tagName == nobrTag
217         || isNonAnchorNonNobrFormattingTag(tagName);
218 }
219
220 bool requiresRedirectToFosterParent(Element* element)
221 {
222     return element->hasTagName(tableTag)
223         || isTableBodyContextTag(element->localName())
224         || element->hasTagName(trTag);
225 }
226
227 // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#formatting
228 bool isFormattingTag(const AtomicString& tagName)
229 {
230     return tagName == aTag || isNonAnchorFormattingTag(tagName);
231 }
232
233 // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#phrasing
234 bool isPhrasingTag(const AtomicString& tagName)
235 {
236     return !isSpecialTag(tagName) && !isScopingTag(tagName) && !isFormattingTag(tagName);
237 }
238
239 bool isNotFormattingAndNotPhrasing(const Element* element)
240 {
241     // The spec often says "node is not in the formatting category, and is not
242     // in the phrasing category". !phrasing && !formatting == scoping || special
243     // scoping || special is easier to compute.
244     // FIXME: localName() is wrong for non-html content.
245     const AtomicString& tagName = element->localName();
246     return isScopingTag(tagName) || isSpecialTag(tagName);
247 }
248
249 } // namespace
250
251 HTMLTreeBuilder::HTMLTreeBuilder(HTMLTokenizer* tokenizer, HTMLDocument* document, bool reportErrors)
252     : m_framesetOk(true)
253     , m_document(document)
254     , m_tree(document, FragmentScriptingAllowed)
255     , m_reportErrors(reportErrors)
256     , m_isPaused(false)
257     , m_insertionMode(InitialMode)
258     , m_originalInsertionMode(InitialMode)
259     , m_secondaryInsertionMode(InitialMode)
260     , m_tokenizer(tokenizer)
261     , m_legacyTreeBuilder(shouldUseLegacyTreeBuilder(document) ? new LegacyHTMLTreeBuilder(document, reportErrors) : 0)
262     , m_lastScriptElementStartLine(uninitializedLineNumberValue)
263     , m_scriptToProcessStartLine(uninitializedLineNumberValue)
264     , m_fragmentScriptingPermission(FragmentScriptingAllowed)
265     , m_isParsingFragment(false)
266 {
267 }
268
// Constructs a tree builder that parses into a DocumentFragment. The document
// is taken from the fragment, error reporting is off, and the caller-supplied
// scripting permission is recorded.
// FIXME: Member variables should be grouped into self-initializing structs to
// minimize code duplication between these constructors.
HTMLTreeBuilder::HTMLTreeBuilder(HTMLTokenizer* tokenizer, DocumentFragment* fragment, FragmentScriptingPermission scriptingPermission)
    : m_framesetOk(true)
    , m_document(fragment->document())
    , m_tree(fragment->document(), scriptingPermission)
    , m_reportErrors(false) // FIXME: Why not report errors in fragments?
    , m_isPaused(false)
    , m_insertionMode(InitialMode)
    , m_originalInsertionMode(InitialMode)
    , m_secondaryInsertionMode(InitialMode)
    , m_tokenizer(tokenizer)
    // Unlike the document constructor, a legacy tree builder is always created here.
    , m_legacyTreeBuilder(new LegacyHTMLTreeBuilder(fragment, scriptingPermission))
    , m_lastScriptElementStartLine(uninitializedLineNumberValue)
    , m_scriptToProcessStartLine(uninitializedLineNumberValue)
    , m_fragmentScriptingPermission(scriptingPermission)
    , m_isParsingFragment(true)
{
}
288
// The destructor body is empty; no explicit teardown is performed here.
HTMLTreeBuilder::~HTMLTreeBuilder()
{
}
292
293 static void convertToOldStyle(AtomicHTMLToken& token, Token& oldStyleToken)
294 {
295     switch (token.type()) {
296     case HTMLToken::Uninitialized:
297     case HTMLToken::DOCTYPE:
298         ASSERT_NOT_REACHED();
299         break;
300     case HTMLToken::EndOfFile:
301         ASSERT_NOT_REACHED();
302         notImplemented();
303         break;
304     case HTMLToken::StartTag:
305     case HTMLToken::EndTag: {
306         oldStyleToken.beginTag = (token.type() == HTMLToken::StartTag);
307         oldStyleToken.selfClosingTag = token.selfClosing();
308         oldStyleToken.tagName = token.name();
309         oldStyleToken.attrs = token.takeAtributes();
310         break;
311     }
312     case HTMLToken::Comment:
313         oldStyleToken.tagName = commentAtom;
314         oldStyleToken.text = token.comment().impl();
315         break;
316     case HTMLToken::Character:
317         oldStyleToken.tagName = textAtom;
318         oldStyleToken.text = token.characters().impl();
319         break;
320     }
321 }
322
// Called when a <script> start tag has been seen: switches the tokenizer to
// the script data state. The two notImplemented() calls mark work that this
// function does not do yet.
void HTMLTreeBuilder::handleScriptStartTag()
{
    notImplemented(); // The HTML fragment case?
    m_tokenizer->setState(HTMLTokenizer::ScriptDataState);
    notImplemented(); // Save insertion mode.
}
329
330 void HTMLTreeBuilder::handleScriptEndTag(Element* scriptElement, int scriptStartLine)
331 {
332     ASSERT(!m_scriptToProcess); // Caller never called takeScriptToProcess!
333     ASSERT(m_scriptToProcessStartLine == uninitializedLineNumberValue); // Caller never called takeScriptToProcess!
334     notImplemented(); // Save insertion mode and insertion point?
335
336     // Pause ourselves so that parsing stops until the script can be processed by the caller.
337     m_isPaused = true;
338     m_scriptToProcess = scriptElement;
339     // Lexer line numbers are 0-based, ScriptSourceCode expects 1-based lines,
340     // so we convert here before passing the line number off to HTMLScriptRunner.
341     m_scriptToProcessStartLine = scriptStartLine + 1;
342 }
343
344 PassRefPtr<Element> HTMLTreeBuilder::takeScriptToProcess(int& scriptStartLine)
345 {
346     // Unpause ourselves, callers may pause us again when processing the script.
347     // The HTML5 spec is written as though scripts are executed inside the tree
348     // builder.  We pause the parser to exit the tree builder, and then resume
349     // before running scripts.
350     m_isPaused = false;
351     scriptStartLine = m_scriptToProcessStartLine;
352     m_scriptToProcessStartLine = uninitializedLineNumberValue;
353     return m_scriptToProcess.release();
354 }
355
356 HTMLTokenizer::State HTMLTreeBuilder::adjustedLexerState(HTMLTokenizer::State state, const AtomicString& tagName, Frame* frame)
357 {
358     if (tagName == textareaTag || tagName == titleTag)
359         return HTMLTokenizer::RCDATAState;
360
361     if (tagName == styleTag
362         || tagName == iframeTag
363         || tagName == xmpTag
364         || tagName == noembedTag
365         || tagName == noframesTag
366         || (tagName == noscriptTag && isScriptingFlagEnabled(frame)))
367         return HTMLTokenizer::RAWTEXTState;
368
369     if (tagName == plaintextTag)
370         return HTMLTokenizer::PLAINTEXTState;
371
372     return state;
373 }
374
375 void HTMLTreeBuilder::passTokenToLegacyParser(HTMLToken& token)
376 {
377     if (token.type() == HTMLToken::DOCTYPE) {
378         DoctypeToken doctypeToken;
379         doctypeToken.m_name.append(token.name().data(), token.name().size());
380         doctypeToken.m_publicID = token.publicIdentifier();
381         doctypeToken.m_systemID = token.systemIdentifier();
382         doctypeToken.m_forceQuirks = token.forceQuirks();
383
384         m_legacyTreeBuilder->parseDoctypeToken(&doctypeToken);
385         return;
386     }
387
388     if (token.type() == HTMLToken::EndOfFile)
389         return;
390
391     // For now, we translate into an old-style token for testing.
392     Token oldStyleToken;
393     AtomicHTMLToken atomicToken(token);
394     convertToOldStyle(atomicToken, oldStyleToken);
395
396     RefPtr<Node> result =  m_legacyTreeBuilder->parseToken(&oldStyleToken);
397     if (token.type() == HTMLToken::StartTag) {
398         // This work is supposed to be done by the parser, but
399         // when using the old parser for we have to do this manually.
400         if (oldStyleToken.tagName == scriptTag) {
401             handleScriptStartTag();
402             m_lastScriptElement = static_pointer_cast<Element>(result);
403             m_lastScriptElementStartLine = m_tokenizer->lineNumber();
404         } else if (oldStyleToken.tagName == preTag || oldStyleToken.tagName == listingTag)
405             m_tokenizer->skipLeadingNewLineForListing();
406         else
407             m_tokenizer->setState(adjustedLexerState(m_tokenizer->state(), oldStyleToken.tagName, m_document->frame()));
408     } else if (token.type() == HTMLToken::EndTag) {
409         if (oldStyleToken.tagName == scriptTag) {
410             if (m_lastScriptElement) {
411                 ASSERT(m_lastScriptElementStartLine != uninitializedLineNumberValue);
412                 if (m_fragmentScriptingPermission == FragmentScriptingNotAllowed) {
413                     // FIXME: This is a horrible hack for platform/Pasteboard.
414                     // Clear the <script> tag when using the Parser to create
415                     // a DocumentFragment for pasting so that javascript content
416                     // does not show up in pasted HTML.
417                     m_lastScriptElement->removeChildren();
418                 } else if (insertionMode() != AfterFramesetMode)
419                     handleScriptEndTag(m_lastScriptElement.get(), m_lastScriptElementStartLine);
420                 m_lastScriptElement = 0;
421                 m_lastScriptElementStartLine = uninitializedLineNumberValue;
422             }
423         } else if (oldStyleToken.tagName == framesetTag)
424             setInsertionMode(AfterFramesetMode);
425     }
426 }
427
428 void HTMLTreeBuilder::constructTreeFromToken(HTMLToken& rawToken)
429 {
430     if (m_legacyTreeBuilder) {
431         passTokenToLegacyParser(rawToken);
432         return;
433     }
434
435     AtomicHTMLToken token(rawToken);
436     processToken(token);
437 }
438
439 void HTMLTreeBuilder::processToken(AtomicHTMLToken& token)
440 {
441     switch (token.type()) {
442     case HTMLToken::Uninitialized:
443         ASSERT_NOT_REACHED();
444         break;
445     case HTMLToken::DOCTYPE:
446         processDoctypeToken(token);
447         break;
448     case HTMLToken::StartTag:
449         processStartTag(token);
450         break;
451     case HTMLToken::EndTag:
452         processEndTag(token);
453         break;
454     case HTMLToken::Comment:
455         processComment(token);
456         return;
457     case HTMLToken::Character:
458         processCharacter(token);
459         break;
460     case HTMLToken::EndOfFile:
461         processEndOfFile(token);
462         break;
463     }
464 }
465
466 void HTMLTreeBuilder::processDoctypeToken(AtomicHTMLToken& token)
467 {
468     ASSERT(token.type() == HTMLToken::DOCTYPE);
469     if (insertionMode() == InitialMode) {
470         m_tree.insertDoctype(token);
471         return;
472     }
473     parseError(token);
474 }
475
476 void HTMLTreeBuilder::processFakeStartTag(const QualifiedName& tagName, PassRefPtr<NamedNodeMap> attributes)
477 {
478     // FIXME: We'll need a fancier conversion than just "localName" for SVG/MathML tags.
479     AtomicHTMLToken fakeToken(HTMLToken::StartTag, tagName.localName(), attributes);
480     processStartTag(fakeToken);
481 }
482
483 void HTMLTreeBuilder::processFakeEndTag(const QualifiedName& tagName)
484 {
485     // FIXME: We'll need a fancier conversion than just "localName" for SVG/MathML tags.
486     AtomicHTMLToken fakeToken(HTMLToken::EndTag, tagName.localName());
487     processEndTag(fakeToken);
488 }
489
490 void HTMLTreeBuilder::processFakeCharacters(const String& characters)
491 {
492     AtomicHTMLToken fakeToken(characters);
493     processCharacter(fakeToken);
494 }
495
496 void HTMLTreeBuilder::processFakePEndTagIfPInScope()
497 {
498     if (!m_tree.openElements()->inScope(pTag.localName()))
499         return;
500     AtomicHTMLToken endP(HTMLToken::EndTag, pTag.localName());
501     processEndTag(endP);
502 }
503
504 PassRefPtr<NamedNodeMap> HTMLTreeBuilder::attributesForIsindexInput(AtomicHTMLToken& token)
505 {
506     RefPtr<NamedNodeMap> attributes = token.takeAtributes();
507     if (!attributes)
508         attributes = NamedNodeMap::create();
509     else {
510         attributes->removeAttribute(nameAttr);
511         attributes->removeAttribute(actionAttr);
512         attributes->removeAttribute(promptAttr);
513     }
514
515     RefPtr<Attribute> mappedAttribute = Attribute::createMapped(nameAttr, isindexTag.localName());
516     attributes->insertAttribute(mappedAttribute.release(), false);
517     return attributes.release();
518 }
519
520 void HTMLTreeBuilder::processIsindexStartTagForInBody(AtomicHTMLToken& token)
521 {
522     ASSERT(token.type() == HTMLToken::StartTag);
523     ASSERT(token.name() == isindexTag);
524     parseError(token);
525     if (m_tree.form())
526         return;
527     notImplemented(); // Acknowledge self-closing flag
528     processFakeStartTag(formTag);
529     Attribute* actionAttribute = token.getAttributeItem(actionAttr);
530     if (actionAttribute) {
531         ASSERT(m_tree.currentElement()->hasTagName(formTag));
532         m_tree.currentElement()->setAttribute(actionAttr, actionAttribute->value());
533     }
534     processFakeStartTag(hrTag);
535     processFakeStartTag(labelTag);
536     Attribute* promptAttribute = token.getAttributeItem(promptAttr);
537     if (promptAttribute)
538         processFakeCharacters(promptAttribute->value());
539     else
540         processFakeCharacters(searchableIndexIntroduction());
541     processFakeStartTag(inputTag, attributesForIsindexInput(token));
542     notImplemented(); // This second set of characters may be needed by non-english locales.
543     processFakeEndTag(labelTag);
544     processFakeStartTag(hrTag);
545     processFakeEndTag(formTag);
546 }
547
548 namespace {
549
550 bool isLi(const Element* element)
551 {
552     return element->hasTagName(liTag);
553 }
554
555 bool isDdOrDt(const Element* element)
556 {
557     return element->hasTagName(ddTag)
558         || element->hasTagName(dtTag);
559 }
560
561 }
562
563 template <bool shouldClose(const Element*)>
564 void HTMLTreeBuilder::processCloseWhenNestedTag(AtomicHTMLToken& token)
565 {
566     m_framesetOk = false;
567     HTMLElementStack::ElementRecord* nodeRecord = m_tree.openElements()->topRecord();
568     while (1) {
569         Element* node = nodeRecord->element();
570         if (shouldClose(node)) {
571             processFakeEndTag(node->tagQName());
572             break;
573         }
574         if (isNotFormattingAndNotPhrasing(node) && !node->hasTagName(addressTag) && !node->hasTagName(divTag) && !node->hasTagName(pTag))
575             break;
576         nodeRecord = nodeRecord->next();
577     }
578     processFakePEndTagIfPInScope();
579     m_tree.insertHTMLElement(token);
580 }
581
582 void HTMLTreeBuilder::processStartTagForInBody(AtomicHTMLToken& token)
583 {
584     ASSERT(token.type() == HTMLToken::StartTag);
585     if (token.name() == htmlTag) {
586         m_tree.insertHTMLHtmlStartTagInBody(token);
587         return;
588     }
589     if (token.name() == baseTag
590         || token.name() == "command"
591         || token.name() == linkTag
592         || token.name() == metaTag
593         || token.name() == noframesTag
594         || token.name() == scriptTag
595         || token.name() == styleTag
596         || token.name() == titleTag) {
597         bool didProcess = processStartTagForInHead(token);
598         ASSERT_UNUSED(didProcess, didProcess);
599         return;
600     }
601     if (token.name() == bodyTag) {
602         m_tree.insertHTMLBodyStartTagInBody(token);
603         return;
604     }
605     if (token.name() == framesetTag) {
606         parseError(token);
607         notImplemented(); // fragment case
608         if (!m_framesetOk)
609             return;
610         ExceptionCode ec = 0;
611         m_tree.openElements()->bodyElement()->remove(ec);
612         ASSERT(!ec);
613         m_tree.openElements()->popUntil(m_tree.openElements()->bodyElement());
614         m_tree.openElements()->popHTMLBodyElement();
615         ASSERT(m_tree.openElements()->top() == m_tree.openElements()->htmlElement());
616         m_tree.insertHTMLElement(token);
617         m_insertionMode = InFramesetMode;
618         return;
619     }
620     if (token.name() == addressTag
621         || token.name() == articleTag
622         || token.name() == asideTag
623         || token.name() == blockquoteTag
624         || token.name() == centerTag
625         || token.name() == "details"
626         || token.name() == dirTag
627         || token.name() == divTag
628         || token.name() == dlTag
629         || token.name() == fieldsetTag
630         || token.name() == "figure"
631         || token.name() == footerTag
632         || token.name() == headerTag
633         || token.name() == hgroupTag
634         || token.name() == menuTag
635         || token.name() == navTag
636         || token.name() == olTag
637         || token.name() == pTag
638         || token.name() == sectionTag
639         || token.name() == ulTag) {
640         processFakePEndTagIfPInScope();
641         m_tree.insertHTMLElement(token);
642         return;
643     }
644     if (isNumberedHeaderTag(token.name())) {
645         processFakePEndTagIfPInScope();
646         if (isNumberedHeaderTag(m_tree.currentElement()->localName())) {
647             parseError(token);
648             m_tree.openElements()->pop();
649         }
650         m_tree.insertHTMLElement(token);
651         return;
652     }
653     if (token.name() == preTag || token.name() == listingTag) {
654         processFakePEndTagIfPInScope();
655         m_tree.insertHTMLElement(token);
656         m_tokenizer->skipLeadingNewLineForListing();
657         m_framesetOk = false;
658         return;
659     }
660     if (token.name() == formTag) {
661         if (m_tree.form()) {
662             parseError(token);
663             return;
664         }
665         processFakePEndTagIfPInScope();
666         m_tree.insertHTMLElement(token);
667         m_tree.setForm(m_tree.currentElement());
668         return;
669     }
670     if (token.name() == liTag) {
671         processCloseWhenNestedTag<isLi>(token);
672         return;
673     }
674     if (token.name() == ddTag || token.name() == dtTag) {
675         processCloseWhenNestedTag<isDdOrDt>(token);
676         return;
677     }
678     if (token.name() == plaintextTag) {
679         processFakePEndTagIfPInScope();
680         m_tree.insertHTMLElement(token);
681         m_tokenizer->setState(HTMLTokenizer::PLAINTEXTState);
682         return;
683     }
684     if (token.name() == buttonTag) {
685         if (m_tree.openElements()->inScope(buttonTag)) {
686             parseError(token);
687             processFakeEndTag(buttonTag);
688             processStartTag(token); // FIXME: Could we just fall through here?
689             return;
690         }
691         m_tree.reconstructTheActiveFormattingElements();
692         m_tree.insertHTMLElement(token);
693         m_framesetOk = false;
694         return;
695     }
696     if (token.name() == aTag) {
697         Element* activeATag = m_tree.activeFormattingElements()->closestElementInScopeWithName(aTag.localName());
698         if (activeATag) {
699             parseError(token);
700             processFakeEndTag(aTag);
701             m_tree.activeFormattingElements()->remove(activeATag);
702             if (m_tree.openElements()->contains(activeATag))
703                 m_tree.openElements()->remove(activeATag);
704         }
705         m_tree.reconstructTheActiveFormattingElements();
706         m_tree.insertFormattingElement(token);
707         return;
708     }
709     if (isNonAnchorNonNobrFormattingTag(token.name())) {
710         m_tree.reconstructTheActiveFormattingElements();
711         m_tree.insertFormattingElement(token);
712         return;
713     }
714     if (token.name() == nobrTag) {
715         m_tree.reconstructTheActiveFormattingElements();
716         if (m_tree.openElements()->inScope(nobrTag)) {
717             parseError(token);
718             processFakeEndTag(nobrTag);
719             m_tree.reconstructTheActiveFormattingElements();
720         }
721         m_tree.insertFormattingElement(token);
722         return;
723     }
724     if (token.name() == appletTag
725         || token.name() == marqueeTag
726         || token.name() == objectTag) {
727         m_tree.reconstructTheActiveFormattingElements();
728         m_tree.insertHTMLElement(token);
729         m_tree.activeFormattingElements()->appendMarker();
730         m_framesetOk = false;
731         return;
732     }
733     if (token.name() == tableTag) {
734         if (m_document->parseMode() != Document::Compat && m_tree.openElements()->inScope(pTag))
735             processFakeEndTag(pTag);
736         m_tree.insertHTMLElement(token);
737         m_framesetOk = false;
738         m_insertionMode = InTableMode;
739         return;
740     }
741     if (token.name() == imageTag) {
742         parseError(token);
743         // Apparently we're not supposed to ask.
744         token.setName(imgTag.localName());
745         // Note the fall through to the imgTag handling below!
746     }
747     if (token.name() == areaTag
748         || token.name() == basefontTag
749         || token.name() == "bgsound"
750         || token.name() == brTag
751         || token.name() == embedTag
752         || token.name() == imgTag
753         || token.name() == inputTag
754         || token.name() == keygenTag
755         || token.name() == wbrTag) {
756         m_tree.reconstructTheActiveFormattingElements();
757         m_tree.insertSelfClosingHTMLElement(token);
758         m_framesetOk = false;
759         return;
760     }
761     if (token.name() == paramTag
762         || token.name() == sourceTag
763         || token.name() == "track") {
764         m_tree.insertSelfClosingHTMLElement(token);
765         return;
766     }
767     if (token.name() == hrTag) {
768         processFakePEndTagIfPInScope();
769         m_tree.insertSelfClosingHTMLElement(token);
770         m_framesetOk = false;
771         return;
772     }
773     if (token.name() == isindexTag) {
774         processIsindexStartTagForInBody(token);
775         return;
776     }
777     if (token.name() == textareaTag) {
778         m_tree.insertHTMLElement(token);
779         m_tokenizer->skipLeadingNewLineForListing();
780         m_tokenizer->setState(HTMLTokenizer::RCDATAState);
781         m_originalInsertionMode = m_insertionMode;
782         m_framesetOk = false;
783         m_insertionMode = TextMode;
784         return;
785     }
786     if (token.name() == xmpTag) {
787         processFakePEndTagIfPInScope();
788         m_tree.reconstructTheActiveFormattingElements();
789         m_framesetOk = false;
790         processGenericRawTextStartTag(token);
791         return;
792     }
793     if (token.name() == iframeTag) {
794         m_framesetOk = false;
795         processGenericRawTextStartTag(token);
796         return;
797     }
798     if (token.name() == noembedTag) {
799         processGenericRawTextStartTag(token);
800         return;
801     }
802     if (token.name() == noscriptTag && isScriptingFlagEnabled(m_document->frame())) {
803         processGenericRawTextStartTag(token);
804         return;
805     }
806     if (token.name() == selectTag) {
807         m_tree.reconstructTheActiveFormattingElements();
808         m_tree.insertHTMLElement(token);
809         m_framesetOk = false;
810         if (m_insertionMode == InTableMode
811              || m_insertionMode == InCaptionMode
812              || m_insertionMode == InColumnGroupMode
813              || m_insertionMode == InTableBodyMode
814              || m_insertionMode == InRowMode
815              || m_insertionMode == InCellMode)
816             m_insertionMode = InSelectInTableMode;
817         else
818             m_insertionMode = InSelectMode;
819         return;
820     }
821     if (token.name() == optgroupTag || token.name() == optionTag) {
822         if (m_tree.openElements()->inScope(optionTag.localName())) {
823             AtomicHTMLToken endOption(HTMLToken::EndTag, optionTag.localName());
824             processEndTag(endOption);
825         }
826         m_tree.reconstructTheActiveFormattingElements();
827         m_tree.insertHTMLElement(token);
828         return;
829     }
830     if (token.name() == rpTag || token.name() == rtTag) {
831         if (m_tree.openElements()->inScope(rubyTag.localName())) {
832             m_tree.generateImpliedEndTags();
833             if (!m_tree.currentElement()->hasTagName(rubyTag)) {
834                 parseError(token);
835                 m_tree.openElements()->popUntil(rubyTag.localName());
836             }
837         }
838         m_tree.insertHTMLElement(token);
839         return;
840     }
841     if (token.name() == "math") {
842         // This is the MathML foreign content branch point.
843         notImplemented();
844     }
845     if (token.name() == "svg") {
846         // This is the SVG foreign content branch point.
847         notImplemented();
848     }
849     if (isCaptionColOrColgroupTag(token.name())
850         || token.name() == frameTag
851         || token.name() == headTag
852         || isTableBodyContextTag(token.name())
853         || isTableCellContextTag(token.name())
854         || token.name() == trTag) {
855         parseError(token);
856         return;
857     }
858     m_tree.reconstructTheActiveFormattingElements();
859     m_tree.insertHTMLElement(token);
860 }
861
862 bool HTMLTreeBuilder::processColgroupEndTagForInColumnGroup()
863 {
864     if (m_tree.currentElement() == m_tree.openElements()->htmlElement()) {
865         ASSERT(m_isParsingFragment);
866         // FIXME: parse error
867         return false;
868     }
869     m_tree.openElements()->pop();
870     m_insertionMode = InTableMode;
871     return true;
872 }
873
874 // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#close-the-cell
875 void HTMLTreeBuilder::closeTheCell()
876 {
877     ASSERT(insertionMode() == InCellMode);
878     if (m_tree.openElements()->inScope(tdTag)) {
879         ASSERT(!m_tree.openElements()->inScope(thTag));
880         processFakeEndTag(tdTag);
881         return;
882     }
883     ASSERT(m_tree.openElements()->inScope(thTag));
884     processFakeEndTag(thTag);
885     ASSERT(insertionMode() == InRowMode);
886 }
887
888 void HTMLTreeBuilder::processStartTagForInTable(AtomicHTMLToken& token)
889 {
890     ASSERT(token.type() == HTMLToken::StartTag);
891     if (token.name() == captionTag) {
892         m_tree.openElements()->popUntilTableScopeMarker();
893         m_tree.activeFormattingElements()->appendMarker();
894         m_tree.insertHTMLElement(token);
895         m_insertionMode = InCaptionMode;
896         return;
897     }
898     if (token.name() == colgroupTag) {
899         m_tree.openElements()->popUntilTableScopeMarker();
900         m_tree.insertHTMLElement(token);
901         m_insertionMode = InColumnGroupMode;
902         return;
903     }
904     if (token.name() == colTag) {
905         processFakeStartTag(colgroupTag);
906         ASSERT(InColumnGroupMode);
907         processStartTag(token);
908         return;
909     }
910     if (isTableBodyContextTag(token.name())) {
911         m_tree.openElements()->popUntilTableScopeMarker();
912         m_tree.insertHTMLElement(token);
913         m_insertionMode = InTableBodyMode;
914         return;
915     }
916     if (isTableCellContextTag(token.name())
917         || token.name() == trTag) {
918         processFakeStartTag(tbodyTag);
919         ASSERT(insertionMode() == InTableBodyMode);
920         processStartTag(token);
921         return;
922     }
923     if (token.name() == tableTag) {
924         parseError(token);
925         if (!processTableEndTagForInTable()) {
926             ASSERT(m_isParsingFragment);
927             return;
928         }
929         processStartTag(token);
930         return;
931     }
932     if (token.name() == styleTag || token.name() == scriptTag) {
933         processStartTagForInHead(token);
934         return;
935     }
936     if (token.name() == inputTag) {
937         Attribute* typeAttribute = token.getAttributeItem(typeAttr);
938         if (!typeAttribute || equalIgnoringCase(typeAttribute->value(), "hidden")) {
939             parseError(token);
940             m_tree.insertSelfClosingHTMLElement(token);
941             return;
942         }
943         // Fall through to "anything else" case.
944     }
945     if (token.name() == formTag) {
946         parseError(token);
947         if (m_tree.form())
948             return;
949         m_tree.insertSelfClosingHTMLElement(token);
950         return;
951     }
952     parseError(token);
953     HTMLConstructionSite::RedirectToFosterParentGuard redirecter(m_tree, requiresRedirectToFosterParent(m_tree.currentElement()));
954     processStartTagForInBody(token);
955 }
956
957 void HTMLTreeBuilder::processStartTag(AtomicHTMLToken& token)
958 {
959     ASSERT(token.type() == HTMLToken::StartTag);
960     switch (insertionMode()) {
961     case InitialMode:
962         ASSERT(insertionMode() == InitialMode);
963         processDefaultForInitialMode(token);
964         // Fall through.
965     case BeforeHTMLMode:
966         ASSERT(insertionMode() == BeforeHTMLMode);
967         if (token.name() == htmlTag) {
968             m_tree.insertHTMLHtmlStartTagBeforeHTML(token);
969             setInsertionMode(BeforeHeadMode);
970             return;
971         }
972         processDefaultForBeforeHTMLMode(token);
973         // Fall through.
974     case BeforeHeadMode:
975         ASSERT(insertionMode() == BeforeHeadMode);
976         if (token.name() == htmlTag) {
977             m_tree.insertHTMLHtmlStartTagInBody(token);
978             return;
979         }
980         if (token.name() == headTag) {
981             m_tree.insertHTMLHeadElement(token);
982             setInsertionMode(InHeadMode);
983             return;
984         }
985         processDefaultForBeforeHeadMode(token);
986         // Fall through.
987     case InHeadMode:
988         ASSERT(insertionMode() == InHeadMode);
989         if (processStartTagForInHead(token))
990             return;
991         processDefaultForInHeadMode(token);
992         // Fall through.
993     case AfterHeadMode:
994         ASSERT(insertionMode() == AfterHeadMode);
995         if (token.name() == htmlTag) {
996             m_tree.insertHTMLHtmlStartTagInBody(token);
997             return;
998         }
999         if (token.name() == bodyTag) {
1000             m_framesetOk = false;
1001             m_tree.insertHTMLBodyElement(token);
1002             m_insertionMode = InBodyMode;
1003             return;
1004         }
1005         if (token.name() == framesetTag) {
1006             m_tree.insertHTMLElement(token);
1007             setInsertionMode(InFramesetMode);
1008             return;
1009         }
1010         if (token.name() == baseTag
1011             || token.name() == linkTag
1012             || token.name() == metaTag
1013             || token.name() == noframesTag
1014             || token.name() == scriptTag
1015             || token.name() == styleTag
1016             || token.name() == titleTag) {
1017             parseError(token);
1018             ASSERT(m_tree.head());
1019             m_tree.openElements()->pushHTMLHeadElement(m_tree.head());
1020             processStartTagForInHead(token);
1021             m_tree.openElements()->removeHTMLHeadElement(m_tree.head());
1022             return;
1023         }
1024         if (token.name() == headTag) {
1025             parseError(token);
1026             return;
1027         }
1028         processDefaultForAfterHeadMode(token);
1029         // Fall through
1030     case InBodyMode:
1031         ASSERT(insertionMode() == InBodyMode);
1032         processStartTagForInBody(token);
1033         break;
1034     case InTableMode:
1035         ASSERT(insertionMode() == InTableMode);
1036         processStartTagForInTable(token);
1037         break;
1038     case InCaptionMode:
1039         ASSERT(insertionMode() == InCaptionMode);
1040         if (isCaptionColOrColgroupTag(token.name())
1041             || isTableBodyContextTag(token.name())
1042             || isTableCellContextTag(token.name())
1043             || token.name() == trTag) {
1044             parseError(token);
1045             if (!processCaptionEndTagForInCaption()) {
1046                 ASSERT(m_isParsingFragment);
1047                 return;
1048             }
1049             processStartTag(token);
1050             return;
1051         }
1052         processStartTagForInBody(token);
1053         break;
1054     case InColumnGroupMode:
1055         ASSERT(insertionMode() == InColumnGroupMode);
1056         if (token.name() == htmlTag) {
1057             m_tree.insertHTMLHtmlStartTagInBody(token);
1058             return;
1059         }
1060         if (token.name() == colTag) {
1061             m_tree.insertSelfClosingHTMLElement(token);
1062             return;
1063         }
1064         if (!processColgroupEndTagForInColumnGroup()) {
1065             ASSERT(m_isParsingFragment);
1066             return;
1067         }
1068         processStartTag(token);
1069         break;
1070     case InTableBodyMode:
1071         ASSERT(insertionMode() == InTableBodyMode);
1072         if (token.name() == trTag) {
1073             m_tree.openElements()->popUntilTableBodyScopeMarker(); // How is there ever anything to pop?
1074             m_tree.insertHTMLElement(token);
1075             m_insertionMode = InRowMode;
1076             return;
1077         }
1078         if (isTableCellContextTag(token.name())) {
1079             parseError(token);
1080             processFakeStartTag(trTag);
1081             ASSERT(insertionMode() == InRowMode);
1082             processStartTag(token);
1083             return;
1084         }
1085         if (isCaptionColOrColgroupTag(token.name()) || isTableBodyContextTag(token.name())) {
1086             // FIXME: This is slow.
1087             if (!m_tree.openElements()->inTableScope(tbodyTag.localName()) && !m_tree.openElements()->inTableScope(theadTag.localName()) && !m_tree.openElements()->inTableScope(tfootTag.localName())) {
1088                 ASSERT(m_isParsingFragment);
1089                 parseError(token);
1090                 return;
1091             }
1092             m_tree.openElements()->popUntilTableBodyScopeMarker();
1093             ASSERT(isTableBodyContextTag(m_tree.currentElement()->localName()));
1094             processFakeEndTag(m_tree.currentElement()->tagQName());
1095             processStartTag(token);
1096             return;
1097         }
1098         processStartTagForInTable(token);
1099         break;
1100     case InRowMode:
1101         ASSERT(insertionMode() == InRowMode);
1102         if (isTableCellContextTag(token.name())) {
1103             m_tree.openElements()->popUntilTableRowScopeMarker();
1104             m_tree.insertHTMLElement(token);
1105             m_insertionMode = InCellMode;
1106             m_tree.activeFormattingElements()->appendMarker();
1107             return;
1108         }
1109         if (token.name() == trTag
1110             || isCaptionColOrColgroupTag(token.name())
1111             || isTableBodyContextTag(token.name())) {
1112             if (!processTrEndTagForInRow()) {
1113                 ASSERT(m_isParsingFragment);
1114                 return;
1115             }
1116             ASSERT(insertionMode() == InTableBodyMode);
1117             processStartTag(token);
1118             return;
1119         }
1120         processStartTagForInTable(token);
1121         break;
1122     case InCellMode:
1123         ASSERT(insertionMode() == InCellMode);
1124         if (isCaptionColOrColgroupTag(token.name())
1125             || isTableCellContextTag(token.name())
1126             || token.name() == trTag
1127             || isTableBodyContextTag(token.name())) {
1128             // FIXME: This could be more efficient.
1129             if (!m_tree.openElements()->inTableScope(tdTag) && !m_tree.openElements()->inTableScope(thTag)) {
1130                 ASSERT(m_isParsingFragment);
1131                 parseError(token);
1132                 return;
1133             }
1134             closeTheCell();
1135             processStartTag(token);
1136             return;
1137         }
1138         processStartTagForInBody(token);
1139         break;
1140     case AfterBodyMode:
1141     case AfterAfterBodyMode:
1142         ASSERT(insertionMode() == AfterBodyMode || insertionMode() == AfterAfterBodyMode);
1143         if (token.name() == htmlTag) {
1144             m_tree.insertHTMLHtmlStartTagInBody(token);
1145             return;
1146         }
1147         m_insertionMode = InBodyMode;
1148         processStartTag(token);
1149         break;
1150     case InHeadNoscriptMode:
1151         ASSERT(insertionMode() == InHeadNoscriptMode);
1152         if (token.name() == htmlTag) {
1153             m_tree.insertHTMLHtmlStartTagInBody(token);
1154             return;
1155         }
1156         if (token.name() == linkTag
1157             || token.name() == metaTag
1158             || token.name() == noframesTag
1159             || token.name() == styleTag) {
1160             bool didProcess = processStartTagForInHead(token);
1161             ASSERT_UNUSED(didProcess, didProcess);
1162             return;
1163         }
1164         if (token.name() == htmlTag || token.name() == noscriptTag) {
1165             parseError(token);
1166             return;
1167         }
1168         processDefaultForInHeadNoscriptMode(token);
1169         processToken(token);
1170         break;
1171     case InFramesetMode:
1172         ASSERT(insertionMode() == InFramesetMode);
1173         if (token.name() == htmlTag) {
1174             m_tree.insertHTMLHtmlStartTagInBody(token);
1175             return;
1176         }
1177         if (token.name() == framesetTag) {
1178             m_tree.insertHTMLElement(token);
1179             return;
1180         }
1181         if (token.name() == frameTag) {
1182             m_tree.insertSelfClosingHTMLElement(token);
1183             return;
1184         }
1185         if (token.name() == noframesTag) {
1186             processStartTagForInHead(token);
1187             return;
1188         }
1189         parseError(token);
1190         break;
1191     case AfterFramesetMode:
1192     case AfterAfterFramesetMode:
1193         ASSERT(insertionMode() == AfterFramesetMode || insertionMode() == AfterAfterFramesetMode);
1194         if (token.name() == htmlTag) {
1195             m_tree.insertHTMLHtmlStartTagInBody(token);
1196             return;
1197         }
1198         if (token.name() == noframesTag) {
1199             processStartTagForInHead(token);
1200             return;
1201         }
1202         parseError(token);
1203         break;
1204     case InSelectInTableMode:
1205         ASSERT(insertionMode() == InSelectInTableMode);
1206         if (token.name() == captionTag
1207             || token.name() == tableTag
1208             || isTableBodyContextTag(token.name())
1209             || token.name() == trTag
1210             || isTableCellContextTag(token.name())) {
1211             parseError(token);
1212             AtomicHTMLToken endSelect(HTMLToken::EndTag, selectTag.localName());
1213             processEndTag(endSelect);
1214             processStartTag(token);
1215             return;
1216         }
1217         // Fall through
1218     case InSelectMode:
1219         ASSERT(insertionMode() == InSelectMode || insertionMode() == InSelectInTableMode);
1220         if (token.name() == htmlTag) {
1221             m_tree.insertHTMLHtmlStartTagInBody(token);
1222             return;
1223         }
1224         if (token.name() == optionTag) {
1225             if (m_tree.currentElement()->hasTagName(optionTag)) {
1226                 AtomicHTMLToken endOption(HTMLToken::EndTag, optionTag.localName());
1227                 processEndTag(endOption);
1228             }
1229             m_tree.insertHTMLElement(token);
1230             return;
1231         }
1232         if (token.name() == optgroupTag) {
1233             if (m_tree.currentElement()->hasTagName(optionTag)) {
1234                 AtomicHTMLToken endOption(HTMLToken::EndTag, optionTag.localName());
1235                 processEndTag(endOption);
1236             }
1237             if (m_tree.currentElement()->hasTagName(optgroupTag)) {
1238                 AtomicHTMLToken endOptgroup(HTMLToken::EndTag, optgroupTag.localName());
1239                 processEndTag(endOptgroup);
1240             }
1241             m_tree.insertHTMLElement(token);
1242             return;
1243         }
1244         if (token.name() == selectTag) {
1245             parseError(token);
1246             AtomicHTMLToken endSelect(HTMLToken::EndTag, selectTag.localName());
1247             processEndTag(endSelect);
1248             return;
1249         }
1250         if (token.name() == inputTag
1251             || token.name() == keygenTag
1252             || token.name() == textareaTag) {
1253             parseError(token);
1254             notImplemented(); // fragment case
1255             AtomicHTMLToken endSelect(HTMLToken::EndTag, selectTag.localName());
1256             processEndTag(endSelect);
1257             processStartTag(token);
1258             return;
1259         }
1260         if (token.name() == scriptTag) {
1261             bool didProcess = processStartTagForInHead(token);
1262             ASSERT_UNUSED(didProcess, didProcess);
1263             return;
1264         }
1265         break;
1266     case TextMode:
1267     case InTableTextMode:
1268     case InForeignContentMode:
1269         notImplemented();
1270         break;
1271     }
1272 }
1273
1274 bool HTMLTreeBuilder::processBodyEndTagForInBody(AtomicHTMLToken& token)
1275 {
1276     ASSERT(token.type() == HTMLToken::EndTag);
1277     ASSERT(token.name() == bodyTag);
1278     if (!m_tree.openElements()->inScope(bodyTag.localName())) {
1279         parseError(token);
1280         return false;
1281     }
1282     notImplemented();
1283     m_insertionMode = AfterBodyMode;
1284     return true;
1285 }
1286
1287 void HTMLTreeBuilder::processAnyOtherEndTagForInBody(AtomicHTMLToken& token)
1288 {
1289     ASSERT(token.type() == HTMLToken::EndTag);
1290     HTMLElementStack::ElementRecord* record = m_tree.openElements()->topRecord();
1291     while (1) {
1292         Element* node = record->element();
1293         if (node->hasLocalName(token.name())) {
1294             m_tree.generateImpliedEndTags();
1295             if (!m_tree.currentElement()->hasLocalName(token.name())) {
1296                 parseError(token);
1297                 // FIXME: This is either a bug in the spec, or a bug in our
1298                 // implementation.  Filed a bug with HTML5:
1299                 // http://www.w3.org/Bugs/Public/show_bug.cgi?id=10080
1300                 // We might have already popped the node for the token in
1301                 // generateImpliedEndTags, just abort.
1302                 if (!m_tree.openElements()->contains(node))
1303                     return;
1304             }
1305             m_tree.openElements()->popUntil(node);
1306             m_tree.openElements()->pop();
1307             return;
1308         }
1309         if (isNotFormattingAndNotPhrasing(node)) {
1310             parseError(token);
1311             return;
1312         }
1313         record = record->next();
1314     }
1315 }
1316
1317 // FIXME: This probably belongs on HTMLElementStack.
1318 HTMLElementStack::ElementRecord* HTMLTreeBuilder::furthestBlockForFormattingElement(Element* formattingElement)
1319 {
1320     HTMLElementStack::ElementRecord* furthestBlock = 0;
1321     HTMLElementStack::ElementRecord* record = m_tree.openElements()->topRecord();
1322     for (; record; record = record->next()) {
1323         if (record->element() == formattingElement)
1324             return furthestBlock;
1325         if (isNotFormattingAndNotPhrasing(record->element()))
1326             furthestBlock = record;
1327     }
1328     ASSERT_NOT_REACHED();
1329     return 0;
1330 }
1331
1332 // FIXME: This should have a whitty name.
1333 // FIXME: This must be implemented in many other places in WebCore.
1334 void HTMLTreeBuilder::reparentChildren(Element* oldParent, Element* newParent)
1335 {
1336     Node* child = oldParent->firstChild();
1337     while (child) {
1338         Node* nextChild = child->nextSibling();
1339         ExceptionCode ec;
1340         newParent->appendChild(child, ec);
1341         ASSERT(!ec);
1342         child = nextChild;
1343     }
1344 }
1345
1346 // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#parsing-main-inbody
1347 void HTMLTreeBuilder::callTheAdoptionAgency(AtomicHTMLToken& token)
1348 {
1349     while (1) {
1350         // 1.
1351         Element* formattingElement = m_tree.activeFormattingElements()->closestElementInScopeWithName(token.name());
1352         if (!formattingElement || !m_tree.openElements()->inScope(formattingElement)) {
1353             parseError(token);
1354             notImplemented(); // Check the stack of open elements for a more specific parse error.
1355             return;
1356         }
1357         HTMLElementStack::ElementRecord* formattingElementRecord = m_tree.openElements()->find(formattingElement);
1358         if (!formattingElementRecord) {
1359             parseError(token);
1360             m_tree.activeFormattingElements()->remove(formattingElement);
1361             return;
1362         }
1363         if (formattingElement != m_tree.currentElement())
1364             parseError(token);
1365         // 2.
1366         HTMLElementStack::ElementRecord* furthestBlock = furthestBlockForFormattingElement(formattingElement);
1367         // 3.
1368         if (!furthestBlock) {
1369             m_tree.openElements()->popUntil(formattingElement);
1370             m_tree.openElements()->pop();
1371             m_tree.activeFormattingElements()->remove(formattingElement);
1372             return;
1373         }
1374         // 4.
1375         ASSERT(furthestBlock->isAbove(formattingElementRecord));
1376         Element* commonAncestor = formattingElementRecord->next()->element();
1377         // 5.
1378         HTMLFormattingElementList::Bookmark bookmark = m_tree.activeFormattingElements()->bookmarkFor(formattingElement);
1379         // 6.
1380         HTMLElementStack::ElementRecord* node = furthestBlock;
1381         HTMLElementStack::ElementRecord* nextNode = node->next();
1382         HTMLElementStack::ElementRecord* lastNode = furthestBlock;
1383         while (1) {
1384             // 6.1
1385             node = nextNode;
1386             ASSERT(node);
1387             nextNode = node->next(); // Save node->next() for the next iteration in case node is deleted in 6.2.
1388             // 6.2
1389             if (!m_tree.activeFormattingElements()->contains(node->element())) {
1390                 m_tree.openElements()->remove(node->element());
1391                 node = 0;
1392                 continue;
1393             }
1394             // 6.3
1395             if (node == formattingElementRecord)
1396                 break;
1397             // 6.5
1398             // FIXME: We're supposed to save the original token in the entry.
1399             AtomicHTMLToken fakeToken(HTMLToken::StartTag, node->element()->localName());
1400             // Is createHTMLElement correct? (instead of insertHTMLElement)
1401             // Does this code ever leave newElement unattached?
1402             RefPtr<Element> newElement = m_tree.createHTMLElement(fakeToken);
1403             HTMLFormattingElementList::Entry* nodeEntry = m_tree.activeFormattingElements()->find(node->element());
1404             nodeEntry->replaceElement(newElement.get());
1405             node->replaceElement(newElement.release());
1406             // 6.4 -- Intentionally out of order to handle the case where node
1407             // was replaced in 6.5.
1408             // http://www.w3.org/Bugs/Public/show_bug.cgi?id=10096
1409             if (lastNode == furthestBlock)
1410                 bookmark.moveToAfter(node->element());
1411             // 6.6
1412             // Use appendChild instead of parserAddChild to handle possible reparenting.
1413             ExceptionCode ec;
1414             node->element()->appendChild(lastNode->element(), ec);
1415             ASSERT(!ec);
1416             // 6.7
1417             lastNode = node;
1418         }
1419         // 7
1420         const AtomicString& commonAncestorTag = commonAncestor->localName();
1421         if (commonAncestorTag == tableTag
1422             || commonAncestorTag == trTag
1423             || isTableBodyContextTag(commonAncestorTag))
1424             m_tree.fosterParent(lastNode->element());
1425         else {
1426             ExceptionCode ec;
1427             commonAncestor->appendChild(lastNode->element(), ec);
1428             ASSERT(!ec);
1429         }
1430         // 8
1431         // FIXME: We're supposed to save the original token in the entry.
1432         AtomicHTMLToken fakeToken(HTMLToken::StartTag, formattingElement->localName());
1433         RefPtr<Element> newElement = m_tree.createHTMLElement(fakeToken);
1434         // 9
1435         reparentChildren(furthestBlock->element(), newElement.get());
1436         // 10
1437         furthestBlock->element()->parserAddChild(newElement);
1438         // 11
1439         m_tree.activeFormattingElements()->remove(formattingElement);
1440         m_tree.activeFormattingElements()->insertAt(newElement.get(), bookmark);
1441         // 12
1442         m_tree.openElements()->remove(formattingElement);
1443         m_tree.openElements()->insertAbove(newElement, furthestBlock);
1444     }
1445 }
1446
1447 void HTMLTreeBuilder::setInsertionModeAndEnd(InsertionMode newInsertionMode, bool foreign)
1448 {
1449     m_insertionMode = newInsertionMode;
1450     if (foreign) {
1451         m_secondaryInsertionMode = m_insertionMode;
1452         m_insertionMode = InForeignContentMode;
1453     }
1454 }
1455
1456 void HTMLTreeBuilder::resetInsertionModeAppropriately()
1457 {
1458     // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#reset-the-insertion-mode-appropriately
1459     bool last = false;
1460     bool foreign = false;
1461     HTMLElementStack::ElementRecord* nodeRecord = m_tree.openElements()->topRecord();
1462     while (1) {
1463         Element* node = nodeRecord->element();
1464         if (node == m_tree.openElements()->bottom()) {
1465             ASSERT(m_isParsingFragment);
1466             last = true;
1467             notImplemented(); // node = m_contextElement;
1468         }
1469         if (node->hasTagName(selectTag)) {
1470             ASSERT(m_isParsingFragment);
1471             return setInsertionModeAndEnd(InSelectMode, foreign);
1472         }
1473         if (node->hasTagName(tdTag) || node->hasTagName(thTag))
1474             return setInsertionModeAndEnd(InCellMode, foreign);
1475         if (node->hasTagName(trTag))
1476             return setInsertionModeAndEnd(InRowMode, foreign);
1477         if (isTableBodyContextTag(node->localName()))
1478             return setInsertionModeAndEnd(InTableBodyMode, foreign);
1479         if (node->hasTagName(captionTag))
1480             return setInsertionModeAndEnd(InCaptionMode, foreign);
1481         if (node->hasTagName(colgroupTag)) {
1482             ASSERT(m_isParsingFragment);
1483             return setInsertionModeAndEnd(InColumnGroupMode, foreign);
1484         }
1485         if (node->hasTagName(tableTag))
1486             return setInsertionModeAndEnd(InTableMode, foreign);
1487         if (node->hasTagName(headTag)) {
1488             ASSERT(m_isParsingFragment);
1489             return setInsertionModeAndEnd(InBodyMode, foreign);
1490         }
1491         if (node->hasTagName(bodyTag))
1492             return setInsertionModeAndEnd(InBodyMode, foreign);
1493         if (node->hasTagName(framesetTag)) {
1494             ASSERT(m_isParsingFragment);
1495             return setInsertionModeAndEnd(InFramesetMode, foreign);
1496         }
1497         if (node->hasTagName(htmlTag)) {
1498             ASSERT(m_isParsingFragment);
1499             return setInsertionModeAndEnd(BeforeHeadMode, foreign);
1500         }
1501         if (false
1502 #if ENABLE(SVG)
1503         || node->namespaceURI() == SVGNames::svgNamespaceURI
1504 #endif
1505 #if ENABLE(MATHML)
1506         || node->namespaceURI() == MathMLNames::mathmlNamespaceURI
1507 #endif
1508             )
1509             foreign = true;
1510         if (last) {
1511             ASSERT(m_isParsingFragment);
1512             return setInsertionModeAndEnd(InBodyMode, foreign);
1513         }
1514         nodeRecord = nodeRecord->next();
1515     }
1516 }
1517
1518 void HTMLTreeBuilder::processEndTagForInBody(AtomicHTMLToken& token)
1519 {
1520     ASSERT(token.type() == HTMLToken::EndTag);
1521     if (token.name() == bodyTag) {
1522         processBodyEndTagForInBody(token);
1523         return;
1524     }
1525     if (token.name() == htmlTag) {
1526         AtomicHTMLToken endBody(HTMLToken::EndTag, bodyTag.localName());
1527         if (processBodyEndTagForInBody(endBody))
1528             processEndTag(token);
1529         return;
1530     }
1531     if (token.name() == addressTag
1532         || token.name() == articleTag
1533         || token.name() == asideTag
1534         || token.name() == blockquoteTag
1535         || token.name() == buttonTag
1536         || token.name() == centerTag
1537         || token.name() == "details"
1538         || token.name() == dirTag
1539         || token.name() == divTag
1540         || token.name() == dlTag
1541         || token.name() == fieldsetTag
1542         || token.name() == "figure"
1543         || token.name() == footerTag
1544         || token.name() == headerTag
1545         || token.name() == hgroupTag
1546         || token.name() == listingTag
1547         || token.name() == menuTag
1548         || token.name() == navTag
1549         || token.name() == olTag
1550         || token.name() == preTag
1551         || token.name() == sectionTag
1552         || token.name() == ulTag) {
1553         if (!m_tree.openElements()->inScope(token.name())) {
1554             parseError(token);
1555             return;
1556         }
1557         m_tree.generateImpliedEndTags();
1558         if (!m_tree.currentElement()->hasLocalName(token.name()))
1559             parseError(token);
1560         m_tree.openElements()->popUntil(token.name());
1561         m_tree.openElements()->pop();
1562         return;
1563     }
1564     if (token.name() == formTag) {
1565         RefPtr<Element> node = m_tree.takeForm();
1566         if (!node || !m_tree.openElements()->inScope(node.get())) {
1567             parseError(token);
1568             return;
1569         }
1570         m_tree.generateImpliedEndTags();
1571         if (m_tree.currentElement() != node.get())
1572             parseError(token);
1573         m_tree.openElements()->remove(node.get());
1574     }
1575     if (token.name() == pTag) {
1576         if (!m_tree.openElements()->inScope(token.name())) {
1577             parseError(token);
1578             processFakeStartTag(pTag);
1579             ASSERT(m_tree.openElements()->inScope(token.name()));
1580             processEndTag(token);
1581             return;
1582         }
1583         m_tree.generateImpliedEndTagsWithExclusion(token.name());
1584         if (!m_tree.currentElement()->hasLocalName(token.name()))
1585             parseError(token);
1586         m_tree.openElements()->popUntil(token.name());
1587         m_tree.openElements()->pop();
1588         return;
1589     }
1590     if (token.name() == liTag) {
1591         if (!m_tree.openElements()->inListItemScope(token.name())) {
1592             parseError(token);
1593             return;
1594         }
1595         m_tree.generateImpliedEndTagsWithExclusion(token.name());
1596         if (!m_tree.currentElement()->hasLocalName(token.name()))
1597             parseError(token);
1598         m_tree.openElements()->popUntil(token.name());
1599         m_tree.openElements()->pop();
1600         return;
1601     }
1602     if (token.name() == ddTag
1603         || token.name() == dtTag) {
1604         if (!m_tree.openElements()->inScope(token.name())) {
1605             parseError(token);
1606             return;
1607         }
1608         m_tree.generateImpliedEndTagsWithExclusion(token.name());
1609         if (!m_tree.currentElement()->hasLocalName(token.name()))
1610             parseError(token);
1611         m_tree.openElements()->popUntil(token.name());
1612         m_tree.openElements()->pop();
1613         return;
1614     }
1615     if (isNumberedHeaderTag(token.name())) {
1616         if (!m_tree.openElements()->inScope(token.name())) {
1617             parseError(token);
1618             return;
1619         }
1620         m_tree.generateImpliedEndTags();
1621         if (!m_tree.currentElement()->hasLocalName(token.name()))
1622             parseError(token);
1623         m_tree.openElements()->popUntil(token.name());
1624         m_tree.openElements()->pop();
1625         return;
1626     }
1627     if (token.name() == "sarcasm") {
1628         notImplemented(); // Take a deep breath.
1629         return;
1630     }
1631     if (isFormattingTag(token.name())) {
1632         callTheAdoptionAgency(token);
1633         return;
1634     }
1635     if (token.name() == appletTag
1636         || token.name() == marqueeTag
1637         || token.name() == objectTag) {
1638         if (!m_tree.openElements()->inScope(token.name())) {
1639             parseError(token);
1640             return;
1641         }
1642         m_tree.generateImpliedEndTags();
1643         if (!m_tree.currentElement()->hasLocalName(token.name()))
1644             parseError(token);
1645         m_tree.openElements()->popUntil(token.name());
1646         m_tree.openElements()->pop();
1647         m_tree.activeFormattingElements()->clearToLastMarker();
1648         return;
1649     }
1650     if (token.name() == brTag) {
1651         parseError(token);
1652         processFakeStartTag(brTag);
1653         return;
1654     }
1655     processAnyOtherEndTagForInBody(token);
1656 }
1657
1658 bool HTMLTreeBuilder::processCaptionEndTagForInCaption()
1659 {
1660     if (!m_tree.openElements()->inTableScope(captionTag.localName())) {
1661         ASSERT(m_isParsingFragment);
1662         // FIXME: parse error
1663         return false;
1664     }
1665     m_tree.generateImpliedEndTags();
1666     // FIXME: parse error if (!m_tree.currentElement()->hasTagName(captionTag))
1667     m_tree.openElements()->popUntil(captionTag.localName());
1668     m_tree.openElements()->pop();
1669     m_tree.activeFormattingElements()->clearToLastMarker();
1670     m_insertionMode = InTableMode;
1671     return true;
1672 }
1673
1674 bool HTMLTreeBuilder::processTrEndTagForInRow()
1675 {
1676     if (!m_tree.openElements()->inTableScope(trTag.localName())) {
1677         ASSERT(m_isParsingFragment);
1678         // FIXME: parse error
1679         return false;
1680     }
1681     m_tree.openElements()->popUntilTableRowScopeMarker();
1682     ASSERT(m_tree.currentElement()->hasTagName(trTag));
1683     m_tree.openElements()->pop();
1684     m_insertionMode = InTableBodyMode;
1685     return true;
1686 }
1687
1688 bool HTMLTreeBuilder::processTableEndTagForInTable()
1689 {
1690     if (!m_tree.openElements()->inTableScope(tableTag)) {
1691         ASSERT(m_isParsingFragment);
1692         // FIXME: parse error.
1693         return false;
1694     }
1695     m_tree.openElements()->popUntil(tableTag.localName());
1696     m_tree.openElements()->pop();
1697     resetInsertionModeAppropriately();
1698     return true;
1699 }
1700
1701 void HTMLTreeBuilder::processEndTagForInTable(AtomicHTMLToken& token)
1702 {
1703     ASSERT(token.type() == HTMLToken::EndTag);
1704     if (token.name() == tableTag) {
1705         processTableEndTagForInTable();
1706         return;
1707     }
1708     if (token.name() == bodyTag
1709         || isCaptionColOrColgroupTag(token.name())
1710         || token.name() == htmlTag
1711         || isTableBodyContextTag(token.name())
1712         || isTableCellContextTag(token.name())
1713         || token.name() == trTag) {
1714         parseError(token);
1715         return;
1716     }
1717     // Is this redirection necessary here?
1718     HTMLConstructionSite::RedirectToFosterParentGuard redirecter(m_tree, requiresRedirectToFosterParent(m_tree.currentElement()));
1719     processEndTagForInBody(token);
1720 }
1721
1722 void HTMLTreeBuilder::processEndTag(AtomicHTMLToken& token)
1723 {
1724     ASSERT(token.type() == HTMLToken::EndTag);
1725     switch (insertionMode()) {
1726     case InitialMode:
1727         ASSERT(insertionMode() == InitialMode);
1728         processDefaultForInitialMode(token);
1729         // Fall through.
1730     case BeforeHTMLMode:
1731         ASSERT(insertionMode() == BeforeHTMLMode);
1732         if (token.name() != headTag && token.name() != bodyTag && token.name() != htmlTag && token.name() != brTag) {
1733             parseError(token);
1734             return;
1735         }
1736         processDefaultForBeforeHTMLMode(token);
1737         // Fall through.
1738     case BeforeHeadMode:
1739         ASSERT(insertionMode() == BeforeHeadMode);
1740         if (token.name() != headTag && token.name() != bodyTag && token.name() != htmlTag && token.name() != brTag) {
1741             parseError(token);
1742             return;
1743         }
1744         processDefaultForBeforeHeadMode(token);
1745         // Fall through.
1746     case InHeadMode:
1747         ASSERT(insertionMode() == InHeadMode);
1748         if (token.name() == headTag) {
1749             m_tree.openElements()->popHTMLHeadElement();
1750             setInsertionMode(AfterHeadMode);
1751             return;
1752         }
1753         if (token.name() != bodyTag && token.name() != htmlTag && token.name() != brTag) {
1754             parseError(token);
1755             return;
1756         }
1757         processDefaultForInHeadMode(token);
1758         // Fall through.
1759     case AfterHeadMode:
1760         ASSERT(insertionMode() == AfterHeadMode);
1761         if (token.name() != bodyTag && token.name() != htmlTag && token.name() != brTag) {
1762             parseError(token);
1763             return;
1764         }
1765         processDefaultForAfterHeadMode(token);
1766         // Fall through
1767     case InBodyMode:
1768         ASSERT(insertionMode() == InBodyMode);
1769         processEndTagForInBody(token);
1770         break;
1771     case InTableMode:
1772         ASSERT(insertionMode() == InTableMode);
1773         processEndTagForInTable(token);
1774         break;
1775     case InCaptionMode:
1776         ASSERT(insertionMode() == InCaptionMode);
1777         if (token.name() == captionTag) {
1778             processCaptionEndTagForInCaption();
1779             return;
1780         }
1781         if (token.name() == tableTag) {
1782             parseError(token);
1783             if (!processCaptionEndTagForInCaption()) {
1784                 ASSERT(m_isParsingFragment);
1785                 return;
1786             }
1787             processEndTag(token);
1788             return;
1789         }
1790         if (token.name() == bodyTag
1791             || token.name() == colTag
1792             || token.name() == colgroupTag
1793             || token.name() == htmlTag
1794             || isTableBodyContextTag(token.name())
1795             || isTableCellContextTag(token.name())
1796             || token.name() == trTag) {
1797             parseError(token);
1798             return;
1799         }
1800         processEndTagForInBody(token);
1801         break;
1802     case InColumnGroupMode:
1803         ASSERT(insertionMode() == InColumnGroupMode);
1804         if (token.name() == colgroupTag) {
1805             processColgroupEndTagForInColumnGroup();
1806             return;
1807         }
1808         if (token.name() == colTag) {
1809             parseError(token);
1810             return;
1811         }
1812         if (!processColgroupEndTagForInColumnGroup()) {
1813             ASSERT(m_isParsingFragment);
1814             return;
1815         }
1816         processEndTag(token);
1817         break;
1818     case InRowMode:
1819         ASSERT(insertionMode() == InRowMode);
1820         if (token.name() == trTag) {
1821             processTrEndTagForInRow();
1822             return;
1823         }
1824         if (token.name() == tableTag) {
1825             if (!processTrEndTagForInRow()) {
1826                 ASSERT(m_isParsingFragment);
1827                 return;
1828             }
1829             ASSERT(insertionMode() == InTableBodyMode);
1830             processEndTag(token);
1831             return;
1832         }
1833         if (isTableBodyContextTag(token.name())) {
1834             if (!m_tree.openElements()->inTableScope(token.name())) {
1835                 parseError(token);
1836                 return;
1837             }
1838             processFakeEndTag(trTag);
1839             ASSERT(insertionMode() == InTableBodyMode);
1840             processEndTag(token);
1841             return;
1842         }
1843         if (token.name() == bodyTag
1844             || isCaptionColOrColgroupTag(token.name())
1845             || token.name() == htmlTag
1846             || isTableCellContextTag(token.name())) {
1847             parseError(token);
1848             return;
1849         }
1850         processEndTagForInTable(token);
1851         break;
1852     case InCellMode:
1853         ASSERT(insertionMode() == InCellMode);
1854         if (isTableCellContextTag(token.name())) {
1855             if (!m_tree.openElements()->inTableScope(token.name())) {
1856                 parseError(token);
1857                 return;
1858             }
1859             m_tree.generateImpliedEndTags();
1860             if (!m_tree.currentElement()->hasLocalName(token.name()))
1861                 parseError(token);
1862             m_tree.openElements()->popUntil(token.name());
1863             m_tree.openElements()->pop();
1864             m_tree.activeFormattingElements()->clearToLastMarker();
1865             m_insertionMode = InRowMode;
1866             ASSERT(m_tree.currentElement()->hasTagName(trTag));
1867             return;
1868         }
1869         if (token.name() == bodyTag
1870             || isCaptionColOrColgroupTag(token.name())
1871             || token.name() == htmlTag) {
1872             parseError(token);
1873             return;
1874         }
1875         if (token.name() == tableTag
1876             || token.name() == trTag
1877             || isTableBodyContextTag(token.name())) {
1878             if (!m_tree.openElements()->inTableScope(token.name())) {
1879                 ASSERT(m_isParsingFragment);
1880                 // FIXME: It is unclear what the exact ASSERT should be.
1881                 // http://www.w3.org/Bugs/Public/show_bug.cgi?id=10098
1882                 parseError(token);
1883                 return;
1884             }
1885             closeTheCell();
1886             processEndTag(token);
1887             return;
1888         }
1889         processEndTagForInBody(token);
1890         break;
1891     case InTableBodyMode:
1892         ASSERT(insertionMode() == InTableBodyMode);
1893         if (isTableBodyContextTag(token.name())) {
1894             if (!m_tree.openElements()->inTableScope(token.name())) {
1895                 parseError(token);
1896                 return;
1897             }
1898             m_tree.openElements()->popUntilTableBodyScopeMarker();
1899             m_tree.openElements()->pop();
1900             m_insertionMode = InTableMode;
1901             return;
1902         }
1903         if (token.name() == tableTag) {
1904             // FIXME: This is slow.
1905             if (!m_tree.openElements()->inTableScope(tbodyTag.localName()) && !m_tree.openElements()->inTableScope(theadTag.localName()) && !m_tree.openElements()->inTableScope(tfootTag.localName())) {
1906                 ASSERT(m_isParsingFragment);
1907                 parseError(token);
1908                 return;
1909             }
1910             m_tree.openElements()->popUntilTableBodyScopeMarker();
1911             ASSERT(isTableBodyContextTag(m_tree.currentElement()->localName()));
1912             processFakeEndTag(m_tree.currentElement()->tagQName());
1913             processEndTag(token);
1914             return;
1915         }
1916         if (token.name() == bodyTag
1917             || isCaptionColOrColgroupTag(token.name())
1918             || token.name() == htmlTag
1919             || isTableCellContextTag(token.name())
1920             || token.name() == trTag) {
1921             parseError(token);
1922             return;
1923         }
1924         processEndTagForInTable(token);
1925         break;
1926     case AfterBodyMode:
1927         ASSERT(insertionMode() == AfterBodyMode);
1928         if (token.name() == htmlTag) {
1929             if (m_isParsingFragment) {
1930                 parseError(token);
1931                 return;
1932             }
1933             m_insertionMode = AfterAfterBodyMode;
1934             return;
1935         }
1936         // Fall through.
1937     case AfterAfterBodyMode:
1938         ASSERT(insertionMode() == AfterBodyMode || insertionMode() == AfterAfterBodyMode);
1939         parseError(token);
1940         m_insertionMode = InBodyMode;
1941         processEndTag(token);
1942         break;
1943     case InHeadNoscriptMode:
1944         ASSERT(insertionMode() == InHeadNoscriptMode);
1945         if (token.name() == noscriptTag) {
1946             ASSERT(m_tree.currentElement()->hasTagName(noscriptTag));
1947             m_tree.openElements()->pop();
1948             ASSERT(m_tree.currentElement()->hasTagName(headTag));
1949             setInsertionMode(InHeadMode);
1950             return;
1951         }
1952         if (token.name() != brTag) {
1953             parseError(token);
1954             return;
1955         }
1956         processDefaultForInHeadNoscriptMode(token);
1957         processToken(token);
1958         break;
1959     case TextMode:
1960         if (token.name() == scriptTag) {
1961             // Pause ourselves so that parsing stops until the script can be processed by the caller.
1962             m_isPaused = true;
1963             ASSERT(m_tree.currentElement()->hasTagName(scriptTag));
1964             m_scriptToProcess = m_tree.currentElement();
1965             m_tree.openElements()->pop();
1966             m_insertionMode = m_originalInsertionMode;
1967             return;
1968         }
1969         m_tree.openElements()->pop();
1970         m_insertionMode = m_originalInsertionMode;
1971         break;
1972     case InFramesetMode:
1973         ASSERT(insertionMode() == InFramesetMode);
1974         if (token.name() == framesetTag) {
1975             if (m_tree.currentElement() == m_tree.openElements()->htmlElement()) {
1976                 parseError(token);
1977                 return;
1978             }
1979             m_tree.openElements()->pop();
1980             if (!m_isParsingFragment && !m_tree.currentElement()->hasTagName(framesetTag))
1981                 m_insertionMode = AfterFramesetMode;
1982             return;
1983         }
1984         break;
1985     case AfterFramesetMode:
1986         ASSERT(insertionMode() == AfterFramesetMode);
1987         if (token.name() == htmlTag) {
1988             m_insertionMode = AfterAfterFramesetMode;
1989             return;
1990         }
1991         // Fall through.
1992     case AfterAfterFramesetMode:
1993         ASSERT(insertionMode() == AfterFramesetMode || insertionMode() == AfterAfterFramesetMode);
1994         parseError(token);
1995         break;
1996     case InSelectInTableMode:
1997         ASSERT(insertionMode() == InSelectInTableMode);
1998         if (token.name() == captionTag
1999             || token.name() == tableTag
2000             || isTableBodyContextTag(token.name())
2001             || token.name() == trTag
2002             || isTableCellContextTag(token.name())) {
2003             parseError(token);
2004             if (m_tree.openElements()->inTableScope(token.name())) {
2005                 AtomicHTMLToken endSelect(HTMLToken::EndTag, selectTag.localName());
2006                 processEndTag(endSelect);
2007                 processEndTag(token);
2008             }
2009             return;
2010         }
2011         // Fall through.
2012     case InSelectMode:
2013         ASSERT(insertionMode() == InSelectMode || insertionMode() == InSelectInTableMode);
2014         if (token.name() == optgroupTag) {
2015             if (m_tree.currentElement()->hasTagName(optionTag) && m_tree.oneBelowTop()->hasTagName(optgroupTag))
2016                 processFakeEndTag(optionTag);
2017             if (m_tree.currentElement()->hasTagName(optgroupTag)) {
2018                 m_tree.openElements()->pop();
2019                 return;
2020             }
2021             parseError(token);
2022             return;
2023         }
2024         if (token.name() == optionTag) {
2025             if (m_tree.currentElement()->hasTagName(optionTag)) {
2026                 m_tree.openElements()->pop();
2027                 return;
2028             }
2029             parseError(token);
2030             return;
2031         }
2032         if (token.name() == selectTag) {
2033             notImplemented(); // fragment case
2034             m_tree.openElements()->popUntil(selectTag.localName());
2035             m_tree.openElements()->pop();
2036             resetInsertionModeAppropriately();
2037             return;
2038         }
2039         break;
2040     case InTableTextMode:
2041     case InForeignContentMode:
2042         notImplemented();
2043         break;
2044     }
2045 }
2046
2047 void HTMLTreeBuilder::processComment(AtomicHTMLToken& token)
2048 {
2049     ASSERT(token.type() == HTMLToken::Comment);
2050     if (m_insertionMode == InitialMode
2051         || m_insertionMode == BeforeHTMLMode
2052         || m_insertionMode == AfterAfterBodyMode
2053         || m_insertionMode == AfterAfterFramesetMode) {
2054         m_tree.insertCommentOnDocument(token);
2055         return;
2056     }
2057     if (m_insertionMode == AfterBodyMode) {
2058         m_tree.insertCommentOnHTMLHtmlElement(token);
2059         return;
2060     }
2061     m_tree.insertComment(token);
2062 }
2063
2064 void HTMLTreeBuilder::processCharacter(AtomicHTMLToken& token)
2065 {
2066     ASSERT(token.type() == HTMLToken::Character);
2067     // FIXME: We need to figure out how to handle each character individually.
2068     switch (insertionMode()) {
2069     case InitialMode:
2070         ASSERT(insertionMode() == InitialMode);
2071         if (skipLeadingWhitespace(token))
2072             return;
2073         processDefaultForInitialMode(token);
2074         // Fall through.
2075     case BeforeHTMLMode:
2076         ASSERT(insertionMode() == BeforeHTMLMode);
2077         if (skipLeadingWhitespace(token))
2078             return;
2079         processDefaultForBeforeHTMLMode(token);
2080         // Fall through.
2081     case BeforeHeadMode:
2082         ASSERT(insertionMode() == BeforeHeadMode);
2083         if (skipLeadingWhitespace(token))
2084             return;
2085         processDefaultForBeforeHeadMode(token);
2086         // Fall through.
2087     case InHeadMode:
2088         ASSERT(insertionMode() == InHeadMode);
2089         if (m_tree.insertLeadingWhitespace(token))
2090             return;
2091         processDefaultForInHeadMode(token);
2092         // Fall through.
2093     case AfterHeadMode:
2094         ASSERT(insertionMode() == AfterHeadMode);
2095         if (m_tree.insertLeadingWhitespace(token))
2096             return;
2097         processDefaultForAfterHeadMode(token);
2098         // Fall through
2099     case InBodyMode:
2100     case InCaptionMode:
2101     case InCellMode:
2102         ASSERT(insertionMode() == InBodyMode || insertionMode() == InCaptionMode || insertionMode() == InCellMode);
2103         m_tree.reconstructTheActiveFormattingElements();
2104         m_tree.insertTextNode(token);
2105         if (m_framesetOk && hasNonWhitespace(token.characters()))
2106             m_framesetOk = false;
2107         break;
2108     case InTableMode:
2109     case InTableBodyMode:
2110     case InRowMode:
2111         ASSERT(insertionMode() == InTableMode || insertionMode() == InTableBodyMode || insertionMode() == InRowMode);
2112         notImplemented(); // Crazy pending characters.
2113         m_tree.insertTextNode(token);
2114         break;
2115     case InTableTextMode:
2116         notImplemented(); // Crazy pending characters.
2117         break;
2118     case InColumnGroupMode:
2119         ASSERT(insertionMode() == InColumnGroupMode);
2120         if (m_tree.insertLeadingWhitespace(token))
2121             return;
2122         if (!processColgroupEndTagForInColumnGroup()) {
2123             ASSERT(m_isParsingFragment);
2124             return;
2125         }
2126         processCharacter(token);
2127         break;
2128     case AfterBodyMode:
2129     case AfterAfterBodyMode:
2130         ASSERT(insertionMode() == AfterBodyMode || insertionMode() == AfterAfterBodyMode);
2131         parseError(token);
2132         m_insertionMode = InBodyMode;
2133         processCharacter(token);
2134         break;
2135     case TextMode:
2136         notImplemented();
2137         m_tree.insertTextNode(token);
2138         break;
2139     case InHeadNoscriptMode:
2140         ASSERT(insertionMode() == InHeadNoscriptMode);
2141         if (m_tree.insertLeadingWhitespace(token))
2142             return;
2143         processDefaultForInHeadNoscriptMode(token);
2144         processToken(token);
2145         break;
2146     case InFramesetMode:
2147     case AfterFramesetMode:
2148         ASSERT(insertionMode() == InFramesetMode || insertionMode() == AfterFramesetMode || insertionMode() == AfterAfterFramesetMode);
2149         if (m_tree.insertLeadingWhitespace(token))
2150             return;
2151         parseError(token);
2152         // FIXME: We probably need some sort of loop here. We're basically
2153         // filtering out the non-whitespace characters.
2154         break;
2155     case InSelectInTableMode:
2156     case InSelectMode:
2157         ASSERT(insertionMode() == InSelectMode || insertionMode() == InSelectInTableMode);
2158         m_tree.insertTextNode(token);
2159         break;
2160     case InForeignContentMode:
2161         notImplemented();
2162         break;
2163     case AfterAfterFramesetMode:
2164         if (m_tree.insertLeadingWhitespaceWithActiveFormattingElements(token))
2165             return;
2166         parseError(token);
2167         // FIXME: We probably need some sort of loop here. We're basically
2168         // filtering out the non-whitespace characters.
2169         break;
2170     }
2171 }
2172
2173 void HTMLTreeBuilder::processEndOfFile(AtomicHTMLToken& token)
2174 {
2175     ASSERT(token.type() == HTMLToken::EndOfFile);
2176     switch (insertionMode()) {
2177     case InitialMode:
2178         ASSERT(insertionMode() == InitialMode);
2179         processDefaultForInitialMode(token);
2180         // Fall through.
2181     case BeforeHTMLMode:
2182         ASSERT(insertionMode() == BeforeHTMLMode);
2183         processDefaultForBeforeHTMLMode(token);
2184         // Fall through.
2185     case BeforeHeadMode:
2186         ASSERT(insertionMode() == BeforeHeadMode);
2187         processDefaultForBeforeHeadMode(token);
2188         // Fall through.
2189     case InHeadMode:
2190         ASSERT(insertionMode() == InHeadMode);
2191         processDefaultForInHeadMode(token);
2192         // Fall through.
2193     case AfterHeadMode:
2194         ASSERT(insertionMode() == AfterHeadMode);
2195         processDefaultForAfterHeadMode(token);
2196         // Fall through
2197     case InBodyMode:
2198     case InCellMode:
2199         ASSERT(insertionMode() == InBodyMode || insertionMode() == InCellMode);
2200         notImplemented();
2201         break;
2202     case AfterBodyMode:
2203     case AfterAfterBodyMode:
2204         ASSERT(insertionMode() == AfterBodyMode || insertionMode() == AfterAfterBodyMode);
2205         notImplemented();
2206         break;
2207     case InHeadNoscriptMode:
2208         ASSERT(insertionMode() == InHeadNoscriptMode);
2209         processDefaultForInHeadNoscriptMode(token);
2210         processToken(token);
2211         break;
2212     case AfterFramesetMode:
2213     case AfterAfterFramesetMode:
2214         ASSERT(insertionMode() == AfterFramesetMode || insertionMode() == AfterAfterFramesetMode);
2215         break;
2216     case InFramesetMode:
2217     case InTableMode:
2218     case InTableBodyMode:
2219     case InSelectInTableMode:
2220     case InSelectMode:
2221         ASSERT(insertionMode() == InSelectMode || insertionMode() == InSelectInTableMode || insertionMode() == InTableMode || insertionMode() == InFramesetMode || insertionMode() == InTableBodyMode);
2222         if (m_tree.currentElement() != m_tree.openElements()->htmlElement())
2223             parseError(token);
2224         break;
2225     case InColumnGroupMode:
2226         if (m_tree.currentElement() == m_tree.openElements()->htmlElement()) {
2227             ASSERT(m_isParsingFragment);
2228             return;
2229         }
2230         if (!processColgroupEndTagForInColumnGroup()) {
2231             ASSERT(m_isParsingFragment);
2232             return;
2233         }
2234         processEndOfFile(token);
2235         break;
2236     case TextMode:
2237     case InTableTextMode:
2238     case InCaptionMode:
2239     case InRowMode:
2240     case InForeignContentMode:
2241         notImplemented();
2242         break;
2243     }
2244 }
2245
2246 void HTMLTreeBuilder::processDefaultForInitialMode(AtomicHTMLToken& token)
2247 {
2248     notImplemented();
2249     parseError(token);
2250     setInsertionMode(BeforeHTMLMode);
2251 }
2252
2253 void HTMLTreeBuilder::processDefaultForBeforeHTMLMode(AtomicHTMLToken&)
2254 {
2255     AtomicHTMLToken startHTML(HTMLToken::StartTag, htmlTag.localName());
2256     m_tree.insertHTMLHtmlStartTagBeforeHTML(startHTML);
2257     setInsertionMode(BeforeHeadMode);
2258 }
2259
2260 void HTMLTreeBuilder::processDefaultForBeforeHeadMode(AtomicHTMLToken&)
2261 {
2262     AtomicHTMLToken startHead(HTMLToken::StartTag, headTag.localName());
2263     processStartTag(startHead);
2264 }
2265
2266 void HTMLTreeBuilder::processDefaultForInHeadMode(AtomicHTMLToken&)
2267 {
2268     AtomicHTMLToken endHead(HTMLToken::EndTag, headTag.localName());
2269     processEndTag(endHead);
2270 }
2271
2272 void HTMLTreeBuilder::processDefaultForInHeadNoscriptMode(AtomicHTMLToken&)
2273 {
2274     AtomicHTMLToken endNoscript(HTMLToken::EndTag, noscriptTag.localName());
2275     processEndTag(endNoscript);
2276 }
2277
2278 void HTMLTreeBuilder::processDefaultForAfterHeadMode(AtomicHTMLToken&)
2279 {
2280     AtomicHTMLToken startBody(HTMLToken::StartTag, bodyTag.localName());
2281     processStartTag(startBody);
2282     m_framesetOk = true;
2283 }
2284
2285 bool HTMLTreeBuilder::processStartTagForInHead(AtomicHTMLToken& token)
2286 {
2287     ASSERT(token.type() == HTMLToken::StartTag);
2288     if (token.name() == htmlTag) {
2289         m_tree.insertHTMLHtmlStartTagInBody(token);
2290         return true;
2291     }
2292     // FIXME: Atomize "command".
2293     if (token.name() == baseTag
2294         || token.name() == "command"
2295         || token.name() == linkTag
2296         || token.name() == metaTag) {
2297         m_tree.insertSelfClosingHTMLElement(token);
2298         // Note: The custom processing for the <meta> tag is done in HTMLMetaElement::process().
2299         return true;
2300     }
2301     if (token.name() == titleTag) {
2302         processGenericRCDATAStartTag(token);
2303         return true;
2304     }
2305     if (token.name() == noscriptTag) {
2306         if (isScriptingFlagEnabled(m_document->frame())) {
2307             processGenericRawTextStartTag(token);
2308             return true;
2309         }
2310         m_tree.insertHTMLElement(token);
2311         setInsertionMode(InHeadNoscriptMode);
2312         return true;
2313     }
2314     if (token.name() == noframesTag || token.name() == styleTag) {
2315         processGenericRawTextStartTag(token);
2316         return true;
2317     }
2318     if (token.name() == scriptTag) {
2319         processScriptStartTag(token);
2320         return true;
2321     }
2322     if (token.name() == headTag) {
2323         parseError(token);
2324         return true;
2325     }
2326     return false;
2327 }
2328
2329 void HTMLTreeBuilder::processGenericRCDATAStartTag(AtomicHTMLToken& token)
2330 {
2331     ASSERT(token.type() == HTMLToken::StartTag);
2332     m_tree.insertHTMLElement(token);
2333     m_tokenizer->setState(HTMLTokenizer::RCDATAState);
2334     m_originalInsertionMode = m_insertionMode;
2335     m_insertionMode = TextMode;
2336 }
2337
2338 void HTMLTreeBuilder::processGenericRawTextStartTag(AtomicHTMLToken& token)
2339 {
2340     ASSERT(token.type() == HTMLToken::StartTag);
2341     m_tree.insertHTMLElement(token);
2342     m_tokenizer->setState(HTMLTokenizer::RAWTEXTState);
2343     m_originalInsertionMode = m_insertionMode;
2344     m_insertionMode = TextMode;
2345 }
2346
2347 void HTMLTreeBuilder::processScriptStartTag(AtomicHTMLToken& token)
2348 {
2349     ASSERT(token.type() == HTMLToken::StartTag);
2350     m_tree.insertScriptElement(token);
2351     m_tokenizer->setState(HTMLTokenizer::ScriptDataState);
2352     m_originalInsertionMode = m_insertionMode;
2353     m_insertionMode = TextMode;
2354 }
2355
2356 void HTMLTreeBuilder::finished()
2357 {
2358     // We should call m_document->finishedParsing() here, except
2359     // m_legacyTreeBuilder->finished() does it for us.
2360     if (m_legacyTreeBuilder) {
2361         m_legacyTreeBuilder->finished();
2362         return;
2363     }
2364
2365     // Warning, this may delete the parser, so don't try to do anything else after this.
2366     if (!m_isParsingFragment)
2367         m_document->finishedParsing();
2368 }
2369
2370 bool HTMLTreeBuilder::isScriptingFlagEnabled(Frame* frame)
2371 {
2372     if (!frame)
2373         return false;
2374     if (ScriptController* scriptController = frame->script())
2375         return scriptController->canExecuteScripts(NotAboutToExecuteScript);
2376     return false;
2377 }
2378
2379 }