2010-07-09 Eric Seidel <eric@webkit.org>
[WebKit-https.git] / WebCore / html / HTMLTreeBuilder.cpp
1 /*
2  * Copyright (C) 2010 Google, Inc. All Rights Reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  * 1. Redistributions of source code must retain the above copyright
8  *    notice, this list of conditions and the following disclaimer.
9  * 2. Redistributions in binary form must reproduce the above copyright
10  *    notice, this list of conditions and the following disclaimer in the
11  *    documentation and/or other materials provided with the distribution.
12  *
13  * THIS SOFTWARE IS PROVIDED BY GOOGLE INC. ``AS IS'' AND ANY
14  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL GOOGLE INC. OR
17  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
20  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
21  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24  */
25
26 #include "config.h"
27 #include "HTMLTreeBuilder.h"
28
29 #include "Comment.h"
30 #include "DocumentFragment.h"
31 #include "DocumentType.h"
32 #include "Element.h"
33 #include "Frame.h"
34 #include "HTMLDocument.h"
35 #include "HTMLElementFactory.h"
36 #include "HTMLHtmlElement.h"
37 #include "HTMLNames.h"
38 #include "HTMLScriptElement.h"
39 #include "HTMLToken.h"
40 #include "HTMLTokenizer.h"
41 #include "LegacyHTMLDocumentParser.h"
42 #include "LegacyHTMLTreeBuilder.h"
43 #include "LocalizedStrings.h"
44 #if ENABLE(MATHML)
45 #include "MathMLNames.h"
46 #endif
47 #include "NotImplemented.h"
48 #if ENABLE(SVG)
49 #include "SVGNames.h"
50 #endif
51 #include "ScriptController.h"
52 #include "Settings.h"
53 #include "Text.h"
54 #include <wtf/UnusedParam.h>
55
56 namespace WebCore {
57
58 using namespace HTMLNames;
59
60 static const int uninitializedLineNumberValue = -1;
61
62 namespace {
63
64 inline bool isTreeBuilderWhitepace(UChar cc)
65 {
66     return cc == '\t' || cc == '\x0A' || cc == '\x0C' || cc == '\x0D' || cc == ' ';
67 }
68
69 inline bool hasNonWhitespace(const String& string)
70 {
71     const UChar* characters = string.characters();
72     const unsigned length = string.length();
73     for (unsigned i = 0; i < length; ++i) {
74         if (!isTreeBuilderWhitepace(characters[i]))
75             return true;
76     }
77     return false;
78 }
79
80 bool shouldUseLegacyTreeBuilder(Document* document)
81 {
82     return !document->settings() || !document->settings()->html5TreeBuilderEnabled();
83 }
84
85 bool isNumberedHeaderTag(const AtomicString& tagName)
86 {
87     return tagName == h1Tag
88         || tagName == h2Tag
89         || tagName == h3Tag
90         || tagName == h4Tag
91         || tagName == h5Tag
92         || tagName == h6Tag;
93 }
94
95 bool isCaptionColOrColgroupTag(const AtomicString& tagName)
96 {
97     return tagName == captionTag
98         || tagName == colTag
99         || tagName == colgroupTag;
100 }
101
102 bool isTableCellContextTag(const AtomicString& tagName)
103 {
104     return tagName == thTag || tagName == tdTag;
105 }
106
107 bool isTableBodyContextTag(const AtomicString& tagName)
108 {
109     return tagName == tbodyTag
110         || tagName == tfootTag
111         || tagName == theadTag;
112 }
113
114 // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#special
115 bool isSpecialTag(const AtomicString& tagName)
116 {
117     return tagName == addressTag
118         || tagName == articleTag
119         || tagName == asideTag
120         || tagName == baseTag
121         || tagName == basefontTag
122         || tagName == "bgsound"
123         || tagName == blockquoteTag
124         || tagName == bodyTag
125         || tagName == brTag
126         || tagName == buttonTag
127         || tagName == centerTag
128         || tagName == colTag
129         || tagName == colgroupTag
130         || tagName == "command"
131         || tagName == ddTag
132         || tagName == "details"
133         || tagName == dirTag
134         || tagName == divTag
135         || tagName == dlTag
136         || tagName == dtTag
137         || tagName == embedTag
138         || tagName == fieldsetTag
139         || tagName == "figure"
140         || tagName == footerTag
141         || tagName == formTag
142         || tagName == frameTag
143         || tagName == framesetTag
144         || isNumberedHeaderTag(tagName)
145         || tagName == headTag
146         || tagName == headerTag
147         || tagName == hgroupTag
148         || tagName == hrTag
149         || tagName == iframeTag
150         || tagName == imgTag
151         || tagName == inputTag
152         || tagName == isindexTag
153         || tagName == liTag
154         || tagName == linkTag
155         || tagName == listingTag
156         || tagName == menuTag
157         || tagName == metaTag
158         || tagName == navTag
159         || tagName == noembedTag
160         || tagName == noframesTag
161         || tagName == noscriptTag
162         || tagName == olTag
163         || tagName == pTag
164         || tagName == paramTag
165         || tagName == plaintextTag
166         || tagName == preTag
167         || tagName == scriptTag
168         || tagName == sectionTag
169         || tagName == selectTag
170         || tagName == styleTag
171         || isTableBodyContextTag(tagName)
172         || tagName == textareaTag
173         || tagName == titleTag
174         || tagName == trTag
175         || tagName == ulTag
176         || tagName == wbrTag
177         || tagName == xmpTag;
178 }
179
180 // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#scoping
181 // Same as isScopingTag in LegacyHTMLTreeBuilder.cpp
182 // and isScopeMarker in HTMLElementStack.cpp
183 bool isScopingTag(const AtomicString& tagName)
184 {
185     return tagName == appletTag
186         || tagName == buttonTag
187         || tagName == captionTag
188 #if ENABLE(SVG_FOREIGN_OBJECT)
189         || tagName == SVGNames::foreignObjectTag
190 #endif
191         || tagName == htmlTag
192         || tagName == marqueeTag
193         || tagName == objectTag
194         || tagName == tableTag
195         || isTableCellContextTag(tagName);
196 }
197
198 bool isNonAnchorNonNobrFormattingTag(const AtomicString& tagName)
199 {
200     return tagName == bTag
201         || tagName == bigTag
202         || tagName == codeTag
203         || tagName == emTag
204         || tagName == fontTag
205         || tagName == iTag
206         || tagName == sTag
207         || tagName == smallTag
208         || tagName == strikeTag
209         || tagName == strongTag
210         || tagName == ttTag
211         || tagName == uTag;
212 }
213
214 bool isNonAnchorFormattingTag(const AtomicString& tagName)
215 {
216     return tagName == nobrTag
217         || isNonAnchorNonNobrFormattingTag(tagName);
218 }
219
220 bool requiresRedirectToFosterParent(Element* element)
221 {
222     return element->hasTagName(tableTag)
223         || isTableBodyContextTag(element->localName())
224         || element->hasTagName(trTag);
225 }
226
227 // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#formatting
228 bool isFormattingTag(const AtomicString& tagName)
229 {
230     return tagName == aTag || isNonAnchorFormattingTag(tagName);
231 }
232
233 // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#phrasing
234 bool isPhrasingTag(const AtomicString& tagName)
235 {
236     return !isSpecialTag(tagName) && !isScopingTag(tagName) && !isFormattingTag(tagName);
237 }
238
239 bool isNotFormattingAndNotPhrasing(const Element* element)
240 {
241     // The spec often says "node is not in the formatting category, and is not
242     // in the phrasing category". !phrasing && !formatting == scoping || special
243     // scoping || special is easier to compute.
244     // FIXME: localName() is wrong for non-html content.
245     const AtomicString& tagName = element->localName();
246     return isScopingTag(tagName) || isSpecialTag(tagName);
247 }
248
249 } // namespace
250
// Constructs a tree builder for parsing a whole document.
// |tokenizer| and |document| are stored as given; |reportErrors| is stored and
// also forwarded to the legacy tree builder when one is created.
HTMLTreeBuilder::HTMLTreeBuilder(HTMLTokenizer* tokenizer, HTMLDocument* document, bool reportErrors)
    : m_framesetOk(true)
    , m_document(document)
    , m_tree(document, FragmentScriptingAllowed)
    , m_reportErrors(reportErrors)
    , m_isPaused(false)
    , m_insertionMode(InitialMode)
    , m_originalInsertionMode(InitialMode)
    , m_secondaryInsertionMode(InitialMode)
    , m_tokenizer(tokenizer)
    // A legacy tree builder is only created when shouldUseLegacyTreeBuilder()
    // says so; otherwise the member stays null and tokens are processed here.
    , m_legacyTreeBuilder(shouldUseLegacyTreeBuilder(document) ? new LegacyHTMLTreeBuilder(document, reportErrors) : 0)
    , m_lastScriptElementStartLine(uninitializedLineNumberValue)
    , m_scriptToProcessStartLine(uninitializedLineNumberValue)
    , m_fragmentScriptingPermission(FragmentScriptingAllowed)
    , m_isParsingFragment(false)
{
}
268
// FIXME: Member variables should be grouped into self-initializing structs to
// minimize code duplication between these constructors.
// Constructs a tree builder for parsing into |fragment|. The document is taken
// from fragment->document(), error reporting is off, and |scriptingPermission|
// is stored and passed to both m_tree and the legacy tree builder.
HTMLTreeBuilder::HTMLTreeBuilder(HTMLTokenizer* tokenizer, DocumentFragment* fragment, FragmentScriptingPermission scriptingPermission)
    : m_framesetOk(true)
    , m_document(fragment->document())
    , m_tree(fragment->document(), scriptingPermission)
    , m_reportErrors(false) // FIXME: Why not report errors in fragments?
    , m_isPaused(false)
    , m_insertionMode(InitialMode)
    , m_originalInsertionMode(InitialMode)
    , m_secondaryInsertionMode(InitialMode)
    , m_tokenizer(tokenizer)
    // Unlike the document constructor, a legacy tree builder is always created here.
    , m_legacyTreeBuilder(new LegacyHTMLTreeBuilder(fragment, scriptingPermission))
    , m_lastScriptElementStartLine(uninitializedLineNumberValue)
    , m_scriptToProcessStartLine(uninitializedLineNumberValue)
    , m_fragmentScriptingPermission(scriptingPermission)
    , m_isParsingFragment(true)
{
}
288
// The destructor body is intentionally empty.
// NOTE(review): m_legacyTreeBuilder is allocated with new in the constructors;
// presumably the member is an owning smart pointer - confirm in the header.
HTMLTreeBuilder::~HTMLTreeBuilder()
{
}
292
293 static void convertToOldStyle(AtomicHTMLToken& token, Token& oldStyleToken)
294 {
295     switch (token.type()) {
296     case HTMLToken::Uninitialized:
297     case HTMLToken::DOCTYPE:
298         ASSERT_NOT_REACHED();
299         break;
300     case HTMLToken::EndOfFile:
301         ASSERT_NOT_REACHED();
302         notImplemented();
303         break;
304     case HTMLToken::StartTag:
305     case HTMLToken::EndTag: {
306         oldStyleToken.beginTag = (token.type() == HTMLToken::StartTag);
307         oldStyleToken.selfClosingTag = token.selfClosing();
308         oldStyleToken.tagName = token.name();
309         oldStyleToken.attrs = token.takeAtributes();
310         break;
311     }
312     case HTMLToken::Comment:
313         oldStyleToken.tagName = commentAtom;
314         oldStyleToken.text = token.comment().impl();
315         break;
316     case HTMLToken::Character:
317         oldStyleToken.tagName = textAtom;
318         oldStyleToken.text = token.characters().impl();
319         break;
320     }
321 }
322
// Called after a <script> start tag has been handled: switches the tokenizer
// to ScriptDataState. The remaining steps are still unimplemented.
void HTMLTreeBuilder::handleScriptStartTag()
{
    notImplemented(); // The HTML fragment case?
    m_tokenizer->setState(HTMLTokenizer::ScriptDataState);
    notImplemented(); // Save insertion mode.
}
329
330 void HTMLTreeBuilder::handleScriptEndTag(Element* scriptElement, int scriptStartLine)
331 {
332     ASSERT(!m_scriptToProcess); // Caller never called takeScriptToProcess!
333     ASSERT(m_scriptToProcessStartLine == uninitializedLineNumberValue); // Caller never called takeScriptToProcess!
334     notImplemented(); // Save insertion mode and insertion point?
335
336     // Pause ourselves so that parsing stops until the script can be processed by the caller.
337     m_isPaused = true;
338     m_scriptToProcess = scriptElement;
339     // Lexer line numbers are 0-based, ScriptSourceCode expects 1-based lines,
340     // so we convert here before passing the line number off to HTMLScriptRunner.
341     m_scriptToProcessStartLine = scriptStartLine + 1;
342 }
343
344 PassRefPtr<Element> HTMLTreeBuilder::takeScriptToProcess(int& scriptStartLine)
345 {
346     // Unpause ourselves, callers may pause us again when processing the script.
347     // The HTML5 spec is written as though scripts are executed inside the tree
348     // builder.  We pause the parser to exit the tree builder, and then resume
349     // before running scripts.
350     m_isPaused = false;
351     scriptStartLine = m_scriptToProcessStartLine;
352     m_scriptToProcessStartLine = uninitializedLineNumberValue;
353     return m_scriptToProcess.release();
354 }
355
356 HTMLTokenizer::State HTMLTreeBuilder::adjustedLexerState(HTMLTokenizer::State state, const AtomicString& tagName, Frame* frame)
357 {
358     if (tagName == textareaTag || tagName == titleTag)
359         return HTMLTokenizer::RCDATAState;
360
361     if (tagName == styleTag
362         || tagName == iframeTag
363         || tagName == xmpTag
364         || tagName == noembedTag
365         || tagName == noframesTag
366         || (tagName == noscriptTag && isScriptingFlagEnabled(frame)))
367         return HTMLTokenizer::RAWTEXTState;
368
369     if (tagName == plaintextTag)
370         return HTMLTokenizer::PLAINTEXTState;
371
372     return state;
373 }
374
// Forwards |token| to the legacy tree builder, then performs the tokenizer
// state changes and script bookkeeping that go with start and end tags.
void HTMLTreeBuilder::passTokenToLegacyParser(HTMLToken& token)
{
    // DOCTYPE tokens are copied into a DoctypeToken and handed over separately.
    if (token.type() == HTMLToken::DOCTYPE) {
        DoctypeToken doctypeToken;
        doctypeToken.m_name.append(token.name().data(), token.name().size());
        doctypeToken.m_publicID = token.publicIdentifier();
        doctypeToken.m_systemID = token.systemIdentifier();
        doctypeToken.m_forceQuirks = token.forceQuirks();

        m_legacyTreeBuilder->parseDoctypeToken(&doctypeToken);
        return;
    }

    // End-of-file tokens are not forwarded to the legacy tree builder.
    if (token.type() == HTMLToken::EndOfFile)
        return;

    // For now, we translate into an old-style token for testing.
    Token oldStyleToken;
    AtomicHTMLToken atomicToken(token);
    convertToOldStyle(atomicToken, oldStyleToken);

    RefPtr<Node> result =  m_legacyTreeBuilder->parseToken(&oldStyleToken);
    if (token.type() == HTMLToken::StartTag) {
        // This work is supposed to be done by the parser, but
        // when using the old parser we have to do this manually.
        if (oldStyleToken.tagName == scriptTag) {
            handleScriptStartTag();
            // Remember the script element and the line it started on so the
            // matching end tag can hand them to handleScriptEndTag().
            m_lastScriptElement = static_pointer_cast<Element>(result);
            m_lastScriptElementStartLine = m_tokenizer->lineNumber();
        } else if (oldStyleToken.tagName == preTag || oldStyleToken.tagName == listingTag)
            m_tokenizer->skipLeadingNewLineForListing();
        else
            m_tokenizer->setState(adjustedLexerState(m_tokenizer->state(), oldStyleToken.tagName, m_document->frame()));
    } else if (token.type() == HTMLToken::EndTag) {
        if (oldStyleToken.tagName == scriptTag) {
            // Nothing to do unless a script start tag recorded an element.
            if (m_lastScriptElement) {
                ASSERT(m_lastScriptElementStartLine != uninitializedLineNumberValue);
                if (m_fragmentScriptingPermission == FragmentScriptingNotAllowed) {
                    // FIXME: This is a horrible hack for platform/Pasteboard.
                    // Clear the <script> tag when using the Parser to create
                    // a DocumentFragment for pasting so that javascript content
                    // does not show up in pasted HTML.
                    m_lastScriptElement->removeChildren();
                } else if (insertionMode() != AfterFramesetMode)
                    handleScriptEndTag(m_lastScriptElement.get(), m_lastScriptElementStartLine);
                // Reset the per-script state whether or not the script was queued.
                m_lastScriptElement = 0;
                m_lastScriptElementStartLine = uninitializedLineNumberValue;
            }
        } else if (oldStyleToken.tagName == framesetTag)
            setInsertionMode(AfterFramesetMode);
    }
}
427
428 void HTMLTreeBuilder::constructTreeFromToken(HTMLToken& rawToken)
429 {
430     if (m_legacyTreeBuilder) {
431         passTokenToLegacyParser(rawToken);
432         return;
433     }
434
435     AtomicHTMLToken token(rawToken);
436     processToken(token);
437 }
438
439 void HTMLTreeBuilder::processToken(AtomicHTMLToken& token)
440 {
441     switch (token.type()) {
442     case HTMLToken::Uninitialized:
443         ASSERT_NOT_REACHED();
444         break;
445     case HTMLToken::DOCTYPE:
446         processDoctypeToken(token);
447         break;
448     case HTMLToken::StartTag:
449         processStartTag(token);
450         break;
451     case HTMLToken::EndTag:
452         processEndTag(token);
453         break;
454     case HTMLToken::Comment:
455         processComment(token);
456         return;
457     case HTMLToken::Character:
458         processCharacter(token);
459         break;
460     case HTMLToken::EndOfFile:
461         processEndOfFile(token);
462         break;
463     }
464 }
465
466 void HTMLTreeBuilder::processDoctypeToken(AtomicHTMLToken& token)
467 {
468     ASSERT(token.type() == HTMLToken::DOCTYPE);
469     if (insertionMode() == InitialMode) {
470         m_tree.insertDoctype(token);
471         return;
472     }
473     parseError(token);
474 }
475
476 void HTMLTreeBuilder::processFakeStartTag(const QualifiedName& tagName, PassRefPtr<NamedNodeMap> attributes)
477 {
478     // FIXME: We'll need a fancier conversion than just "localName" for SVG/MathML tags.
479     AtomicHTMLToken fakeToken(HTMLToken::StartTag, tagName.localName(), attributes);
480     processStartTag(fakeToken);
481 }
482
483 void HTMLTreeBuilder::processFakeEndTag(const QualifiedName& tagName)
484 {
485     // FIXME: We'll need a fancier conversion than just "localName" for SVG/MathML tags.
486     AtomicHTMLToken fakeToken(HTMLToken::EndTag, tagName.localName());
487     processEndTag(fakeToken);
488 }
489
490 void HTMLTreeBuilder::processFakeCharacters(const String& characters)
491 {
492     AtomicHTMLToken fakeToken(characters);
493     processCharacter(fakeToken);
494 }
495
496 void HTMLTreeBuilder::processFakePEndTagIfPInScope()
497 {
498     if (!m_tree.openElements()->inScope(pTag.localName()))
499         return;
500     AtomicHTMLToken endP(HTMLToken::EndTag, pTag.localName());
501     processEndTag(endP);
502 }
503
504 PassRefPtr<NamedNodeMap> HTMLTreeBuilder::attributesForIsindexInput(AtomicHTMLToken& token)
505 {
506     RefPtr<NamedNodeMap> attributes = token.takeAtributes();
507     if (!attributes)
508         attributes = NamedNodeMap::create();
509     else {
510         attributes->removeAttribute(nameAttr);
511         attributes->removeAttribute(actionAttr);
512         attributes->removeAttribute(promptAttr);
513     }
514
515     RefPtr<Attribute> mappedAttribute = Attribute::createMapped(nameAttr, isindexTag.localName());
516     attributes->insertAttribute(mappedAttribute.release(), false);
517     return attributes.release();
518 }
519
// Handles an <isindex> start tag in body. It is always a parse error; when no
// form is currently set on the tree, the tag is expanded into the synthetic
// sequence <form> <hr> <label> text <input> </label> <hr> </form>.
void HTMLTreeBuilder::processIsindexStartTagForInBody(AtomicHTMLToken& token)
{
    ASSERT(token.type() == HTMLToken::StartTag);
    ASSERT(token.name() == isindexTag);
    parseError(token);
    // The token is ignored when a form is already set.
    if (m_tree.form())
        return;
    notImplemented(); // Acknowledge self-closing flag
    processFakeStartTag(formTag);
    // Copy the isindex action attribute, if any, onto the new form element.
    Attribute* actionAttribute = token.getAttributeItem(actionAttr);
    if (actionAttribute) {
        ASSERT(m_tree.currentElement()->hasTagName(formTag));
        m_tree.currentElement()->setAttribute(actionAttr, actionAttribute->value());
    }
    processFakeStartTag(hrTag);
    processFakeStartTag(labelTag);
    // The label text is the prompt attribute when present, otherwise the
    // string returned by searchableIndexIntroduction().
    Attribute* promptAttribute = token.getAttributeItem(promptAttr);
    if (promptAttribute)
        processFakeCharacters(promptAttribute->value());
    else
        processFakeCharacters(searchableIndexIntroduction());
    // attributesForIsindexInput() takes the token's attributes, so this must
    // come after the action/prompt lookups above.
    processFakeStartTag(inputTag, attributesForIsindexInput(token));
    notImplemented(); // This second set of characters may be needed by non-english locales.
    processFakeEndTag(labelTag);
    processFakeStartTag(hrTag);
    processFakeEndTag(formTag);
}
547
548 namespace {
549
550 bool isLi(const Element* element)
551 {
552     return element->hasTagName(liTag);
553 }
554
555 bool isDdOrDt(const Element* element)
556 {
557     return element->hasTagName(ddTag)
558         || element->hasTagName(dtTag);
559 }
560
561 }
562
563 template <bool shouldClose(const Element*)>
564 void HTMLTreeBuilder::processCloseWhenNestedTag(AtomicHTMLToken& token)
565 {
566     m_framesetOk = false;
567     HTMLElementStack::ElementRecord* nodeRecord = m_tree.openElements()->topRecord();
568     while (1) {
569         Element* node = nodeRecord->element();
570         if (shouldClose(node)) {
571             processFakeEndTag(node->tagQName());
572             break;
573         }
574         if (isNotFormattingAndNotPhrasing(node) && !node->hasTagName(addressTag) && !node->hasTagName(divTag) && !node->hasTagName(pTag))
575             break;
576         nodeRecord = nodeRecord->next();
577     }
578     processFakePEndTagIfPInScope();
579     m_tree.insertHTMLElement(token);
580 }
581
// Handles a start tag token using the "in body" rules. The function is an
// ordered chain of tag-name checks; each matching branch handles the token and
// returns, except where a comment notes a deliberate fall through. Tags that
// match no branch are inserted as ordinary HTML elements at the end.
void HTMLTreeBuilder::processStartTagForInBody(AtomicHTMLToken& token)
{
    ASSERT(token.type() == HTMLToken::StartTag);
    if (token.name() == htmlTag) {
        m_tree.insertHTMLHtmlStartTagInBody(token);
        return;
    }
    // These tags are delegated to the "in head" handler.
    if (token.name() == baseTag
        || token.name() == "command"
        || token.name() == linkTag
        || token.name() == metaTag
        || token.name() == noframesTag
        || token.name() == scriptTag
        || token.name() == styleTag
        || token.name() == titleTag) {
        bool didProcess = processStartTagForInHead(token);
        ASSERT_UNUSED(didProcess, didProcess);
        return;
    }
    if (token.name() == bodyTag) {
        m_tree.insertHTMLBodyStartTagInBody(token);
        return;
    }
    // <frameset> is always a parse error here; it only replaces the body
    // while m_framesetOk is still true.
    if (token.name() == framesetTag) {
        parseError(token);
        notImplemented(); // fragment case
        if (!m_framesetOk)
            return;
        ExceptionCode ec = 0;
        m_tree.openElements()->bodyElement()->remove(ec);
        ASSERT(!ec);
        m_tree.openElements()->popUntil(m_tree.openElements()->bodyElement());
        m_tree.openElements()->popHTMLBodyElement();
        ASSERT(m_tree.openElements()->top() == m_tree.openElements()->htmlElement());
        m_tree.insertHTMLElement(token);
        m_insertionMode = InFramesetMode;
        return;
    }
    // Block-level containers: close an in-scope <p>, then insert.
    if (token.name() == addressTag
        || token.name() == articleTag
        || token.name() == asideTag
        || token.name() == blockquoteTag
        || token.name() == centerTag
        || token.name() == "details"
        || token.name() == dirTag
        || token.name() == divTag
        || token.name() == dlTag
        || token.name() == fieldsetTag
        || token.name() == "figure"
        || token.name() == footerTag
        || token.name() == headerTag
        || token.name() == hgroupTag
        || token.name() == menuTag
        || token.name() == navTag
        || token.name() == olTag
        || token.name() == pTag
        || token.name() == sectionTag
        || token.name() == ulTag) {
        processFakePEndTagIfPInScope();
        m_tree.insertHTMLElement(token);
        return;
    }
    // A heading directly inside another heading is a parse error; the open
    // heading is popped before the new one is inserted.
    if (isNumberedHeaderTag(token.name())) {
        processFakePEndTagIfPInScope();
        if (isNumberedHeaderTag(m_tree.currentElement()->localName())) {
            parseError(token);
            m_tree.openElements()->pop();
        }
        m_tree.insertHTMLElement(token);
        return;
    }
    if (token.name() == preTag || token.name() == listingTag) {
        processFakePEndTagIfPInScope();
        m_tree.insertHTMLElement(token);
        m_tokenizer->skipLeadingNewLineForListing();
        m_framesetOk = false;
        return;
    }
    // A <form> while a form is already set is a parse error and is ignored.
    if (token.name() == formTag) {
        if (m_tree.form()) {
            parseError(token);
            return;
        }
        processFakePEndTagIfPInScope();
        m_tree.insertHTMLElement(token);
        m_tree.setForm(m_tree.currentElement());
        return;
    }
    if (token.name() == liTag) {
        processCloseWhenNestedTag<isLi>(token);
        return;
    }
    if (token.name() == ddTag || token.name() == dtTag) {
        processCloseWhenNestedTag<isDdOrDt>(token);
        return;
    }
    if (token.name() == plaintextTag) {
        processFakePEndTagIfPInScope();
        m_tree.insertHTMLElement(token);
        m_tokenizer->setState(HTMLTokenizer::PLAINTEXTState);
        return;
    }
    // A <button> with a button already in scope closes the open one and then
    // reprocesses this token.
    if (token.name() == buttonTag) {
        if (m_tree.openElements()->inScope(buttonTag)) {
            parseError(token);
            processFakeEndTag(buttonTag);
            processStartTag(token); // FIXME: Could we just fall through here?
            return;
        }
        m_tree.reconstructTheActiveFormattingElements();
        m_tree.insertHTMLElement(token);
        m_framesetOk = false;
        return;
    }
    // An <a> while another a is among the active formatting elements is a
    // parse error: the old one is closed and removed from both lists.
    if (token.name() == aTag) {
        Element* activeATag = m_tree.activeFormattingElements()->closestElementInScopeWithName(aTag.localName());
        if (activeATag) {
            parseError(token);
            processFakeEndTag(aTag);
            m_tree.activeFormattingElements()->remove(activeATag);
            if (m_tree.openElements()->contains(activeATag))
                m_tree.openElements()->remove(activeATag);
        }
        m_tree.reconstructTheActiveFormattingElements();
        m_tree.insertFormattingElement(token);
        return;
    }
    if (isNonAnchorNonNobrFormattingTag(token.name())) {
        m_tree.reconstructTheActiveFormattingElements();
        m_tree.insertFormattingElement(token);
        return;
    }
    if (token.name() == nobrTag) {
        m_tree.reconstructTheActiveFormattingElements();
        if (m_tree.openElements()->inScope(nobrTag)) {
            parseError(token);
            processFakeEndTag(nobrTag);
            m_tree.reconstructTheActiveFormattingElements();
        }
        m_tree.insertFormattingElement(token);
        return;
    }
    // These elements also append a marker to the active formatting elements.
    if (token.name() == appletTag
        || token.name() == marqueeTag
        || token.name() == objectTag) {
        m_tree.reconstructTheActiveFormattingElements();
        m_tree.insertHTMLElement(token);
        m_tree.activeFormattingElements()->appendMarker();
        m_framesetOk = false;
        return;
    }
    // Outside Compat parse mode, <table> first closes an in-scope <p>.
    if (token.name() == tableTag) {
        if (m_document->parseMode() != Document::Compat && m_tree.openElements()->inScope(pTag))
            processFakeEndTag(pTag);
        m_tree.insertHTMLElement(token);
        m_framesetOk = false;
        m_insertionMode = InTableMode;
        return;
    }
    if (token.name() == imageTag) {
        parseError(token);
        // Apparently we're not supposed to ask.
        token.setName(imgTag.localName());
        // Note the fall through to the imgTag handling below!
    }
    // Elements inserted with insertSelfClosingHTMLElement() after
    // reconstructing the active formatting elements.
    if (token.name() == areaTag
        || token.name() == basefontTag
        || token.name() == "bgsound"
        || token.name() == brTag
        || token.name() == embedTag
        || token.name() == imgTag
        || token.name() == inputTag
        || token.name() == keygenTag
        || token.name() == wbrTag) {
        m_tree.reconstructTheActiveFormattingElements();
        m_tree.insertSelfClosingHTMLElement(token);
        m_framesetOk = false;
        return;
    }
    if (token.name() == paramTag
        || token.name() == sourceTag
        || token.name() == "track") {
        m_tree.insertSelfClosingHTMLElement(token);
        return;
    }
    if (token.name() == hrTag) {
        processFakePEndTagIfPInScope();
        m_tree.insertSelfClosingHTMLElement(token);
        m_framesetOk = false;
        return;
    }
    if (token.name() == isindexTag) {
        processIsindexStartTagForInBody(token);
        return;
    }
    // <textarea> switches the tokenizer to RCDATA and the tree builder to
    // TextMode, remembering the current insertion mode first.
    if (token.name() == textareaTag) {
        m_tree.insertHTMLElement(token);
        m_tokenizer->skipLeadingNewLineForListing();
        m_tokenizer->setState(HTMLTokenizer::RCDATAState);
        m_originalInsertionMode = m_insertionMode;
        m_framesetOk = false;
        m_insertionMode = TextMode;
        return;
    }
    if (token.name() == xmpTag) {
        processFakePEndTagIfPInScope();
        m_tree.reconstructTheActiveFormattingElements();
        m_framesetOk = false;
        processGenericRawTextStartTag(token);
        return;
    }
    if (token.name() == iframeTag) {
        m_framesetOk = false;
        processGenericRawTextStartTag(token);
        return;
    }
    if (token.name() == noembedTag) {
        processGenericRawTextStartTag(token);
        return;
    }
    // <noscript> is only treated as raw text when scripting is enabled;
    // otherwise it reaches the generic insertion at the end.
    if (token.name() == noscriptTag && isScriptingFlagEnabled(m_document->frame())) {
        processGenericRawTextStartTag(token);
        return;
    }
    // <select> picks its insertion mode based on whether the current mode is
    // one of the table-related modes.
    if (token.name() == selectTag) {
        m_tree.reconstructTheActiveFormattingElements();
        m_tree.insertHTMLElement(token);
        m_framesetOk = false;
        if (m_insertionMode == InTableMode
             || m_insertionMode == InCaptionMode
             || m_insertionMode == InColumnGroupMode
             || m_insertionMode == InTableBodyMode
             || m_insertionMode == InRowMode
             || m_insertionMode == InCellMode)
            m_insertionMode = InSelectInTableMode;
        else
            m_insertionMode = InSelectMode;
        return;
    }
    // <optgroup>/<option> first close an in-scope <option>.
    if (token.name() == optgroupTag || token.name() == optionTag) {
        if (m_tree.openElements()->inScope(optionTag.localName())) {
            AtomicHTMLToken endOption(HTMLToken::EndTag, optionTag.localName());
            processEndTag(endOption);
        }
        m_tree.reconstructTheActiveFormattingElements();
        m_tree.insertHTMLElement(token);
        return;
    }
    // <rp>/<rt> with a ruby in scope: generate implied end tags, and if the
    // current element is still not the ruby, report a parse error and pop.
    if (token.name() == rpTag || token.name() == rtTag) {
        if (m_tree.openElements()->inScope(rubyTag.localName())) {
            m_tree.generateImpliedEndTags();
            if (!m_tree.currentElement()->hasTagName(rubyTag)) {
                parseError(token);
                m_tree.openElements()->popUntil(rubyTag.localName());
            }
        }
        m_tree.insertHTMLElement(token);
        return;
    }
    // Neither foreign content branch returns, so math and svg currently
    // continue to the checks and generic insertion below.
    if (token.name() == "math") {
        // This is the MathML foreign content branch point.
        notImplemented();
    }
    if (token.name() == "svg") {
        // This is the SVG foreign content branch point.
        notImplemented();
    }
    // Table-structure, frame and head start tags are parse errors in body
    // and are ignored.
    if (isCaptionColOrColgroupTag(token.name())
        || token.name() == frameTag
        || token.name() == headTag
        || isTableBodyContextTag(token.name())
        || isTableCellContextTag(token.name())
        || token.name() == trTag) {
        parseError(token);
        return;
    }
    // Any other start tag: ordinary element insertion.
    m_tree.reconstructTheActiveFormattingElements();
    m_tree.insertHTMLElement(token);
}
861
862 bool HTMLTreeBuilder::processColgroupEndTagForInColumnGroup()
863 {
864     if (m_tree.currentElement() == m_tree.openElements()->htmlElement()) {
865         ASSERT(m_isParsingFragment);
866         // FIXME: parse error
867         return false;
868     }
869     m_tree.openElements()->pop();
870     m_insertionMode = InTableMode;
871     return true;
872 }
873
874 // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#close-the-cell
875 void HTMLTreeBuilder::closeTheCell()
876 {
877     ASSERT(insertionMode() == InCellMode);
878     if (m_tree.openElements()->inScope(tdTag)) {
879         ASSERT(!m_tree.openElements()->inScope(thTag));
880         processFakeEndTag(tdTag);
881         return;
882     }
883     ASSERT(m_tree.openElements()->inScope(thTag));
884     processFakeEndTag(thTag);
885     ASSERT(insertionMode() == InRowMode);
886 }
887
888 void HTMLTreeBuilder::processStartTagForInTable(AtomicHTMLToken& token)
889 {
890     ASSERT(token.type() == HTMLToken::StartTag);
891     if (token.name() == captionTag) {
892         m_tree.openElements()->popUntilTableScopeMarker();
893         m_tree.activeFormattingElements()->appendMarker();
894         m_tree.insertHTMLElement(token);
895         m_insertionMode = InCaptionMode;
896         return;
897     }
898     if (token.name() == colgroupTag) {
899         m_tree.openElements()->popUntilTableScopeMarker();
900         m_tree.insertHTMLElement(token);
901         m_insertionMode = InColumnGroupMode;
902         return;
903     }
904     if (token.name() == colTag) {
905         processFakeStartTag(colgroupTag);
906         ASSERT(InColumnGroupMode);
907         processStartTag(token);
908         return;
909     }
910     if (isTableBodyContextTag(token.name())) {
911         m_tree.openElements()->popUntilTableScopeMarker();
912         m_tree.insertHTMLElement(token);
913         m_insertionMode = InTableBodyMode;
914         return;
915     }
916     if (isTableCellContextTag(token.name())
917         || token.name() == trTag) {
918         processFakeStartTag(tbodyTag);
919         ASSERT(insertionMode() == InTableBodyMode);
920         processStartTag(token);
921         return;
922     }
923     if (token.name() == tableTag) {
924         parseError(token);
925         if (!processTableEndTagForInTable()) {
926             ASSERT(m_isParsingFragment);
927             return;
928         }
929         processStartTag(token);
930         return;
931     }
932     if (token.name() == styleTag || token.name() == scriptTag) {
933         processStartTagForInHead(token);
934         return;
935     }
936     if (token.name() == inputTag) {
937         Attribute* typeAttribute = token.getAttributeItem(typeAttr);
938         if (!typeAttribute || equalIgnoringCase(typeAttribute->value(), "hidden")) {
939             parseError(token);
940             m_tree.insertSelfClosingHTMLElement(token);
941             return;
942         }
943         // Fall through to "anything else" case.
944     }
945     if (token.name() == formTag) {
946         parseError(token);
947         if (m_tree.form())
948             return;
949         m_tree.insertSelfClosingHTMLElement(token);
950         return;
951     }
952     parseError(token);
953     HTMLConstructionSite::RedirectToFosterParentGuard redirecter(m_tree, requiresRedirectToFosterParent(m_tree.currentElement()));
954     processStartTagForInBody(token);
955 }
956
957 void HTMLTreeBuilder::processStartTag(AtomicHTMLToken& token)
958 {
959     ASSERT(token.type() == HTMLToken::StartTag);
960     switch (insertionMode()) {
961     case InitialMode:
962         ASSERT(insertionMode() == InitialMode);
963         processDefaultForInitialMode(token);
964         // Fall through.
965     case BeforeHTMLMode:
966         ASSERT(insertionMode() == BeforeHTMLMode);
967         if (token.name() == htmlTag) {
968             m_tree.insertHTMLHtmlStartTagBeforeHTML(token);
969             setInsertionMode(BeforeHeadMode);
970             return;
971         }
972         processDefaultForBeforeHTMLMode(token);
973         // Fall through.
974     case BeforeHeadMode:
975         ASSERT(insertionMode() == BeforeHeadMode);
976         if (token.name() == htmlTag) {
977             m_tree.insertHTMLHtmlStartTagInBody(token);
978             return;
979         }
980         if (token.name() == headTag) {
981             m_tree.insertHTMLHeadElement(token);
982             setInsertionMode(InHeadMode);
983             return;
984         }
985         processDefaultForBeforeHeadMode(token);
986         // Fall through.
987     case InHeadMode:
988         ASSERT(insertionMode() == InHeadMode);
989         if (processStartTagForInHead(token))
990             return;
991         processDefaultForInHeadMode(token);
992         // Fall through.
993     case AfterHeadMode:
994         ASSERT(insertionMode() == AfterHeadMode);
995         if (token.name() == htmlTag) {
996             m_tree.insertHTMLHtmlStartTagInBody(token);
997             return;
998         }
999         if (token.name() == bodyTag) {
1000             m_framesetOk = false;
1001             m_tree.insertHTMLBodyElement(token);
1002             m_insertionMode = InBodyMode;
1003             return;
1004         }
1005         if (token.name() == framesetTag) {
1006             m_tree.insertHTMLElement(token);
1007             setInsertionMode(InFramesetMode);
1008             return;
1009         }
1010         if (token.name() == baseTag
1011             || token.name() == linkTag
1012             || token.name() == metaTag
1013             || token.name() == noframesTag
1014             || token.name() == scriptTag
1015             || token.name() == styleTag
1016             || token.name() == titleTag) {
1017             parseError(token);
1018             ASSERT(m_tree.head());
1019             m_tree.openElements()->pushHTMLHeadElement(m_tree.head());
1020             processStartTagForInHead(token);
1021             m_tree.openElements()->removeHTMLHeadElement(m_tree.head());
1022             return;
1023         }
1024         if (token.name() == headTag) {
1025             parseError(token);
1026             return;
1027         }
1028         processDefaultForAfterHeadMode(token);
1029         // Fall through
1030     case InBodyMode:
1031         ASSERT(insertionMode() == InBodyMode);
1032         processStartTagForInBody(token);
1033         break;
1034     case InTableMode:
1035         ASSERT(insertionMode() == InTableMode);
1036         processStartTagForInTable(token);
1037         break;
1038     case InCaptionMode:
1039         ASSERT(insertionMode() == InCaptionMode);
1040         if (isCaptionColOrColgroupTag(token.name())
1041             || isTableBodyContextTag(token.name())
1042             || isTableCellContextTag(token.name())
1043             || token.name() == trTag) {
1044             parseError(token);
1045             if (!processCaptionEndTagForInCaption()) {
1046                 ASSERT(m_isParsingFragment);
1047                 return;
1048             }
1049             processStartTag(token);
1050             return;
1051         }
1052         processStartTagForInBody(token);
1053         break;
1054     case InColumnGroupMode:
1055         ASSERT(insertionMode() == InColumnGroupMode);
1056         if (token.name() == htmlTag) {
1057             m_tree.insertHTMLHtmlStartTagInBody(token);
1058             return;
1059         }
1060         if (token.name() == colTag) {
1061             m_tree.insertSelfClosingHTMLElement(token);
1062             return;
1063         }
1064         if (!processColgroupEndTagForInColumnGroup()) {
1065             ASSERT(m_isParsingFragment);
1066             return;
1067         }
1068         processStartTag(token);
1069         break;
1070     case InTableBodyMode:
1071         ASSERT(insertionMode() == InTableBodyMode);
1072         if (token.name() == trTag) {
1073             m_tree.openElements()->popUntilTableBodyScopeMarker(); // How is there ever anything to pop?
1074             m_tree.insertHTMLElement(token);
1075             m_insertionMode = InRowMode;
1076             return;
1077         }
1078         if (isTableCellContextTag(token.name())) {
1079             parseError(token);
1080             processFakeStartTag(trTag);
1081             ASSERT(insertionMode() == InRowMode);
1082             processStartTag(token);
1083             return;
1084         }
1085         if (isCaptionColOrColgroupTag(token.name()) || isTableBodyContextTag(token.name())) {
1086             // FIXME: This is slow.
1087             if (!m_tree.openElements()->inTableScope(tbodyTag.localName()) && !m_tree.openElements()->inTableScope(theadTag.localName()) && !m_tree.openElements()->inTableScope(tfootTag.localName())) {
1088                 ASSERT(m_isParsingFragment);
1089                 parseError(token);
1090                 return;
1091             }
1092             m_tree.openElements()->popUntilTableBodyScopeMarker();
1093             ASSERT(isTableBodyContextTag(m_tree.currentElement()->localName()));
1094             processFakeEndTag(m_tree.currentElement()->tagQName());
1095             processStartTag(token);
1096             return;
1097         }
1098         processStartTagForInTable(token);
1099         break;
1100     case InRowMode:
1101         ASSERT(insertionMode() == InRowMode);
1102         if (isTableCellContextTag(token.name())) {
1103             m_tree.openElements()->popUntilTableRowScopeMarker();
1104             m_tree.insertHTMLElement(token);
1105             m_insertionMode = InCellMode;
1106             m_tree.activeFormattingElements()->appendMarker();
1107             return;
1108         }
1109         if (token.name() == trTag
1110             || isCaptionColOrColgroupTag(token.name())
1111             || isTableBodyContextTag(token.name())) {
1112             if (!processTrEndTagForInRow()) {
1113                 ASSERT(m_isParsingFragment);
1114                 return;
1115             }
1116             ASSERT(insertionMode() == InTableBodyMode);
1117             processStartTag(token);
1118             return;
1119         }
1120         processStartTagForInTable(token);
1121         break;
1122     case InCellMode:
1123         ASSERT(insertionMode() == InCellMode);
1124         if (isCaptionColOrColgroupTag(token.name())
1125             || isTableCellContextTag(token.name())
1126             || token.name() == trTag
1127             || isTableBodyContextTag(token.name())) {
1128             // FIXME: This could be more efficient.
1129             if (!m_tree.openElements()->inTableScope(tdTag) && !m_tree.openElements()->inTableScope(thTag)) {
1130                 ASSERT(m_isParsingFragment);
1131                 parseError(token);
1132                 return;
1133             }
1134             closeTheCell();
1135             processStartTag(token);
1136             return;
1137         }
1138         processStartTagForInBody(token);
1139         break;
1140     case AfterBodyMode:
1141     case AfterAfterBodyMode:
1142         ASSERT(insertionMode() == AfterBodyMode || insertionMode() == AfterAfterBodyMode);
1143         if (token.name() == htmlTag) {
1144             m_tree.insertHTMLHtmlStartTagInBody(token);
1145             return;
1146         }
1147         m_insertionMode = InBodyMode;
1148         processStartTag(token);
1149         break;
1150     case InHeadNoscriptMode:
1151         ASSERT(insertionMode() == InHeadNoscriptMode);
1152         if (token.name() == htmlTag) {
1153             m_tree.insertHTMLHtmlStartTagInBody(token);
1154             return;
1155         }
1156         if (token.name() == linkTag
1157             || token.name() == metaTag
1158             || token.name() == noframesTag
1159             || token.name() == styleTag) {
1160             bool didProcess = processStartTagForInHead(token);
1161             ASSERT_UNUSED(didProcess, didProcess);
1162             return;
1163         }
1164         if (token.name() == htmlTag || token.name() == noscriptTag) {
1165             parseError(token);
1166             return;
1167         }
1168         processDefaultForInHeadNoscriptMode(token);
1169         processToken(token);
1170         break;
1171     case InFramesetMode:
1172         ASSERT(insertionMode() == InFramesetMode);
1173         if (token.name() == htmlTag) {
1174             m_tree.insertHTMLHtmlStartTagInBody(token);
1175             return;
1176         }
1177         if (token.name() == framesetTag) {
1178             m_tree.insertHTMLElement(token);
1179             return;
1180         }
1181         if (token.name() == frameTag) {
1182             m_tree.insertSelfClosingHTMLElement(token);
1183             return;
1184         }
1185         if (token.name() == noframesTag) {
1186             processStartTagForInHead(token);
1187             return;
1188         }
1189         parseError(token);
1190         break;
1191     case AfterFramesetMode:
1192     case AfterAfterFramesetMode:
1193         ASSERT(insertionMode() == AfterFramesetMode || insertionMode() == AfterAfterFramesetMode);
1194         if (token.name() == htmlTag) {
1195             m_tree.insertHTMLHtmlStartTagInBody(token);
1196             return;
1197         }
1198         if (token.name() == noframesTag) {
1199             processStartTagForInHead(token);
1200             return;
1201         }
1202         parseError(token);
1203         break;
1204     case InSelectInTableMode:
1205         ASSERT(insertionMode() == InSelectInTableMode);
1206         if (token.name() == captionTag
1207             || token.name() == tableTag
1208             || isTableBodyContextTag(token.name())
1209             || token.name() == trTag
1210             || isTableCellContextTag(token.name())) {
1211             parseError(token);
1212             AtomicHTMLToken endSelect(HTMLToken::EndTag, selectTag.localName());
1213             processEndTag(endSelect);
1214             processStartTag(token);
1215             return;
1216         }
1217         // Fall through
1218     case InSelectMode:
1219         ASSERT(insertionMode() == InSelectMode || insertionMode() == InSelectInTableMode);
1220         if (token.name() == htmlTag) {
1221             m_tree.insertHTMLHtmlStartTagInBody(token);
1222             return;
1223         }
1224         if (token.name() == optionTag) {
1225             if (m_tree.currentElement()->hasTagName(optionTag)) {
1226                 AtomicHTMLToken endOption(HTMLToken::EndTag, optionTag.localName());
1227                 processEndTag(endOption);
1228             }
1229             m_tree.insertHTMLElement(token);
1230             return;
1231         }
1232         if (token.name() == optgroupTag) {
1233             if (m_tree.currentElement()->hasTagName(optionTag)) {
1234                 AtomicHTMLToken endOption(HTMLToken::EndTag, optionTag.localName());
1235                 processEndTag(endOption);
1236             }
1237             if (m_tree.currentElement()->hasTagName(optgroupTag)) {
1238                 AtomicHTMLToken endOptgroup(HTMLToken::EndTag, optgroupTag.localName());
1239                 processEndTag(endOptgroup);
1240             }
1241             m_tree.insertHTMLElement(token);
1242             return;
1243         }
1244         if (token.name() == selectTag) {
1245             parseError(token);
1246             AtomicHTMLToken endSelect(HTMLToken::EndTag, selectTag.localName());
1247             processEndTag(endSelect);
1248             return;
1249         }
1250         if (token.name() == inputTag
1251             || token.name() == keygenTag
1252             || token.name() == textareaTag) {
1253             parseError(token);
1254             notImplemented(); // fragment case
1255             AtomicHTMLToken endSelect(HTMLToken::EndTag, selectTag.localName());
1256             processEndTag(endSelect);
1257             processStartTag(token);
1258             return;
1259         }
1260         if (token.name() == scriptTag) {
1261             bool didProcess = processStartTagForInHead(token);
1262             ASSERT_UNUSED(didProcess, didProcess);
1263             return;
1264         }
1265         break;
1266     case TextMode:
1267     case InTableTextMode:
1268     case InForeignContentMode:
1269         notImplemented();
1270         break;
1271     }
1272 }
1273
1274 bool HTMLTreeBuilder::processBodyEndTagForInBody(AtomicHTMLToken& token)
1275 {
1276     ASSERT(token.type() == HTMLToken::EndTag);
1277     ASSERT(token.name() == bodyTag);
1278     if (!m_tree.openElements()->inScope(bodyTag.localName())) {
1279         parseError(token);
1280         return false;
1281     }
1282     notImplemented();
1283     m_insertionMode = AfterBodyMode;
1284     return true;
1285 }
1286
1287 void HTMLTreeBuilder::processAnyOtherEndTagForInBody(AtomicHTMLToken& token)
1288 {
1289     ASSERT(token.type() == HTMLToken::EndTag);
1290     HTMLElementStack::ElementRecord* record = m_tree.openElements()->topRecord();
1291     while (1) {
1292         Element* node = record->element();
1293         if (node->hasLocalName(token.name())) {
1294             m_tree.generateImpliedEndTags();
1295             if (!m_tree.currentElement()->hasLocalName(token.name())) {
1296                 parseError(token);
1297                 // FIXME: This is either a bug in the spec, or a bug in our
1298                 // implementation.  Filed a bug with HTML5:
1299                 // http://www.w3.org/Bugs/Public/show_bug.cgi?id=10080
1300                 // We might have already popped the node for the token in
1301                 // generateImpliedEndTags, just abort.
1302                 if (!m_tree.openElements()->contains(node))
1303                     return;
1304             }
1305             m_tree.openElements()->popUntilPopped(node);
1306             return;
1307         }
1308         if (isNotFormattingAndNotPhrasing(node)) {
1309             parseError(token);
1310             return;
1311         }
1312         record = record->next();
1313     }
1314 }
1315
1316 // FIXME: This probably belongs on HTMLElementStack.
1317 HTMLElementStack::ElementRecord* HTMLTreeBuilder::furthestBlockForFormattingElement(Element* formattingElement)
1318 {
1319     HTMLElementStack::ElementRecord* furthestBlock = 0;
1320     HTMLElementStack::ElementRecord* record = m_tree.openElements()->topRecord();
1321     for (; record; record = record->next()) {
1322         if (record->element() == formattingElement)
1323             return furthestBlock;
1324         if (isNotFormattingAndNotPhrasing(record->element()))
1325             furthestBlock = record;
1326     }
1327     ASSERT_NOT_REACHED();
1328     return 0;
1329 }
1330
1331 // FIXME: This should have a whitty name.
1332 // FIXME: This must be implemented in many other places in WebCore.
1333 void HTMLTreeBuilder::reparentChildren(Element* oldParent, Element* newParent)
1334 {
1335     Node* child = oldParent->firstChild();
1336     while (child) {
1337         Node* nextChild = child->nextSibling();
1338         ExceptionCode ec;
1339         newParent->appendChild(child, ec);
1340         ASSERT(!ec);
1341         child = nextChild;
1342     }
1343 }
1344
1345 // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#parsing-main-inbody
1346 void HTMLTreeBuilder::callTheAdoptionAgency(AtomicHTMLToken& token)
1347 {
1348     while (1) {
1349         // 1.
1350         Element* formattingElement = m_tree.activeFormattingElements()->closestElementInScopeWithName(token.name());
1351         if (!formattingElement || !m_tree.openElements()->inScope(formattingElement)) {
1352             parseError(token);
1353             notImplemented(); // Check the stack of open elements for a more specific parse error.
1354             return;
1355         }
1356         HTMLElementStack::ElementRecord* formattingElementRecord = m_tree.openElements()->find(formattingElement);
1357         if (!formattingElementRecord) {
1358             parseError(token);
1359             m_tree.activeFormattingElements()->remove(formattingElement);
1360             return;
1361         }
1362         if (formattingElement != m_tree.currentElement())
1363             parseError(token);
1364         // 2.
1365         HTMLElementStack::ElementRecord* furthestBlock = furthestBlockForFormattingElement(formattingElement);
1366         // 3.
1367         if (!furthestBlock) {
1368             m_tree.openElements()->popUntilPopped(formattingElement);
1369             m_tree.activeFormattingElements()->remove(formattingElement);
1370             return;
1371         }
1372         // 4.
1373         ASSERT(furthestBlock->isAbove(formattingElementRecord));
1374         Element* commonAncestor = formattingElementRecord->next()->element();
1375         // 5.
1376         HTMLFormattingElementList::Bookmark bookmark = m_tree.activeFormattingElements()->bookmarkFor(formattingElement);
1377         // 6.
1378         HTMLElementStack::ElementRecord* node = furthestBlock;
1379         HTMLElementStack::ElementRecord* nextNode = node->next();
1380         HTMLElementStack::ElementRecord* lastNode = furthestBlock;
1381         while (1) {
1382             // 6.1
1383             node = nextNode;
1384             ASSERT(node);
1385             nextNode = node->next(); // Save node->next() for the next iteration in case node is deleted in 6.2.
1386             // 6.2
1387             if (!m_tree.activeFormattingElements()->contains(node->element())) {
1388                 m_tree.openElements()->remove(node->element());
1389                 node = 0;
1390                 continue;
1391             }
1392             // 6.3
1393             if (node == formattingElementRecord)
1394                 break;
1395             // 6.5
1396             // FIXME: We're supposed to save the original token in the entry.
1397             AtomicHTMLToken fakeToken(HTMLToken::StartTag, node->element()->localName());
1398             // Is createHTMLElement correct? (instead of insertHTMLElement)
1399             // Does this code ever leave newElement unattached?
1400             RefPtr<Element> newElement = m_tree.createHTMLElement(fakeToken);
1401             HTMLFormattingElementList::Entry* nodeEntry = m_tree.activeFormattingElements()->find(node->element());
1402             nodeEntry->replaceElement(newElement.get());
1403             node->replaceElement(newElement.release());
1404             // 6.4 -- Intentionally out of order to handle the case where node
1405             // was replaced in 6.5.
1406             // http://www.w3.org/Bugs/Public/show_bug.cgi?id=10096
1407             if (lastNode == furthestBlock)
1408                 bookmark.moveToAfter(node->element());
1409             // 6.6
1410             // Use appendChild instead of parserAddChild to handle possible reparenting.
1411             ExceptionCode ec;
1412             node->element()->appendChild(lastNode->element(), ec);
1413             ASSERT(!ec);
1414             // 6.7
1415             lastNode = node;
1416         }
1417         // 7
1418         const AtomicString& commonAncestorTag = commonAncestor->localName();
1419         if (commonAncestorTag == tableTag
1420             || commonAncestorTag == trTag
1421             || isTableBodyContextTag(commonAncestorTag))
1422             m_tree.fosterParent(lastNode->element());
1423         else {
1424             ExceptionCode ec;
1425             commonAncestor->appendChild(lastNode->element(), ec);
1426             ASSERT(!ec);
1427         }
1428         // 8
1429         // FIXME: We're supposed to save the original token in the entry.
1430         AtomicHTMLToken fakeToken(HTMLToken::StartTag, formattingElement->localName());
1431         RefPtr<Element> newElement = m_tree.createHTMLElement(fakeToken);
1432         // 9
1433         reparentChildren(furthestBlock->element(), newElement.get());
1434         // 10
1435         furthestBlock->element()->parserAddChild(newElement);
1436         // 11
1437         m_tree.activeFormattingElements()->remove(formattingElement);
1438         m_tree.activeFormattingElements()->insertAt(newElement.get(), bookmark);
1439         // 12
1440         m_tree.openElements()->remove(formattingElement);
1441         m_tree.openElements()->insertAbove(newElement, furthestBlock);
1442     }
1443 }
1444
1445 void HTMLTreeBuilder::setInsertionModeAndEnd(InsertionMode newInsertionMode, bool foreign)
1446 {
1447     m_insertionMode = newInsertionMode;
1448     if (foreign) {
1449         m_secondaryInsertionMode = m_insertionMode;
1450         m_insertionMode = InForeignContentMode;
1451     }
1452 }
1453
1454 void HTMLTreeBuilder::resetInsertionModeAppropriately()
1455 {
1456     // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#reset-the-insertion-mode-appropriately
1457     bool last = false;
1458     bool foreign = false;
1459     HTMLElementStack::ElementRecord* nodeRecord = m_tree.openElements()->topRecord();
1460     while (1) {
1461         Element* node = nodeRecord->element();
1462         if (node == m_tree.openElements()->bottom()) {
1463             ASSERT(m_isParsingFragment);
1464             last = true;
1465             notImplemented(); // node = m_contextElement;
1466         }
1467         if (node->hasTagName(selectTag)) {
1468             ASSERT(m_isParsingFragment);
1469             return setInsertionModeAndEnd(InSelectMode, foreign);
1470         }
1471         if (node->hasTagName(tdTag) || node->hasTagName(thTag))
1472             return setInsertionModeAndEnd(InCellMode, foreign);
1473         if (node->hasTagName(trTag))
1474             return setInsertionModeAndEnd(InRowMode, foreign);
1475         if (isTableBodyContextTag(node->localName()))
1476             return setInsertionModeAndEnd(InTableBodyMode, foreign);
1477         if (node->hasTagName(captionTag))
1478             return setInsertionModeAndEnd(InCaptionMode, foreign);
1479         if (node->hasTagName(colgroupTag)) {
1480             ASSERT(m_isParsingFragment);
1481             return setInsertionModeAndEnd(InColumnGroupMode, foreign);
1482         }
1483         if (node->hasTagName(tableTag))
1484             return setInsertionModeAndEnd(InTableMode, foreign);
1485         if (node->hasTagName(headTag)) {
1486             ASSERT(m_isParsingFragment);
1487             return setInsertionModeAndEnd(InBodyMode, foreign);
1488         }
1489         if (node->hasTagName(bodyTag))
1490             return setInsertionModeAndEnd(InBodyMode, foreign);
1491         if (node->hasTagName(framesetTag)) {
1492             ASSERT(m_isParsingFragment);
1493             return setInsertionModeAndEnd(InFramesetMode, foreign);
1494         }
1495         if (node->hasTagName(htmlTag)) {
1496             ASSERT(m_isParsingFragment);
1497             return setInsertionModeAndEnd(BeforeHeadMode, foreign);
1498         }
1499         if (false
1500 #if ENABLE(SVG)
1501         || node->namespaceURI() == SVGNames::svgNamespaceURI
1502 #endif
1503 #if ENABLE(MATHML)
1504         || node->namespaceURI() == MathMLNames::mathmlNamespaceURI
1505 #endif
1506             )
1507             foreign = true;
1508         if (last) {
1509             ASSERT(m_isParsingFragment);
1510             return setInsertionModeAndEnd(InBodyMode, foreign);
1511         }
1512         nodeRecord = nodeRecord->next();
1513     }
1514 }
1515
1516 void HTMLTreeBuilder::processEndTagForInBody(AtomicHTMLToken& token)
1517 {
1518     ASSERT(token.type() == HTMLToken::EndTag);
1519     if (token.name() == bodyTag) {
1520         processBodyEndTagForInBody(token);
1521         return;
1522     }
1523     if (token.name() == htmlTag) {
1524         AtomicHTMLToken endBody(HTMLToken::EndTag, bodyTag.localName());
1525         if (processBodyEndTagForInBody(endBody))
1526             processEndTag(token);
1527         return;
1528     }
1529     if (token.name() == addressTag
1530         || token.name() == articleTag
1531         || token.name() == asideTag
1532         || token.name() == blockquoteTag
1533         || token.name() == buttonTag
1534         || token.name() == centerTag
1535         || token.name() == "details"
1536         || token.name() == dirTag
1537         || token.name() == divTag
1538         || token.name() == dlTag
1539         || token.name() == fieldsetTag
1540         || token.name() == "figure"
1541         || token.name() == footerTag
1542         || token.name() == headerTag
1543         || token.name() == hgroupTag
1544         || token.name() == listingTag
1545         || token.name() == menuTag
1546         || token.name() == navTag
1547         || token.name() == olTag
1548         || token.name() == preTag
1549         || token.name() == sectionTag
1550         || token.name() == ulTag) {
1551         if (!m_tree.openElements()->inScope(token.name())) {
1552             parseError(token);
1553             return;
1554         }
1555         m_tree.generateImpliedEndTags();
1556         if (!m_tree.currentElement()->hasLocalName(token.name()))
1557             parseError(token);
1558         m_tree.openElements()->popUntilPopped(token.name());
1559         return;
1560     }
1561     if (token.name() == formTag) {
1562         RefPtr<Element> node = m_tree.takeForm();
1563         if (!node || !m_tree.openElements()->inScope(node.get())) {
1564             parseError(token);
1565             return;
1566         }
1567         m_tree.generateImpliedEndTags();
1568         if (m_tree.currentElement() != node.get())
1569             parseError(token);
1570         m_tree.openElements()->remove(node.get());
1571     }
1572     if (token.name() == pTag) {
1573         if (!m_tree.openElements()->inScope(token.name())) {
1574             parseError(token);
1575             processFakeStartTag(pTag);
1576             ASSERT(m_tree.openElements()->inScope(token.name()));
1577             processEndTag(token);
1578             return;
1579         }
1580         m_tree.generateImpliedEndTagsWithExclusion(token.name());
1581         if (!m_tree.currentElement()->hasLocalName(token.name()))
1582             parseError(token);
1583         m_tree.openElements()->popUntilPopped(token.name());
1584         return;
1585     }
1586     if (token.name() == liTag) {
1587         if (!m_tree.openElements()->inListItemScope(token.name())) {
1588             parseError(token);
1589             return;
1590         }
1591         m_tree.generateImpliedEndTagsWithExclusion(token.name());
1592         if (!m_tree.currentElement()->hasLocalName(token.name()))
1593             parseError(token);
1594         m_tree.openElements()->popUntilPopped(token.name());
1595         return;
1596     }
1597     if (token.name() == ddTag
1598         || token.name() == dtTag) {
1599         if (!m_tree.openElements()->inScope(token.name())) {
1600             parseError(token);
1601             return;
1602         }
1603         m_tree.generateImpliedEndTagsWithExclusion(token.name());
1604         if (!m_tree.currentElement()->hasLocalName(token.name()))
1605             parseError(token);
1606         m_tree.openElements()->popUntilPopped(token.name());
1607         return;
1608     }
1609     if (isNumberedHeaderTag(token.name())) {
1610         if (!m_tree.openElements()->inScope(token.name())) {
1611             parseError(token);
1612             return;
1613         }
1614         m_tree.generateImpliedEndTags();
1615         if (!m_tree.currentElement()->hasLocalName(token.name()))
1616             parseError(token);
1617         m_tree.openElements()->popUntilPopped(token.name());
1618         return;
1619     }
1620     if (token.name() == "sarcasm") {
1621         notImplemented(); // Take a deep breath.
1622         return;
1623     }
1624     if (isFormattingTag(token.name())) {
1625         callTheAdoptionAgency(token);
1626         return;
1627     }
1628     if (token.name() == appletTag
1629         || token.name() == marqueeTag
1630         || token.name() == objectTag) {
1631         if (!m_tree.openElements()->inScope(token.name())) {
1632             parseError(token);
1633             return;
1634         }
1635         m_tree.generateImpliedEndTags();
1636         if (!m_tree.currentElement()->hasLocalName(token.name()))
1637             parseError(token);
1638         m_tree.openElements()->popUntilPopped(token.name());
1639         m_tree.activeFormattingElements()->clearToLastMarker();
1640         return;
1641     }
1642     if (token.name() == brTag) {
1643         parseError(token);
1644         processFakeStartTag(brTag);
1645         return;
1646     }
1647     processAnyOtherEndTagForInBody(token);
1648 }
1649
1650 bool HTMLTreeBuilder::processCaptionEndTagForInCaption()
1651 {
1652     if (!m_tree.openElements()->inTableScope(captionTag.localName())) {
1653         ASSERT(m_isParsingFragment);
1654         // FIXME: parse error
1655         return false;
1656     }
1657     m_tree.generateImpliedEndTags();
1658     // FIXME: parse error if (!m_tree.currentElement()->hasTagName(captionTag))
1659     m_tree.openElements()->popUntilPopped(captionTag.localName());
1660     m_tree.activeFormattingElements()->clearToLastMarker();
1661     m_insertionMode = InTableMode;
1662     return true;
1663 }
1664
1665 bool HTMLTreeBuilder::processTrEndTagForInRow()
1666 {
1667     if (!m_tree.openElements()->inTableScope(trTag.localName())) {
1668         ASSERT(m_isParsingFragment);
1669         // FIXME: parse error
1670         return false;
1671     }
1672     m_tree.openElements()->popUntilTableRowScopeMarker();
1673     ASSERT(m_tree.currentElement()->hasTagName(trTag));
1674     m_tree.openElements()->pop();
1675     m_insertionMode = InTableBodyMode;
1676     return true;
1677 }
1678
1679 bool HTMLTreeBuilder::processTableEndTagForInTable()
1680 {
1681     if (!m_tree.openElements()->inTableScope(tableTag)) {
1682         ASSERT(m_isParsingFragment);
1683         // FIXME: parse error.
1684         return false;
1685     }
1686     m_tree.openElements()->popUntilPopped(tableTag.localName());
1687     resetInsertionModeAppropriately();
1688     return true;
1689 }
1690
1691 void HTMLTreeBuilder::processEndTagForInTable(AtomicHTMLToken& token)
1692 {
1693     ASSERT(token.type() == HTMLToken::EndTag);
1694     if (token.name() == tableTag) {
1695         processTableEndTagForInTable();
1696         return;
1697     }
1698     if (token.name() == bodyTag
1699         || isCaptionColOrColgroupTag(token.name())
1700         || token.name() == htmlTag
1701         || isTableBodyContextTag(token.name())
1702         || isTableCellContextTag(token.name())
1703         || token.name() == trTag) {
1704         parseError(token);
1705         return;
1706     }
1707     // Is this redirection necessary here?
1708     HTMLConstructionSite::RedirectToFosterParentGuard redirecter(m_tree, requiresRedirectToFosterParent(m_tree.currentElement()));
1709     processEndTagForInBody(token);
1710 }
1711
// Processes an end tag token according to the current insertion mode.
//
// The cases from InitialMode down to AfterHeadMode deliberately fall through:
// unless a case returns early, it runs its processDefaultFor...() step and
// then lets the same token be handled by the next case, ending with the
// InBodyMode handling. Several other cases change parser state first and then
// call processEndTag() again so that the token is re-dispatched under the new
// insertion mode.
1712 void HTMLTreeBuilder::processEndTag(AtomicHTMLToken& token)
1713 {
1714     ASSERT(token.type() == HTMLToken::EndTag);
1715     switch (insertionMode()) {
1716     case InitialMode:
1717         ASSERT(insertionMode() == InitialMode);
1718         processDefaultForInitialMode(token);
1719         // Fall through.
1720     case BeforeHTMLMode:
1721         ASSERT(insertionMode() == BeforeHTMLMode);
             // Only </head>, </body>, </html> and </br> continue; any other
             // end tag is reported as a parse error and ignored.
1722         if (token.name() != headTag && token.name() != bodyTag && token.name() != htmlTag && token.name() != brTag) {
1723             parseError(token);
1724             return;
1725         }
1726         processDefaultForBeforeHTMLMode(token);
1727         // Fall through.
1728     case BeforeHeadMode:
1729         ASSERT(insertionMode() == BeforeHeadMode);
             // Same filter as in BeforeHTMLMode.
1730         if (token.name() != headTag && token.name() != bodyTag && token.name() != htmlTag && token.name() != brTag) {
1731             parseError(token);
1732             return;
1733         }
1734         processDefaultForBeforeHeadMode(token);
1735         // Fall through.
1736     case InHeadMode:
1737         ASSERT(insertionMode() == InHeadMode);
             // </head> pops the head element and moves on to AfterHeadMode.
1738         if (token.name() == headTag) {
1739             m_tree.openElements()->popHTMLHeadElement();
1740             setInsertionMode(AfterHeadMode);
1741             return;
1742         }
             // Only </body>, </html> and </br> continue past this point.
1743         if (token.name() != bodyTag && token.name() != htmlTag && token.name() != brTag) {
1744             parseError(token);
1745             return;
1746         }
1747         processDefaultForInHeadMode(token);
1748         // Fall through.
1749     case AfterHeadMode:
1750         ASSERT(insertionMode() == AfterHeadMode);
1751         if (token.name() != bodyTag && token.name() != htmlTag && token.name() != brTag) {
1752             parseError(token);
1753             return;
1754         }
1755         processDefaultForAfterHeadMode(token);
1756         // Fall through
1757     case InBodyMode:
1758         ASSERT(insertionMode() == InBodyMode);
1759         processEndTagForInBody(token);
1760         break;
1761     case InTableMode:
1762         ASSERT(insertionMode() == InTableMode);
1763         processEndTagForInTable(token);
1764         break;
1765     case InCaptionMode:
1766         ASSERT(insertionMode() == InCaptionMode);
1767         if (token.name() == captionTag) {
1768             processCaptionEndTagForInCaption();
1769             return;
1770         }
             // </table> inside a caption: report a parse error, close the
             // caption first and, if that succeeded, process the token again.
1771         if (token.name() == tableTag) {
1772             parseError(token);
1773             if (!processCaptionEndTagForInCaption()) {
1774                 ASSERT(m_isParsingFragment);
1775                 return;
1776             }
1777             processEndTag(token);
1778             return;
1779         }
             // These end tags are parse errors here and are ignored.
1780         if (token.name() == bodyTag
1781             || token.name() == colTag
1782             || token.name() == colgroupTag
1783             || token.name() == htmlTag
1784             || isTableBodyContextTag(token.name())
1785             || isTableCellContextTag(token.name())
1786             || token.name() == trTag) {
1787             parseError(token);
1788             return;
1789         }
             // Everything else is handled by the InBodyMode rules.
1790         processEndTagForInBody(token);
1791         break;
1792     case InColumnGroupMode:
1793         ASSERT(insertionMode() == InColumnGroupMode);
1794         if (token.name() == colgroupTag) {
1795             processColgroupEndTagForInColumnGroup();
1796             return;
1797         }
1798         if (token.name() == colTag) {
1799             parseError(token);
1800             return;
1801         }
             // Any other end tag: close the column group first and, if that
             // succeeded, process the token again.
1802         if (!processColgroupEndTagForInColumnGroup()) {
1803             ASSERT(m_isParsingFragment);
1804             return;
1805         }
1806         processEndTag(token);
1807         break;
1808     case InRowMode:
1809         ASSERT(insertionMode() == InRowMode);
1810         if (token.name() == trTag) {
1811             processTrEndTagForInRow();
1812             return;
1813         }
             // </table> closes the row first; on success the token is
             // processed again in InTableBodyMode.
1814         if (token.name() == tableTag) {
1815             if (!processTrEndTagForInRow()) {
1816                 ASSERT(m_isParsingFragment);
1817                 return;
1818             }
1819             ASSERT(insertionMode() == InTableBodyMode);
1820             processEndTag(token);
1821             return;
1822         }
             // A table body context end tag that is in table scope closes the
             // row with a fake </tr> and is then processed again.
1823         if (isTableBodyContextTag(token.name())) {
1824             if (!m_tree.openElements()->inTableScope(token.name())) {
1825                 parseError(token);
1826                 return;
1827             }
1828             processFakeEndTag(trTag);
1829             ASSERT(insertionMode() == InTableBodyMode);
1830             processEndTag(token);
1831             return;
1832         }
1833         if (token.name() == bodyTag
1834             || isCaptionColOrColgroupTag(token.name())
1835             || token.name() == htmlTag
1836             || isTableCellContextTag(token.name())) {
1837             parseError(token);
1838             return;
1839         }
             // Everything else is handled by the InTableMode rules.
1840         processEndTagForInTable(token);
1841         break;
1842     case InCellMode:
1843         ASSERT(insertionMode() == InCellMode);
             // A table cell context end tag that is in table scope pops the
             // stack through the matching element, clears the active
             // formatting elements to the last marker and returns to InRowMode.
1844         if (isTableCellContextTag(token.name())) {
1845             if (!m_tree.openElements()->inTableScope(token.name())) {
1846                 parseError(token);
1847                 return;
1848             }
1849             m_tree.generateImpliedEndTags();
1850             if (!m_tree.currentElement()->hasLocalName(token.name()))
1851                 parseError(token);
1852             m_tree.openElements()->popUntilPopped(token.name());
1853             m_tree.activeFormattingElements()->clearToLastMarker();
1854             m_insertionMode = InRowMode;
1855             ASSERT(m_tree.currentElement()->hasTagName(trTag));
1856             return;
1857         }
1858         if (token.name() == bodyTag
1859             || isCaptionColOrColgroupTag(token.name())
1860             || token.name() == htmlTag) {
1861             parseError(token);
1862             return;
1863         }
             // </table>, </tr> or a table body context end tag that is in
             // table scope: close the cell, then process the token again.
1864         if (token.name() == tableTag
1865             || token.name() == trTag
1866             || isTableBodyContextTag(token.name())) {
1867             if (!m_tree.openElements()->inTableScope(token.name())) {
1868                 ASSERT(m_isParsingFragment);
1869                 // FIXME: It is unclear what the exact ASSERT should be.
1870                 // http://www.w3.org/Bugs/Public/show_bug.cgi?id=10098
1871                 parseError(token);
1872                 return;
1873             }
1874             closeTheCell();
1875             processEndTag(token);
1876             return;
1877         }
1878         processEndTagForInBody(token);
1879         break;
1880     case InTableBodyMode:
1881         ASSERT(insertionMode() == InTableBodyMode);
             // A table body context end tag that is in table scope pops back
             // to the table body scope marker, pops that element too and
             // returns to InTableMode.
1882         if (isTableBodyContextTag(token.name())) {
1883             if (!m_tree.openElements()->inTableScope(token.name())) {
1884                 parseError(token);
1885                 return;
1886             }
1887             m_tree.openElements()->popUntilTableBodyScopeMarker();
1888             m_tree.openElements()->pop();
1889             m_insertionMode = InTableMode;
1890             return;
1891         }
             // </table>: requires a tbody, thead or tfoot in table scope. The
             // current table section is closed with a fake end tag and the
             // token is then processed again.
1892         if (token.name() == tableTag) {
1893             // FIXME: This is slow.
1894             if (!m_tree.openElements()->inTableScope(tbodyTag.localName()) && !m_tree.openElements()->inTableScope(theadTag.localName()) && !m_tree.openElements()->inTableScope(tfootTag.localName())) {
1895                 ASSERT(m_isParsingFragment);
1896                 parseError(token);
1897                 return;
1898             }
1899             m_tree.openElements()->popUntilTableBodyScopeMarker();
1900             ASSERT(isTableBodyContextTag(m_tree.currentElement()->localName()));
1901             processFakeEndTag(m_tree.currentElement()->tagQName());
1902             processEndTag(token);
1903             return;
1904         }
1905         if (token.name() == bodyTag
1906             || isCaptionColOrColgroupTag(token.name())
1907             || token.name() == htmlTag
1908             || isTableCellContextTag(token.name())
1909             || token.name() == trTag) {
1910             parseError(token);
1911             return;
1912         }
1913         processEndTagForInTable(token);
1914         break;
1915     case AfterBodyMode:
1916         ASSERT(insertionMode() == AfterBodyMode);
             // </html> moves on to AfterAfterBodyMode, except while parsing a
             // fragment, where it is a parse error and ignored.
1917         if (token.name() == htmlTag) {
1918             if (m_isParsingFragment) {
1919                 parseError(token);
1920                 return;
1921             }
1922             m_insertionMode = AfterAfterBodyMode;
1923             return;
1924         }
1925         // Fall through.
1926     case AfterAfterBodyMode:
1927         ASSERT(insertionMode() == AfterBodyMode || insertionMode() == AfterAfterBodyMode);
             // Parse error: switch back to InBodyMode and process the token
             // again under those rules.
1928         parseError(token);
1929         m_insertionMode = InBodyMode;
1930         processEndTag(token);
1931         break;
1932     case InHeadNoscriptMode:
1933         ASSERT(insertionMode() == InHeadNoscriptMode);
             // </noscript> pops the noscript element and returns to InHeadMode.
1934         if (token.name() == noscriptTag) {
1935             ASSERT(m_tree.currentElement()->hasTagName(noscriptTag));
1936             m_tree.openElements()->pop();
1937             ASSERT(m_tree.currentElement()->hasTagName(headTag));
1938             setInsertionMode(InHeadMode);
1939             return;
1940         }
             // Only </br> reaches the default handling below; every other end
             // tag is a parse error and ignored.
1941         if (token.name() != brTag) {
1942             parseError(token);
1943             return;
1944         }
1945         processDefaultForInHeadNoscriptMode(token);
1946         processToken(token);
1947         break;
1948     case TextMode:
1949         if (token.name() == scriptTag) {
1950             // Pause ourselves so that parsing stops until the script can be processed by the caller.
1951             m_isPaused = true;
1952             ASSERT(m_tree.currentElement()->hasTagName(scriptTag));
1953             m_scriptToProcess = m_tree.currentElement();
1954             m_tree.openElements()->pop();
1955             m_insertionMode = m_originalInsertionMode;
1956             return;
1957         }
             // Any other end tag: pop the current element and restore the
             // insertion mode stored in m_originalInsertionMode.
1958         m_tree.openElements()->pop();
1959         m_insertionMode = m_originalInsertionMode;
1960         break;
1961     case InFramesetMode:
1962         ASSERT(insertionMode() == InFramesetMode);
1963         if (token.name() == framesetTag) {
                 // The root html element is never popped by </frameset>.
1964             if (m_tree.currentElement() == m_tree.openElements()->htmlElement()) {
1965                 parseError(token);
1966                 return;
1967             }
1968             m_tree.openElements()->pop();
                 // Leave frameset mode once the current element is no longer a
                 // <frameset>; never done while parsing a fragment.
1969             if (!m_isParsingFragment && !m_tree.currentElement()->hasTagName(framesetTag))
1970                 m_insertionMode = AfterFramesetMode;
1971             return;
1972         }
             // Other end tags are ignored without reporting a parse error.
1973         break;
1974     case AfterFramesetMode:
1975         ASSERT(insertionMode() == AfterFramesetMode);
1976         if (token.name() == htmlTag) {
1977             m_insertionMode = AfterAfterFramesetMode;
1978             return;
1979         }
1980         // Fall through.
1981     case AfterAfterFramesetMode:
1982         ASSERT(insertionMode() == AfterFramesetMode || insertionMode() == AfterAfterFramesetMode);
1983         parseError(token);
1984         break;
1985     case InSelectInTableMode:
1986         ASSERT(insertionMode() == InSelectInTableMode);
             // Table-related end tags are parse errors here. When the named
             // element is in table scope, a synthesized </select> is processed
             // first and the original token is then processed again.
1987         if (token.name() == captionTag
1988             || token.name() == tableTag
1989             || isTableBodyContextTag(token.name())
1990             || token.name() == trTag
1991             || isTableCellContextTag(token.name())) {
1992             parseError(token);
1993             if (m_tree.openElements()->inTableScope(token.name())) {
1994                 AtomicHTMLToken endSelect(HTMLToken::EndTag, selectTag.localName());
1995                 processEndTag(endSelect);
1996                 processEndTag(token);
1997             }
1998             return;
1999         }
2000         // Fall through.
2001     case InSelectMode:
2002         ASSERT(insertionMode() == InSelectMode || insertionMode() == InSelectInTableMode);
2003         if (token.name() == optgroupTag) {
                 // An <option> sitting directly on top of an <optgroup> is
                 // closed with a fake end tag first.
2004             if (m_tree.currentElement()->hasTagName(optionTag) && m_tree.oneBelowTop()->hasTagName(optgroupTag))
2005                 processFakeEndTag(optionTag);
2006             if (m_tree.currentElement()->hasTagName(optgroupTag)) {
2007                 m_tree.openElements()->pop();
2008                 return;
2009             }
2010             parseError(token);
2011             return;
2012         }
2013         if (token.name() == optionTag) {
2014             if (m_tree.currentElement()->hasTagName(optionTag)) {
2015                 m_tree.openElements()->pop();
2016                 return;
2017             }
2018             parseError(token);
2019             return;
2020         }
2021         if (token.name() == selectTag) {
2022             notImplemented(); // fragment case
2023             m_tree.openElements()->popUntilPopped(selectTag.localName());
2024             resetInsertionModeAppropriately();
2025             return;
2026         }
             // Other end tags are ignored without reporting a parse error.
2027         break;
2028     case InTableTextMode:
2029     case InForeignContentMode:
             // End tag handling for these modes has not been written yet.
2030         notImplemented();
2031         break;
2032     }
2033 }
2034
2035 void HTMLTreeBuilder::processComment(AtomicHTMLToken& token)
2036 {
2037     ASSERT(token.type() == HTMLToken::Comment);
2038     if (m_insertionMode == InitialMode
2039         || m_insertionMode == BeforeHTMLMode
2040         || m_insertionMode == AfterAfterBodyMode
2041         || m_insertionMode == AfterAfterFramesetMode) {
2042         m_tree.insertCommentOnDocument(token);
2043         return;
2044     }
2045     if (m_insertionMode == AfterBodyMode) {
2046         m_tree.insertCommentOnHTMLHtmlElement(token);
2047         return;
2048     }
2049     m_tree.insertComment(token);
2050 }
2051
// Processes a character token according to the current insertion mode.
//
// As in processEndTag(), the cases from InitialMode down to AfterHeadMode
// fall through: unless a case returns early it runs its
// processDefaultFor...() step and hands the token to the next case, ending
// with the shared InBodyMode/InCaptionMode/InCellMode handling. Note the FIXME
// below: the token is currently treated as a unit, not character by character.
2052 void HTMLTreeBuilder::processCharacter(AtomicHTMLToken& token)
2053 {
2054     ASSERT(token.type() == HTMLToken::Character);
2055     // FIXME: We need to figure out how to handle each character individually.
2056     switch (insertionMode()) {
2057     case InitialMode:
2058         ASSERT(insertionMode() == InitialMode);
             // NOTE(review): presumably skipLeadingWhitespace() returns true
             // when nothing is left to process after stripping - confirm
             // against its definition.
2059         if (skipLeadingWhitespace(token))
2060             return;
2061         processDefaultForInitialMode(token);
2062         // Fall through.
2063     case BeforeHTMLMode:
2064         ASSERT(insertionMode() == BeforeHTMLMode);
2065         if (skipLeadingWhitespace(token))
2066             return;
2067         processDefaultForBeforeHTMLMode(token);
2068         // Fall through.
2069     case BeforeHeadMode:
2070         ASSERT(insertionMode() == BeforeHeadMode);
2071         if (skipLeadingWhitespace(token))
2072             return;
2073         processDefaultForBeforeHeadMode(token);
2074         // Fall through.
2075     case InHeadMode:
2076         ASSERT(insertionMode() == InHeadMode);
             // From here on leading whitespace is handed to the tree via
             // insertLeadingWhitespace() instead of being skipped.
2077         if (m_tree.insertLeadingWhitespace(token))
2078             return;
2079         processDefaultForInHeadMode(token);
2080         // Fall through.
2081     case AfterHeadMode:
2082         ASSERT(insertionMode() == AfterHeadMode);
2083         if (m_tree.insertLeadingWhitespace(token))
2084             return;
2085         processDefaultForAfterHeadMode(token);
2086         // Fall through
2087     case InBodyMode:
2088     case InCaptionMode:
2089     case InCellMode:
2090         ASSERT(insertionMode() == InBodyMode || insertionMode() == InCaptionMode || insertionMode() == InCellMode);
2091         m_tree.reconstructTheActiveFormattingElements();
2092         m_tree.insertTextNode(token);
             // Any non-whitespace text clears the frameset-ok flag.
2093         if (m_framesetOk && hasNonWhitespace(token.characters()))
2094             m_framesetOk = false;
2095         break;
2096     case InTableMode:
2097     case InTableBodyMode:
2098     case InRowMode:
2099         ASSERT(insertionMode() == InTableMode || insertionMode() == InTableBodyMode || insertionMode() == InRowMode);
2100         notImplemented(); // Crazy pending characters.
             // Until that is implemented the text is simply inserted as-is.
2101         m_tree.insertTextNode(token);
2102         break;
2103     case InTableTextMode:
2104         notImplemented(); // Crazy pending characters.
2105         break;
2106     case InColumnGroupMode:
2107         ASSERT(insertionMode() == InColumnGroupMode);
2108         if (m_tree.insertLeadingWhitespace(token))
2109             return;
             // Remaining characters close the column group; on success the
             // token is processed again.
2110         if (!processColgroupEndTagForInColumnGroup()) {
2111             ASSERT(m_isParsingFragment);
2112             return;
2113         }
2114         processCharacter(token);
2115         break;
2116     case AfterBodyMode:
2117     case AfterAfterBodyMode:
2118         ASSERT(insertionMode() == AfterBodyMode || insertionMode() == AfterAfterBodyMode);
             // Parse error: switch back to InBodyMode and process the token
             // again under those rules.
2119         parseError(token);
2120         m_insertionMode = InBodyMode;
2121         processCharacter(token);
2122         break;
2123     case TextMode:
2124         notImplemented();
2125         m_tree.insertTextNode(token);
2126         break;
2127     case InHeadNoscriptMode:
2128         ASSERT(insertionMode() == InHeadNoscriptMode);
2129         if (m_tree.insertLeadingWhitespace(token))
2130             return;
             // Remaining characters close the <noscript> via the default step
             // and the token is then dispatched again through processToken().
2131         processDefaultForInHeadNoscriptMode(token);
2132         processToken(token);
2133         break;
2134     case InFramesetMode:
2135     case AfterFramesetMode:
             // NOTE(review): the ASSERT also accepts AfterAfterFramesetMode,
             // but that mode has its own case further down and never reaches
             // this label.
2136         ASSERT(insertionMode() == InFramesetMode || insertionMode() == AfterFramesetMode || insertionMode() == AfterAfterFramesetMode);
2137         if (m_tree.insertLeadingWhitespace(token))
2138             return;
2139         parseError(token);
2140         // FIXME: We probably need some sort of loop here. We're basically
2141         // filtering out the non-whitespace characters.
2142         break;
2143     case InSelectInTableMode:
2144     case InSelectMode:
2145         ASSERT(insertionMode() == InSelectMode || insertionMode() == InSelectInTableMode);
2146         m_tree.insertTextNode(token);
2147         break;
2148     case InForeignContentMode:
2149         notImplemented();
2150         break;
2151     case AfterAfterFramesetMode:
             // Same as the frameset cases above, but using the
             // ...WithActiveFormattingElements variant for leading whitespace.
2152         if (m_tree.insertLeadingWhitespaceWithActiveFormattingElements(token))
2153             return;
2154         parseError(token);
2155         // FIXME: We probably need some sort of loop here. We're basically
2156         // filtering out the non-whitespace characters.
2157         break;
2158     }
2159 }
2160
// Processes the end-of-file token according to the current insertion mode.
//
// The cases from InitialMode down to AfterHeadMode fall through, each running
// its processDefaultFor...() step, until the InBodyMode/InCellMode handling is
// reached. Several modes are still unimplemented and only call
// notImplemented().
2161 void HTMLTreeBuilder::processEndOfFile(AtomicHTMLToken& token)
2162 {
2163     ASSERT(token.type() == HTMLToken::EndOfFile);
2164     switch (insertionMode()) {
2165     case InitialMode:
2166         ASSERT(insertionMode() == InitialMode);
2167         processDefaultForInitialMode(token);
2168         // Fall through.
2169     case BeforeHTMLMode:
2170         ASSERT(insertionMode() == BeforeHTMLMode);
2171         processDefaultForBeforeHTMLMode(token);
2172         // Fall through.
2173     case BeforeHeadMode:
2174         ASSERT(insertionMode() == BeforeHeadMode);
2175         processDefaultForBeforeHeadMode(token);
2176         // Fall through.
2177     case InHeadMode:
2178         ASSERT(insertionMode() == InHeadMode);
2179         processDefaultForInHeadMode(token);
2180         // Fall through.
2181     case AfterHeadMode:
2182         ASSERT(insertionMode() == AfterHeadMode);
2183         processDefaultForAfterHeadMode(token);
2184         // Fall through
2185     case InBodyMode:
2186     case InCellMode:
2187         ASSERT(insertionMode() == InBodyMode || insertionMode() == InCellMode);
2188         notImplemented();
2189         break;
2190     case AfterBodyMode:
2191     case AfterAfterBodyMode:
2192         ASSERT(insertionMode() == AfterBodyMode || insertionMode() == AfterAfterBodyMode);
2193         notImplemented();
2194         break;
2195     case InHeadNoscriptMode:
2196         ASSERT(insertionMode() == InHeadNoscriptMode);
             // Close the <noscript> via the default step, then dispatch the
             // token again through processToken().
2197         processDefaultForInHeadNoscriptMode(token);
2198         processToken(token);
2199         break;
2200     case AfterFramesetMode:
2201     case AfterAfterFramesetMode:
2202         ASSERT(insertionMode() == AfterFramesetMode || insertionMode() == AfterAfterFramesetMode);
             // Nothing to do at end of file in these modes.
2203         break;
2204     case InFramesetMode:
2205     case InTableMode:
2206     case InTableBodyMode:
2207     case InSelectInTableMode:
2208     case InSelectMode:
2209         ASSERT(insertionMode() == InSelectMode || insertionMode() == InSelectInTableMode || insertionMode() == InTableMode || insertionMode() == InFramesetMode || insertionMode() == InTableBodyMode);
             // Reaching end of file with anything other than the root html
             // element as the current element is a parse error.
2210         if (m_tree.currentElement() != m_tree.openElements()->htmlElement())
2211             parseError(token);
2212         break;
2213     case InColumnGroupMode:
             // With only the root html element open there is nothing to close;
             // the ASSERT documents that this is expected only for fragments.
2214         if (m_tree.currentElement() == m_tree.openElements()->htmlElement()) {
2215             ASSERT(m_isParsingFragment);
2216             return;
2217         }
             // Otherwise close the column group and, on success, process the
             // end-of-file token again.
2218         if (!processColgroupEndTagForInColumnGroup()) {
2219             ASSERT(m_isParsingFragment);
2220             return;
2221         }
2222         processEndOfFile(token);
2223         break;
2224     case TextMode:
2225     case InTableTextMode:
2226     case InCaptionMode:
2227     case InRowMode:
2228     case InForeignContentMode:
             // End-of-file handling for these modes has not been written yet.
2229         notImplemented();
2230         break;
2231     }
2232 }
2233
2234 void HTMLTreeBuilder::processDefaultForInitialMode(AtomicHTMLToken& token)
2235 {
2236     notImplemented();
2237     parseError(token);
2238     setInsertionMode(BeforeHTMLMode);
2239 }
2240
2241 void HTMLTreeBuilder::processDefaultForBeforeHTMLMode(AtomicHTMLToken&)
2242 {
2243     AtomicHTMLToken startHTML(HTMLToken::StartTag, htmlTag.localName());
2244     m_tree.insertHTMLHtmlStartTagBeforeHTML(startHTML);
2245     setInsertionMode(BeforeHeadMode);
2246 }
2247
2248 void HTMLTreeBuilder::processDefaultForBeforeHeadMode(AtomicHTMLToken&)
2249 {
2250     AtomicHTMLToken startHead(HTMLToken::StartTag, headTag.localName());
2251     processStartTag(startHead);
2252 }
2253
2254 void HTMLTreeBuilder::processDefaultForInHeadMode(AtomicHTMLToken&)
2255 {
2256     AtomicHTMLToken endHead(HTMLToken::EndTag, headTag.localName());
2257     processEndTag(endHead);
2258 }
2259
2260 void HTMLTreeBuilder::processDefaultForInHeadNoscriptMode(AtomicHTMLToken&)
2261 {
2262     AtomicHTMLToken endNoscript(HTMLToken::EndTag, noscriptTag.localName());
2263     processEndTag(endNoscript);
2264 }
2265
2266 void HTMLTreeBuilder::processDefaultForAfterHeadMode(AtomicHTMLToken&)
2267 {
2268     AtomicHTMLToken startBody(HTMLToken::StartTag, bodyTag.localName());
2269     processStartTag(startBody);
2270     m_framesetOk = true;
2271 }
2272
2273 bool HTMLTreeBuilder::processStartTagForInHead(AtomicHTMLToken& token)
2274 {
2275     ASSERT(token.type() == HTMLToken::StartTag);
2276     if (token.name() == htmlTag) {
2277         m_tree.insertHTMLHtmlStartTagInBody(token);
2278         return true;
2279     }
2280     // FIXME: Atomize "command".
2281     if (token.name() == baseTag
2282         || token.name() == "command"
2283         || token.name() == linkTag
2284         || token.name() == metaTag) {
2285         m_tree.insertSelfClosingHTMLElement(token);
2286         // Note: The custom processing for the <meta> tag is done in HTMLMetaElement::process().
2287         return true;
2288     }
2289     if (token.name() == titleTag) {
2290         processGenericRCDATAStartTag(token);
2291         return true;
2292     }
2293     if (token.name() == noscriptTag) {
2294         if (isScriptingFlagEnabled(m_document->frame())) {
2295             processGenericRawTextStartTag(token);
2296             return true;
2297         }
2298         m_tree.insertHTMLElement(token);
2299         setInsertionMode(InHeadNoscriptMode);
2300         return true;
2301     }
2302     if (token.name() == noframesTag || token.name() == styleTag) {
2303         processGenericRawTextStartTag(token);
2304         return true;
2305     }
2306     if (token.name() == scriptTag) {
2307         processScriptStartTag(token);
2308         return true;
2309     }
2310     if (token.name() == headTag) {
2311         parseError(token);
2312         return true;
2313     }
2314     return false;
2315 }
2316
2317 void HTMLTreeBuilder::processGenericRCDATAStartTag(AtomicHTMLToken& token)
2318 {
2319     ASSERT(token.type() == HTMLToken::StartTag);
2320     m_tree.insertHTMLElement(token);
2321     m_tokenizer->setState(HTMLTokenizer::RCDATAState);
2322     m_originalInsertionMode = m_insertionMode;
2323     m_insertionMode = TextMode;
2324 }
2325
2326 void HTMLTreeBuilder::processGenericRawTextStartTag(AtomicHTMLToken& token)
2327 {
2328     ASSERT(token.type() == HTMLToken::StartTag);
2329     m_tree.insertHTMLElement(token);
2330     m_tokenizer->setState(HTMLTokenizer::RAWTEXTState);
2331     m_originalInsertionMode = m_insertionMode;
2332     m_insertionMode = TextMode;
2333 }
2334
2335 void HTMLTreeBuilder::processScriptStartTag(AtomicHTMLToken& token)
2336 {
2337     ASSERT(token.type() == HTMLToken::StartTag);
2338     m_tree.insertScriptElement(token);
2339     m_tokenizer->setState(HTMLTokenizer::ScriptDataState);
2340     m_originalInsertionMode = m_insertionMode;
2341     m_insertionMode = TextMode;
2342 }
2343
2344 void HTMLTreeBuilder::finished()
2345 {
2346     // We should call m_document->finishedParsing() here, except
2347     // m_legacyTreeBuilder->finished() does it for us.
2348     if (m_legacyTreeBuilder) {
2349         m_legacyTreeBuilder->finished();
2350         return;
2351     }
2352
2353     // Warning, this may delete the parser, so don't try to do anything else after this.
2354     if (!m_isParsingFragment)
2355         m_document->finishedParsing();
2356 }
2357
2358 bool HTMLTreeBuilder::isScriptingFlagEnabled(Frame* frame)
2359 {
2360     if (!frame)
2361         return false;
2362     if (ScriptController* scriptController = frame->script())
2363         return scriptController->canExecuteScripts(NotAboutToExecuteScript);
2364     return false;
2365 }
2366
2367 }