5244b25cbc7c9e8b5097908928ec1a1701df91cd
[WebKit-https.git] / Source / WebCore / html / parser / HTMLTreeBuilder.cpp
1 /*
2  * Copyright (C) 2010 Google, Inc. All Rights Reserved.
3  * Copyright (C) 2011-2017 Apple Inc. All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY GOOGLE INC. ``AS IS'' AND ANY
15  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL GOOGLE INC. OR
18  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  */
26
27 #include "config.h"
28 #include "HTMLTreeBuilder.h"
29
30 #include "DocumentFragment.h"
31 #include "HTMLDocument.h"
32 #include "HTMLDocumentParser.h"
33 #include "HTMLFormControlElement.h"
34 #include "HTMLFormElement.h"
35 #include "HTMLInputElement.h"
36 #include "HTMLOptGroupElement.h"
37 #include "HTMLOptionElement.h"
38 #include "HTMLParserIdioms.h"
39 #include "HTMLScriptElement.h"
40 #include "HTMLTableElement.h"
41 #include "JSCustomElementInterface.h"
42 #include "LocalizedStrings.h"
43 #include "NotImplemented.h"
44 #include "SVGScriptElement.h"
45 #include "XLinkNames.h"
46 #include "XMLNSNames.h"
47 #include "XMLNames.h"
48 #include <wtf/NeverDestroyed.h>
49 #include <wtf/unicode/CharacterNames.h>
50
51 #if ENABLE(TELEPHONE_NUMBER_DETECTION) && PLATFORM(IOS_FAMILY)
52 #include "TelephoneNumberDetector.h"
53 #endif
54
55 namespace WebCore {
56
57 using namespace HTMLNames;
58
59 CustomElementConstructionData::CustomElementConstructionData(Ref<JSCustomElementInterface>&& customElementInterface, const AtomicString& name, Vector<Attribute>&& attributes)
60     : elementInterface(WTFMove(customElementInterface))
61     , name(name)
62     , attributes(WTFMove(attributes))
63 {
64 }
65
66 CustomElementConstructionData::~CustomElementConstructionData() = default;
67
68 namespace {
69
70 inline bool isHTMLSpaceOrReplacementCharacter(UChar character)
71 {
72     return isHTMLSpace(character) || character == replacementCharacter;
73 }
74
75 }
76
77 static inline TextPosition uninitializedPositionValue1()
78 {
79     return TextPosition(OrdinalNumber::fromOneBasedInt(-1), OrdinalNumber());
80 }
81
82 static inline bool isAllWhitespace(const String& string)
83 {
84     return string.isAllSpecialCharacters<isHTMLSpace>();
85 }
86
87 static inline bool isAllWhitespaceOrReplacementCharacters(const String& string)
88 {
89     return string.isAllSpecialCharacters<isHTMLSpaceOrReplacementCharacter>();
90 }
91
92 static bool isNumberedHeaderTag(const AtomicString& tagName)
93 {
94     return tagName == h1Tag
95         || tagName == h2Tag
96         || tagName == h3Tag
97         || tagName == h4Tag
98         || tagName == h5Tag
99         || tagName == h6Tag;
100 }
101
102 static bool isCaptionColOrColgroupTag(const AtomicString& tagName)
103 {
104     return tagName == captionTag || tagName == colTag || tagName == colgroupTag;
105 }
106
107 static bool isTableCellContextTag(const AtomicString& tagName)
108 {
109     return tagName == thTag || tagName == tdTag;
110 }
111
112 static bool isTableBodyContextTag(const AtomicString& tagName)
113 {
114     return tagName == tbodyTag || tagName == tfootTag || tagName == theadTag;
115 }
116
117 static bool isNonAnchorNonNobrFormattingTag(const AtomicString& tagName)
118 {
119     return tagName == bTag
120         || tagName == bigTag
121         || tagName == codeTag
122         || tagName == emTag
123         || tagName == fontTag
124         || tagName == iTag
125         || tagName == sTag
126         || tagName == smallTag
127         || tagName == strikeTag
128         || tagName == strongTag
129         || tagName == ttTag
130         || tagName == uTag;
131 }
132
133 static bool isNonAnchorFormattingTag(const AtomicString& tagName)
134 {
135     return tagName == nobrTag || isNonAnchorNonNobrFormattingTag(tagName);
136 }
137
138 // https://html.spec.whatwg.org/multipage/syntax.html#formatting
139 bool HTMLConstructionSite::isFormattingTag(const AtomicString& tagName)
140 {
141     return tagName == aTag || isNonAnchorFormattingTag(tagName);
142 }
143
144 class HTMLTreeBuilder::ExternalCharacterTokenBuffer {
145 public:
146     explicit ExternalCharacterTokenBuffer(AtomicHTMLToken& token)
147         : m_text(token.characters(), token.charactersLength())
148         , m_isAll8BitData(token.charactersIsAll8BitData())
149     {
150         ASSERT(!isEmpty());
151     }
152
153     explicit ExternalCharacterTokenBuffer(const String& string)
154         : m_text(string)
155         , m_isAll8BitData(m_text.is8Bit())
156     {
157         ASSERT(!isEmpty());
158     }
159
160     ~ExternalCharacterTokenBuffer()
161     {
162         ASSERT(isEmpty());
163     }
164
165     bool isEmpty() const { return m_text.isEmpty(); }
166
167     bool isAll8BitData() const { return m_isAll8BitData; }
168
169     void skipAtMostOneLeadingNewline()
170     {
171         ASSERT(!isEmpty());
172         if (m_text[0] == '\n')
173             m_text = m_text.substring(1);
174     }
175
176     void skipLeadingWhitespace()
177     {
178         skipLeading<isHTMLSpace>();
179     }
180
181     String takeLeadingWhitespace()
182     {
183         return takeLeading<isHTMLSpace>();
184     }
185
186     void skipLeadingNonWhitespace()
187     {
188         skipLeading<isNotHTMLSpace>();
189     }
190
191     String takeRemaining()
192     {
193         String result = makeString(m_text);
194         m_text = StringView();
195         return result;
196     }
197
198     void giveRemainingTo(StringBuilder& recipient)
199     {
200         recipient.append(m_text);
201         m_text = StringView();
202     }
203
204     String takeRemainingWhitespace()
205     {
206         ASSERT(!isEmpty());
207         Vector<LChar, 8> whitespace;
208         do {
209             UChar character = m_text[0];
210             if (isHTMLSpace(character))
211                 whitespace.append(character);
212             m_text = m_text.substring(1);
213         } while (!m_text.isEmpty());
214
215         // Returning the null string when there aren't any whitespace
216         // characters is slightly cleaner semantically because we don't want
217         // to insert a text node (as opposed to inserting an empty text node).
218         if (whitespace.isEmpty())
219             return String();
220
221         return String::adopt(WTFMove(whitespace));
222     }
223
224 private:
225     template<bool characterPredicate(UChar)> void skipLeading()
226     {
227         ASSERT(!isEmpty());
228         while (characterPredicate(m_text[0])) {
229             m_text = m_text.substring(1);
230             if (m_text.isEmpty())
231                 return;
232         }
233     }
234
235     template<bool characterPredicate(UChar)> String takeLeading()
236     {
237         ASSERT(!isEmpty());
238         StringView start = m_text;
239         skipLeading<characterPredicate>();
240         if (start.length() == m_text.length())
241             return String();
242         return makeString(start.substring(0, start.length() - m_text.length()));
243     }
244
245     String makeString(StringView stringView) const
246     {
247         if (stringView.is8Bit() || !isAll8BitData())
248             return stringView.toString();
249         return String::make8BitFrom16BitSource(stringView.characters16(), stringView.length());
250     }
251
252     StringView m_text;
253     bool m_isAll8BitData;
254 };
255
256 inline bool HTMLTreeBuilder::isParsingTemplateContents() const
257 {
258     return m_tree.openElements().hasTemplateInHTMLScope();
259 }
260
261 inline bool HTMLTreeBuilder::isParsingFragmentOrTemplateContents() const
262 {
263     return isParsingFragment() || isParsingTemplateContents();
264 }
265
266 HTMLTreeBuilder::HTMLTreeBuilder(HTMLDocumentParser& parser, HTMLDocument& document, ParserContentPolicy parserContentPolicy, const HTMLParserOptions& options)
267     : m_parser(parser)
268     , m_options(options)
269     , m_tree(document, parserContentPolicy, options.maximumDOMTreeDepth)
270     , m_scriptToProcessStartPosition(uninitializedPositionValue1())
271 {
272 #if !ASSERT_DISABLED
273     m_destructionProhibited = false;
274 #endif
275 }
276
277 HTMLTreeBuilder::HTMLTreeBuilder(HTMLDocumentParser& parser, DocumentFragment& fragment, Element& contextElement, ParserContentPolicy parserContentPolicy, const HTMLParserOptions& options)
278     : m_parser(parser)
279     , m_options(options)
280     , m_fragmentContext(fragment, contextElement)
281     , m_tree(fragment, parserContentPolicy, options.maximumDOMTreeDepth)
282     , m_scriptToProcessStartPosition(uninitializedPositionValue1())
283 {
284     ASSERT(isMainThread());
285
286     // https://html.spec.whatwg.org/multipage/syntax.html#parsing-html-fragments
287     // For efficiency, we skip step 5 ("Let root be a new html element with no attributes") and instead use the DocumentFragment as a root node.
288     m_tree.openElements().pushRootNode(HTMLStackItem::create(fragment));
289
290     if (contextElement.hasTagName(templateTag))
291         m_templateInsertionModes.append(InsertionMode::TemplateContents);
292
293     resetInsertionModeAppropriately();
294
295     m_tree.setForm(is<HTMLFormElement>(contextElement) ? &downcast<HTMLFormElement>(contextElement) : HTMLFormElement::findClosestFormAncestor(contextElement));
296
297 #if !ASSERT_DISABLED
298     m_destructionProhibited = false;
299 #endif
300 }
301
302 HTMLTreeBuilder::FragmentParsingContext::FragmentParsingContext()
303 {
304 }
305
306 HTMLTreeBuilder::FragmentParsingContext::FragmentParsingContext(DocumentFragment& fragment, Element& contextElement)
307     : m_fragment(&fragment)
308 {
309     ASSERT(!fragment.hasChildNodes());
310     m_contextElementStackItem = HTMLStackItem::create(contextElement);
311 }
312
313 inline Element& HTMLTreeBuilder::FragmentParsingContext::contextElement() const
314 {
315     return contextElementStackItem().element();
316 }
317
318 inline HTMLStackItem& HTMLTreeBuilder::FragmentParsingContext::contextElementStackItem() const
319 {
320     ASSERT(m_fragment);
321     return *m_contextElementStackItem;
322 }
323
324 RefPtr<ScriptElement> HTMLTreeBuilder::takeScriptToProcess(TextPosition& scriptStartPosition)
325 {
326     ASSERT(!m_destroyed);
327
328     if (!m_scriptToProcess)
329         return nullptr;
330
331     // Unpause ourselves, callers may pause us again when processing the script.
332     // The HTML5 spec is written as though scripts are executed inside the tree builder.
333     // We pause the parser to exit the tree builder, and then resume before running scripts.
334     scriptStartPosition = m_scriptToProcessStartPosition;
335     m_scriptToProcessStartPosition = uninitializedPositionValue1();
336     return WTFMove(m_scriptToProcess);
337 }
338
339 void HTMLTreeBuilder::constructTree(AtomicHTMLToken&& token)
340 {
341 #if !ASSERT_DISABLED
342     ASSERT(!m_destroyed);
343     ASSERT(!m_destructionProhibited);
344     m_destructionProhibited = true;
345 #endif
346
347     if (shouldProcessTokenInForeignContent(token))
348         processTokenInForeignContent(WTFMove(token));
349     else
350         processToken(WTFMove(token));
351
352     bool inForeignContent = !m_tree.isEmpty()
353         && !isInHTMLNamespace(adjustedCurrentStackItem())
354         && !HTMLElementStack::isHTMLIntegrationPoint(m_tree.currentStackItem())
355         && !HTMLElementStack::isMathMLTextIntegrationPoint(m_tree.currentStackItem());
356
357     m_parser.tokenizer().setForceNullCharacterReplacement(m_insertionMode == InsertionMode::Text || inForeignContent);
358     m_parser.tokenizer().setShouldAllowCDATA(inForeignContent);
359
360 #if !ASSERT_DISABLED
361     m_destructionProhibited = false;
362 #endif
363
364     m_tree.executeQueuedTasks();
365     // The tree builder might have been destroyed as an indirect result of executing the queued tasks.
366 }
367
368 void HTMLTreeBuilder::processToken(AtomicHTMLToken&& token)
369 {
370     switch (token.type()) {
371     case HTMLToken::Uninitialized:
372         ASSERT_NOT_REACHED();
373         break;
374     case HTMLToken::DOCTYPE:
375         m_shouldSkipLeadingNewline = false;
376         processDoctypeToken(WTFMove(token));
377         break;
378     case HTMLToken::StartTag:
379         m_shouldSkipLeadingNewline = false;
380         processStartTag(WTFMove(token));
381         break;
382     case HTMLToken::EndTag:
383         m_shouldSkipLeadingNewline = false;
384         processEndTag(WTFMove(token));
385         break;
386     case HTMLToken::Comment:
387         m_shouldSkipLeadingNewline = false;
388         processComment(WTFMove(token));
389         return;
390     case HTMLToken::Character:
391         processCharacter(WTFMove(token));
392         break;
393     case HTMLToken::EndOfFile:
394         m_shouldSkipLeadingNewline = false;
395         processEndOfFile(WTFMove(token));
396         break;
397     }
398 }
399
400 void HTMLTreeBuilder::processDoctypeToken(AtomicHTMLToken&& token)
401 {
402     ASSERT(token.type() == HTMLToken::DOCTYPE);
403     if (m_insertionMode == InsertionMode::Initial) {
404         m_tree.insertDoctype(WTFMove(token));
405         m_insertionMode = InsertionMode::BeforeHTML;
406         return;
407     }
408     if (m_insertionMode == InsertionMode::InTableText) {
409         defaultForInTableText();
410         processDoctypeToken(WTFMove(token));
411         return;
412     }
413     parseError(token);
414 }
415
416 void HTMLTreeBuilder::processFakeStartTag(const QualifiedName& tagName, Vector<Attribute>&& attributes)
417 {
418     // FIXME: We'll need a fancier conversion than just "localName" for SVG/MathML tags.
419     AtomicHTMLToken fakeToken(HTMLToken::StartTag, tagName.localName(), WTFMove(attributes));
420     processStartTag(WTFMove(fakeToken));
421 }
422
423 void HTMLTreeBuilder::processFakeEndTag(const AtomicString& tagName)
424 {
425     AtomicHTMLToken fakeToken(HTMLToken::EndTag, tagName);
426     processEndTag(WTFMove(fakeToken));
427 }
428
429 void HTMLTreeBuilder::processFakeEndTag(const QualifiedName& tagName)
430 {
431     // FIXME: We'll need a fancier conversion than just "localName" for SVG/MathML tags.
432     processFakeEndTag(tagName.localName());
433 }
434
435 void HTMLTreeBuilder::processFakeCharacters(const String& characters)
436 {
437     ASSERT(!characters.isEmpty());
438     ExternalCharacterTokenBuffer buffer(characters);
439     processCharacterBuffer(buffer);
440 }
441
442 void HTMLTreeBuilder::processFakePEndTagIfPInButtonScope()
443 {
444     if (!m_tree.openElements().inButtonScope(pTag->localName()))
445         return;
446     AtomicHTMLToken endP(HTMLToken::EndTag, pTag->localName());
447     processEndTag(WTFMove(endP));
448 }
449
450 namespace {
451
452 bool isLi(const HTMLStackItem& item)
453 {
454     return item.hasTagName(liTag);
455 }
456
457 bool isDdOrDt(const HTMLStackItem& item)
458 {
459     return item.hasTagName(ddTag) || item.hasTagName(dtTag);
460 }
461
462 }
463
464 template <bool shouldClose(const HTMLStackItem&)> void HTMLTreeBuilder::processCloseWhenNestedTag(AtomicHTMLToken&& token)
465 {
466     m_framesetOk = false;
467     for (auto* nodeRecord = &m_tree.openElements().topRecord(); ; nodeRecord = nodeRecord->next()) {
468         HTMLStackItem& item = nodeRecord->stackItem();
469         if (shouldClose(item)) {
470             ASSERT(item.isElement());
471             processFakeEndTag(item.localName());
472             break;
473         }
474         if (isSpecialNode(item) && !item.hasTagName(addressTag) && !item.hasTagName(divTag) && !item.hasTagName(pTag))
475             break;
476     }
477     processFakePEndTagIfPInButtonScope();
478     m_tree.insertHTMLElement(WTFMove(token));
479 }
480
481 template <typename TableQualifiedName> static HashMap<AtomicString, QualifiedName> createCaseMap(const TableQualifiedName* const names[], unsigned length)
482 {
483     HashMap<AtomicString, QualifiedName> map;
484     for (unsigned i = 0; i < length; ++i) {
485         const QualifiedName& name = *names[i];
486         const AtomicString& localName = name.localName();
487         AtomicString loweredLocalName = localName.convertToASCIILowercase();
488         if (loweredLocalName != localName)
489             map.add(loweredLocalName, name);
490     }
491     return map;
492 }
493
494 static void adjustSVGTagNameCase(AtomicHTMLToken& token)
495 {
496     static NeverDestroyed<HashMap<AtomicString, QualifiedName>> map = createCaseMap(SVGNames::getSVGTags(), SVGNames::SVGTagsCount);
497     const QualifiedName& casedName = map.get().get(token.name());
498     if (casedName.localName().isNull())
499         return;
500     token.setName(casedName.localName());
501 }
502
503 static inline void adjustAttributes(HashMap<AtomicString, QualifiedName>& map, AtomicHTMLToken& token)
504 {
505     for (auto& attribute : token.attributes()) {
506         const QualifiedName& casedName = map.get(attribute.localName());
507         if (!casedName.localName().isNull())
508             attribute.parserSetName(casedName);
509     }
510 }
511
512 template<const QualifiedName* const* attributesTable(), unsigned attributesTableLength> static void adjustAttributes(AtomicHTMLToken& token)
513 {
514     static NeverDestroyed<HashMap<AtomicString, QualifiedName>> map = createCaseMap(attributesTable(), attributesTableLength);
515     adjustAttributes(map, token);
516 }
517
518 static inline void adjustSVGAttributes(AtomicHTMLToken& token)
519 {
520     adjustAttributes<SVGNames::getSVGAttrs, SVGNames::SVGAttrsCount>(token);
521 }
522
523 static inline void adjustMathMLAttributes(AtomicHTMLToken& token)
524 {
525     adjustAttributes<MathMLNames::getMathMLAttrs, MathMLNames::MathMLAttrsCount>(token);
526 }
527
528 static void addNamesWithPrefix(HashMap<AtomicString, QualifiedName>& map, const AtomicString& prefix, const QualifiedName* const names[], unsigned length)
529 {
530     for (unsigned i = 0; i < length; ++i) {
531         const QualifiedName& name = *names[i];
532         const AtomicString& localName = name.localName();
533         map.add(prefix + ':' + localName, QualifiedName(prefix, localName, name.namespaceURI()));
534     }
535 }
536
537 static HashMap<AtomicString, QualifiedName> createForeignAttributesMap()
538 {
539     HashMap<AtomicString, QualifiedName> map;
540
541     AtomicString xlinkName("xlink", AtomicString::ConstructFromLiteral);
542     addNamesWithPrefix(map, xlinkName, XLinkNames::getXLinkAttrs(), XLinkNames::XLinkAttrsCount);
543     addNamesWithPrefix(map, xmlAtom(), XMLNames::getXMLAttrs(), XMLNames::XMLAttrsCount);
544
545     map.add(WTF::xmlnsAtom(), XMLNSNames::xmlnsAttr);
546     map.add("xmlns:xlink", QualifiedName(xmlnsAtom(), xlinkName, XMLNSNames::xmlnsNamespaceURI));
547
548     return map;
549 }
550
551 static void adjustForeignAttributes(AtomicHTMLToken& token)
552 {
553     static NeverDestroyed<HashMap<AtomicString, QualifiedName>> map = createForeignAttributesMap();
554     adjustAttributes(map, token);
555 }
556
557 void HTMLTreeBuilder::processStartTagForInBody(AtomicHTMLToken&& token)
558 {
559     ASSERT(token.type() == HTMLToken::StartTag);
560     if (token.name() == htmlTag) {
561         processHtmlStartTagForInBody(WTFMove(token));
562         return;
563     }
564     if (token.name() == baseTag
565         || token.name() == basefontTag
566         || token.name() == bgsoundTag
567         || token.name() == commandTag
568         || token.name() == linkTag
569         || token.name() == metaTag
570         || token.name() == noframesTag
571         || token.name() == scriptTag
572         || token.name() == styleTag
573         || token.name() == titleTag) {
574         bool didProcess = processStartTagForInHead(WTFMove(token));
575         ASSERT_UNUSED(didProcess, didProcess);
576         return;
577     }
578     if (token.name() == bodyTag) {
579         parseError(token);
580         bool fragmentOrTemplateCase = !m_tree.openElements().secondElementIsHTMLBodyElement() || m_tree.openElements().hasOnlyOneElement()
581             || m_tree.openElements().hasTemplateInHTMLScope();
582         if (fragmentOrTemplateCase) {
583             ASSERT(isParsingFragmentOrTemplateContents());
584             return;
585         }
586         m_framesetOk = false;
587         m_tree.insertHTMLBodyStartTagInBody(WTFMove(token));
588         return;
589     }
590     if (token.name() == framesetTag) {
591         parseError(token);
592         if (!m_tree.openElements().secondElementIsHTMLBodyElement() || m_tree.openElements().hasOnlyOneElement()) {
593             ASSERT(isParsingFragmentOrTemplateContents());
594             return;
595         }
596         if (!m_framesetOk)
597             return;
598         m_tree.openElements().bodyElement().remove();
599         m_tree.openElements().popUntil(m_tree.openElements().bodyElement());
600         m_tree.openElements().popHTMLBodyElement();
601         // Note: in the fragment case the root is a DocumentFragment instead of a proper html element which is a quirk / optimization in WebKit.
602         ASSERT(!isParsingFragment() || is<DocumentFragment>(m_tree.openElements().topNode()));
603         ASSERT(isParsingFragment() || &m_tree.openElements().top() == &m_tree.openElements().htmlElement());
604         m_tree.insertHTMLElement(WTFMove(token));
605         m_insertionMode = InsertionMode::InFrameset;
606         return;
607     }
608     if (token.name() == addressTag
609         || token.name() == articleTag
610         || token.name() == asideTag
611         || token.name() == blockquoteTag
612         || token.name() == centerTag
613         || token.name() == detailsTag
614         || token.name() == dirTag
615         || token.name() == divTag
616         || token.name() == dlTag
617         || token.name() == fieldsetTag
618         || token.name() == figcaptionTag
619         || token.name() == figureTag
620         || token.name() == footerTag
621         || token.name() == headerTag
622         || token.name() == hgroupTag
623         || token.name() == mainTag
624         || token.name() == menuTag
625         || token.name() == navTag
626         || token.name() == olTag
627         || token.name() == pTag
628         || token.name() == sectionTag
629         || token.name() == summaryTag
630         || token.name() == ulTag) {
631         processFakePEndTagIfPInButtonScope();
632         m_tree.insertHTMLElement(WTFMove(token));
633         return;
634     }
635     if (isNumberedHeaderTag(token.name())) {
636         processFakePEndTagIfPInButtonScope();
637         if (isNumberedHeaderElement(m_tree.currentStackItem())) {
638             parseError(token);
639             m_tree.openElements().pop();
640         }
641         m_tree.insertHTMLElement(WTFMove(token));
642         return;
643     }
644     if (token.name() == preTag || token.name() == listingTag) {
645         processFakePEndTagIfPInButtonScope();
646         m_tree.insertHTMLElement(WTFMove(token));
647         m_shouldSkipLeadingNewline = true;
648         m_framesetOk = false;
649         return;
650     }
651     if (token.name() == formTag) {
652         if (m_tree.form() && !isParsingTemplateContents()) {
653             parseError(token);
654             return;
655         }
656         processFakePEndTagIfPInButtonScope();
657         m_tree.insertHTMLFormElement(WTFMove(token));
658         return;
659     }
660     if (token.name() == liTag) {
661         processCloseWhenNestedTag<isLi>(WTFMove(token));
662         return;
663     }
664     if (token.name() == ddTag || token.name() == dtTag) {
665         processCloseWhenNestedTag<isDdOrDt>(WTFMove(token));
666         return;
667     }
668     if (token.name() == plaintextTag) {
669         processFakePEndTagIfPInButtonScope();
670         m_tree.insertHTMLElement(WTFMove(token));
671         m_parser.tokenizer().setPLAINTEXTState();
672         return;
673     }
674     if (token.name() == buttonTag) {
675         if (m_tree.openElements().inScope(buttonTag)) {
676             parseError(token);
677             processFakeEndTag(buttonTag);
678             processStartTag(WTFMove(token)); // FIXME: Could we just fall through here?
679             return;
680         }
681         m_tree.reconstructTheActiveFormattingElements();
682         m_tree.insertHTMLElement(WTFMove(token));
683         m_framesetOk = false;
684         return;
685     }
686     if (token.name() == aTag) {
687         RefPtr<Element> activeATag = m_tree.activeFormattingElements().closestElementInScopeWithName(aTag->localName());
688         if (activeATag) {
689             parseError(token);
690             processFakeEndTag(aTag);
691             m_tree.activeFormattingElements().remove(*activeATag);
692             if (m_tree.openElements().contains(*activeATag))
693                 m_tree.openElements().remove(*activeATag);
694         }
695         m_tree.reconstructTheActiveFormattingElements();
696         m_tree.insertFormattingElement(WTFMove(token));
697         return;
698     }
699     if (isNonAnchorNonNobrFormattingTag(token.name())) {
700         m_tree.reconstructTheActiveFormattingElements();
701         m_tree.insertFormattingElement(WTFMove(token));
702         return;
703     }
704     if (token.name() == nobrTag) {
705         m_tree.reconstructTheActiveFormattingElements();
706         if (m_tree.openElements().inScope(nobrTag)) {
707             parseError(token);
708             processFakeEndTag(nobrTag);
709             m_tree.reconstructTheActiveFormattingElements();
710         }
711         m_tree.insertFormattingElement(WTFMove(token));
712         return;
713     }
714     if (token.name() == appletTag || token.name() == embedTag || token.name() == objectTag) {
715         if (!pluginContentIsAllowed(m_tree.parserContentPolicy()))
716             return;
717     }
718     if (token.name() == appletTag || token.name() == marqueeTag || token.name() == objectTag) {
719         m_tree.reconstructTheActiveFormattingElements();
720         m_tree.insertHTMLElement(WTFMove(token));
721         m_tree.activeFormattingElements().appendMarker();
722         m_framesetOk = false;
723         return;
724     }
725     if (token.name() == tableTag) {
726         if (!m_tree.inQuirksMode() && m_tree.openElements().inButtonScope(pTag))
727             processFakeEndTag(pTag);
728         m_tree.insertHTMLElement(WTFMove(token));
729         m_framesetOk = false;
730         m_insertionMode = InsertionMode::InTable;
731         return;
732     }
733     if (token.name() == imageTag) {
734         parseError(token);
735         // Apparently we're not supposed to ask.
736         token.setName(imgTag->localName());
737         // Note the fall through to the imgTag handling below!
738     }
739     if (token.name() == areaTag
740         || token.name() == brTag
741         || token.name() == embedTag
742         || token.name() == imgTag
743         || token.name() == keygenTag
744         || token.name() == wbrTag) {
745         m_tree.reconstructTheActiveFormattingElements();
746         m_tree.insertSelfClosingHTMLElement(WTFMove(token));
747         m_framesetOk = false;
748         return;
749     }
750     if (token.name() == inputTag) {
751         m_tree.reconstructTheActiveFormattingElements();
752         auto* typeAttribute = findAttribute(token.attributes(), typeAttr);
753         bool shouldClearFramesetOK = !typeAttribute || !equalLettersIgnoringASCIICase(typeAttribute->value(), "hidden");
754         m_tree.insertSelfClosingHTMLElement(WTFMove(token));
755         if (shouldClearFramesetOK)
756             m_framesetOk = false;
757         return;
758     }
759     if (token.name() == paramTag || token.name() == sourceTag || token.name() == trackTag) {
760         m_tree.insertSelfClosingHTMLElement(WTFMove(token));
761         return;
762     }
763     if (token.name() == hrTag) {
764         processFakePEndTagIfPInButtonScope();
765         m_tree.insertSelfClosingHTMLElement(WTFMove(token));
766         m_framesetOk = false;
767         return;
768     }
769     if (token.name() == textareaTag) {
770         m_tree.insertHTMLElement(WTFMove(token));
771         m_shouldSkipLeadingNewline = true;
772         m_parser.tokenizer().setRCDATAState();
773         m_originalInsertionMode = m_insertionMode;
774         m_framesetOk = false;
775         m_insertionMode = InsertionMode::Text;
776         return;
777     }
778     if (token.name() == xmpTag) {
779         processFakePEndTagIfPInButtonScope();
780         m_tree.reconstructTheActiveFormattingElements();
781         m_framesetOk = false;
782         processGenericRawTextStartTag(WTFMove(token));
783         return;
784     }
785     if (token.name() == iframeTag) {
786         m_framesetOk = false;
787         processGenericRawTextStartTag(WTFMove(token));
788         return;
789     }
790     if (token.name() == noembedTag && m_options.pluginsEnabled) {
791         processGenericRawTextStartTag(WTFMove(token));
792         return;
793     }
794     if (token.name() == noscriptTag && m_options.scriptEnabled) {
795         processGenericRawTextStartTag(WTFMove(token));
796         return;
797     }
798     if (token.name() == selectTag) {
799         m_tree.reconstructTheActiveFormattingElements();
800         m_tree.insertHTMLElement(WTFMove(token));
801         m_framesetOk = false;
802         if (m_insertionMode == InsertionMode::InTable
803             || m_insertionMode == InsertionMode::InCaption
804             || m_insertionMode == InsertionMode::InColumnGroup
805             || m_insertionMode == InsertionMode::InTableBody
806             || m_insertionMode == InsertionMode::InRow
807             || m_insertionMode == InsertionMode::InCell)
808             m_insertionMode = InsertionMode::InSelectInTable;
809         else
810             m_insertionMode = InsertionMode::InSelect;
811         return;
812     }
813     if (token.name() == optgroupTag || token.name() == optionTag) {
814         if (is<HTMLOptionElement>(m_tree.currentStackItem().node())) {
815             AtomicHTMLToken endOption(HTMLToken::EndTag, optionTag->localName());
816             processEndTag(WTFMove(endOption));
817         }
818         m_tree.reconstructTheActiveFormattingElements();
819         m_tree.insertHTMLElement(WTFMove(token));
820         return;
821     }
822     if (token.name() == rbTag || token.name() == rtcTag) {
823         if (m_tree.openElements().inScope(rubyTag->localName())) {
824             m_tree.generateImpliedEndTags();
825             if (!m_tree.currentStackItem().hasTagName(rubyTag))
826                 parseError(token);
827         }
828         m_tree.insertHTMLElement(WTFMove(token));
829         return;
830     }
831     if (token.name() == rtTag || token.name() == rpTag) {
832         if (m_tree.openElements().inScope(rubyTag->localName())) {
833             m_tree.generateImpliedEndTagsWithExclusion(rtcTag->localName());
834             if (!m_tree.currentStackItem().hasTagName(rubyTag) && !m_tree.currentStackItem().hasTagName(rtcTag))
835                 parseError(token);
836         }
837         m_tree.insertHTMLElement(WTFMove(token));
838         return;
839     }
840     if (token.name() == MathMLNames::mathTag->localName()) {
841         m_tree.reconstructTheActiveFormattingElements();
842         adjustMathMLAttributes(token);
843         adjustForeignAttributes(token);
844         m_tree.insertForeignElement(WTFMove(token), MathMLNames::mathmlNamespaceURI);
845         return;
846     }
847     if (token.name() == SVGNames::svgTag->localName()) {
848         m_tree.reconstructTheActiveFormattingElements();
849         adjustSVGAttributes(token);
850         adjustForeignAttributes(token);
851         m_tree.insertForeignElement(WTFMove(token), SVGNames::svgNamespaceURI);
852         return;
853     }
854     if (isCaptionColOrColgroupTag(token.name())
855         || token.name() == frameTag
856         || token.name() == headTag
857         || isTableBodyContextTag(token.name())
858         || isTableCellContextTag(token.name())
859         || token.name() == trTag) {
860         parseError(token);
861         return;
862     }
863     if (token.name() == templateTag) {
864         m_framesetOk = false;
865         processTemplateStartTag(WTFMove(token));
866         return;
867     }
868     m_tree.reconstructTheActiveFormattingElements();
869     insertGenericHTMLElement(WTFMove(token));
870 }
871
872 inline void HTMLTreeBuilder::insertGenericHTMLElement(AtomicHTMLToken&& token)
873 {
874     m_customElementToConstruct = m_tree.insertHTMLElementOrFindCustomElementInterface(WTFMove(token));
875 }
876
877 void HTMLTreeBuilder::didCreateCustomOrFallbackElement(Ref<Element>&& element, CustomElementConstructionData& data)
878 {
879     m_tree.insertCustomElement(WTFMove(element), data.name, WTFMove(data.attributes));
880 }
881
882 void HTMLTreeBuilder::processTemplateStartTag(AtomicHTMLToken&& token)
883 {
884     m_tree.activeFormattingElements().appendMarker();
885     m_tree.insertHTMLElement(WTFMove(token));
886     m_templateInsertionModes.append(InsertionMode::TemplateContents);
887     m_insertionMode = InsertionMode::TemplateContents;
888 }
889
890 bool HTMLTreeBuilder::processTemplateEndTag(AtomicHTMLToken&& token)
891 {
892     ASSERT(token.name() == templateTag->localName());
893     if (!m_tree.openElements().hasTemplateInHTMLScope()) {
894         ASSERT(m_templateInsertionModes.isEmpty() || (m_templateInsertionModes.size() == 1 && m_fragmentContext.contextElement().hasTagName(templateTag)));
895         parseError(token);
896         return false;
897     }
898     m_tree.generateImpliedEndTags();
899     if (!m_tree.currentStackItem().hasTagName(templateTag))
900         parseError(token);
901     m_tree.openElements().popUntilPopped(templateTag);
902     m_tree.activeFormattingElements().clearToLastMarker();
903     m_templateInsertionModes.removeLast();
904     resetInsertionModeAppropriately();
905     return true;
906 }
907
908 bool HTMLTreeBuilder::processEndOfFileForInTemplateContents(AtomicHTMLToken&& token)
909 {
910     AtomicHTMLToken endTemplate(HTMLToken::EndTag, templateTag->localName());
911     if (!processTemplateEndTag(WTFMove(endTemplate)))
912         return false;
913
914     processEndOfFile(WTFMove(token));
915     return true;
916 }
917
918 bool HTMLTreeBuilder::processColgroupEndTagForInColumnGroup()
919 {
920     bool ignoreFakeEndTag = m_tree.currentIsRootNode() || m_tree.currentNode().hasTagName(templateTag);
921
922     if (ignoreFakeEndTag) {
923         ASSERT(isParsingFragmentOrTemplateContents());
924         // FIXME: parse error
925         return false;
926     }
927     m_tree.openElements().pop();
928     m_insertionMode = InsertionMode::InTable;
929     return true;
930 }
931
932 // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#close-the-cell
933 void HTMLTreeBuilder::closeTheCell()
934 {
935     ASSERT(m_insertionMode == InsertionMode::InCell);
936     if (m_tree.openElements().inTableScope(tdTag)) {
937         ASSERT(!m_tree.openElements().inTableScope(thTag));
938         processFakeEndTag(tdTag);
939         return;
940     }
941     ASSERT(m_tree.openElements().inTableScope(thTag));
942     processFakeEndTag(thTag);
943     ASSERT(m_insertionMode == InsertionMode::InRow);
944 }
945
946 void HTMLTreeBuilder::processStartTagForInTable(AtomicHTMLToken&& token)
947 {
948     ASSERT(token.type() == HTMLToken::StartTag);
949     if (token.name() == captionTag) {
950         m_tree.openElements().popUntilTableScopeMarker();
951         m_tree.activeFormattingElements().appendMarker();
952         m_tree.insertHTMLElement(WTFMove(token));
953         m_insertionMode = InsertionMode::InCaption;
954         return;
955     }
956     if (token.name() == colgroupTag) {
957         m_tree.openElements().popUntilTableScopeMarker();
958         m_tree.insertHTMLElement(WTFMove(token));
959         m_insertionMode = InsertionMode::InColumnGroup;
960         return;
961     }
962     if (token.name() == colTag) {
963         processFakeStartTag(colgroupTag);
964         ASSERT(m_insertionMode == InsertionMode::InColumnGroup);
965         processStartTag(WTFMove(token));
966         return;
967     }
968     if (isTableBodyContextTag(token.name())) {
969         m_tree.openElements().popUntilTableScopeMarker();
970         m_tree.insertHTMLElement(WTFMove(token));
971         m_insertionMode = InsertionMode::InTableBody;
972         return;
973     }
974     if (isTableCellContextTag(token.name()) || token.name() == trTag) {
975         processFakeStartTag(tbodyTag);
976         ASSERT(m_insertionMode == InsertionMode::InTableBody);
977         processStartTag(WTFMove(token));
978         return;
979     }
980     if (token.name() == tableTag) {
981         parseError(token);
982         if (!processTableEndTagForInTable()) {
983             ASSERT(isParsingFragmentOrTemplateContents());
984             return;
985         }
986         processStartTag(WTFMove(token));
987         return;
988     }
989     if (token.name() == styleTag || token.name() == scriptTag) {
990         processStartTagForInHead(WTFMove(token));
991         return;
992     }
993     if (token.name() == inputTag) {
994         auto* typeAttribute = findAttribute(token.attributes(), typeAttr);
995         if (typeAttribute && equalLettersIgnoringASCIICase(typeAttribute->value(), "hidden")) {
996             parseError(token);
997             m_tree.insertSelfClosingHTMLElement(WTFMove(token));
998             return;
999         }
1000         // Fall through to "anything else" case.
1001     }
1002     if (token.name() == formTag) {
1003         parseError(token);
1004         if (m_tree.form() && !isParsingTemplateContents())
1005             return;
1006         m_tree.insertHTMLFormElement(WTFMove(token), true);
1007         m_tree.openElements().pop();
1008         return;
1009     }
1010     if (token.name() == templateTag) {
1011         processTemplateStartTag(WTFMove(token));
1012         return;
1013     }
1014     parseError(token);
1015     HTMLConstructionSite::RedirectToFosterParentGuard redirecter(m_tree);
1016     processStartTagForInBody(WTFMove(token));
1017 }
1018
1019 void HTMLTreeBuilder::processStartTag(AtomicHTMLToken&& token)
1020 {
1021     ASSERT(token.type() == HTMLToken::StartTag);
1022     switch (m_insertionMode) {
1023     case InsertionMode::Initial:
1024         defaultForInitial();
1025         ASSERT(m_insertionMode == InsertionMode::BeforeHTML);
1026         FALLTHROUGH;
1027     case InsertionMode::BeforeHTML:
1028         if (token.name() == htmlTag) {
1029             m_tree.insertHTMLHtmlStartTagBeforeHTML(WTFMove(token));
1030             m_insertionMode = InsertionMode::BeforeHead;
1031             return;
1032         }
1033         defaultForBeforeHTML();
1034         ASSERT(m_insertionMode == InsertionMode::BeforeHead);
1035         FALLTHROUGH;
1036     case InsertionMode::BeforeHead:
1037         if (token.name() == htmlTag) {
1038             processHtmlStartTagForInBody(WTFMove(token));
1039             return;
1040         }
1041         if (token.name() == headTag) {
1042             m_tree.insertHTMLHeadElement(WTFMove(token));
1043             m_insertionMode = InsertionMode::InHead;
1044             return;
1045         }
1046         defaultForBeforeHead();
1047         ASSERT(m_insertionMode == InsertionMode::InHead);
1048         FALLTHROUGH;
1049     case InsertionMode::InHead:
1050         if (processStartTagForInHead(WTFMove(token)))
1051             return;
1052         defaultForInHead();
1053         ASSERT(m_insertionMode == InsertionMode::AfterHead);
1054         FALLTHROUGH;
1055     case InsertionMode::AfterHead:
1056         if (token.name() == htmlTag) {
1057             processHtmlStartTagForInBody(WTFMove(token));
1058             return;
1059         }
1060         if (token.name() == bodyTag) {
1061             m_framesetOk = false;
1062             m_tree.insertHTMLBodyElement(WTFMove(token));
1063             m_insertionMode = InsertionMode::InBody;
1064             return;
1065         }
1066         if (token.name() == framesetTag) {
1067             m_tree.insertHTMLElement(WTFMove(token));
1068             m_insertionMode = InsertionMode::InFrameset;
1069             return;
1070         }
1071         if (token.name() == baseTag
1072             || token.name() == basefontTag
1073             || token.name() == bgsoundTag
1074             || token.name() == linkTag
1075             || token.name() == metaTag
1076             || token.name() == noframesTag
1077             || token.name() == scriptTag
1078             || token.name() == styleTag
1079             || token.name() == templateTag
1080             || token.name() == titleTag) {
1081             parseError(token);
1082             ASSERT(m_tree.headStackItem());
1083             m_tree.openElements().pushHTMLHeadElement(*m_tree.headStackItem());
1084             processStartTagForInHead(WTFMove(token));
1085             m_tree.openElements().removeHTMLHeadElement(m_tree.head());
1086             return;
1087         }
1088         if (token.name() == headTag) {
1089             parseError(token);
1090             return;
1091         }
1092         defaultForAfterHead();
1093         ASSERT(m_insertionMode == InsertionMode::InBody);
1094         FALLTHROUGH;
1095     case InsertionMode::InBody:
1096         processStartTagForInBody(WTFMove(token));
1097         break;
1098     case InsertionMode::InTable:
1099         processStartTagForInTable(WTFMove(token));
1100         break;
1101     case InsertionMode::InCaption:
1102         if (isCaptionColOrColgroupTag(token.name())
1103             || isTableBodyContextTag(token.name())
1104             || isTableCellContextTag(token.name())
1105             || token.name() == trTag) {
1106             parseError(token);
1107             if (!processCaptionEndTagForInCaption()) {
1108                 ASSERT(isParsingFragment());
1109                 return;
1110             }
1111             processStartTag(WTFMove(token));
1112             return;
1113         }
1114         processStartTagForInBody(WTFMove(token));
1115         break;
1116     case InsertionMode::InColumnGroup:
1117         if (token.name() == htmlTag) {
1118             processHtmlStartTagForInBody(WTFMove(token));
1119             return;
1120         }
1121         if (token.name() == colTag) {
1122             m_tree.insertSelfClosingHTMLElement(WTFMove(token));
1123             return;
1124         }
1125         if (token.name() == templateTag) {
1126             processTemplateStartTag(WTFMove(token));
1127             return;
1128         }
1129         if (!processColgroupEndTagForInColumnGroup()) {
1130             ASSERT(isParsingFragmentOrTemplateContents());
1131             return;
1132         }
1133         processStartTag(WTFMove(token));
1134         break;
1135     case InsertionMode::InTableBody:
1136         if (token.name() == trTag) {
1137             m_tree.openElements().popUntilTableBodyScopeMarker(); // How is there ever anything to pop?
1138             m_tree.insertHTMLElement(WTFMove(token));
1139             m_insertionMode = InsertionMode::InRow;
1140             return;
1141         }
1142         if (isTableCellContextTag(token.name())) {
1143             parseError(token);
1144             processFakeStartTag(trTag);
1145             ASSERT(m_insertionMode == InsertionMode::InRow);
1146             processStartTag(WTFMove(token));
1147             return;
1148         }
1149         if (isCaptionColOrColgroupTag(token.name()) || isTableBodyContextTag(token.name())) {
1150             // FIXME: This is slow.
1151             if (!m_tree.openElements().inTableScope(tbodyTag) && !m_tree.openElements().inTableScope(theadTag) && !m_tree.openElements().inTableScope(tfootTag)) {
1152                 ASSERT(isParsingFragmentOrTemplateContents());
1153                 parseError(token);
1154                 return;
1155             }
1156             m_tree.openElements().popUntilTableBodyScopeMarker();
1157             ASSERT(isTableBodyContextTag(m_tree.currentStackItem().localName()));
1158             processFakeEndTag(m_tree.currentStackItem().localName());
1159             processStartTag(WTFMove(token));
1160             return;
1161         }
1162         processStartTagForInTable(WTFMove(token));
1163         break;
1164     case InsertionMode::InRow:
1165         if (isTableCellContextTag(token.name())) {
1166             m_tree.openElements().popUntilTableRowScopeMarker();
1167             m_tree.insertHTMLElement(WTFMove(token));
1168             m_insertionMode = InsertionMode::InCell;
1169             m_tree.activeFormattingElements().appendMarker();
1170             return;
1171         }
1172         if (token.name() == trTag
1173             || isCaptionColOrColgroupTag(token.name())
1174             || isTableBodyContextTag(token.name())) {
1175             if (!processTrEndTagForInRow()) {
1176                 ASSERT(isParsingFragmentOrTemplateContents());
1177                 return;
1178             }
1179             ASSERT(m_insertionMode == InsertionMode::InTableBody);
1180             processStartTag(WTFMove(token));
1181             return;
1182         }
1183         processStartTagForInTable(WTFMove(token));
1184         break;
1185     case InsertionMode::InCell:
1186         if (isCaptionColOrColgroupTag(token.name())
1187             || isTableCellContextTag(token.name())
1188             || token.name() == trTag
1189             || isTableBodyContextTag(token.name())) {
1190             // FIXME: This could be more efficient.
1191             if (!m_tree.openElements().inTableScope(tdTag) && !m_tree.openElements().inTableScope(thTag)) {
1192                 ASSERT(isParsingFragment());
1193                 parseError(token);
1194                 return;
1195             }
1196             closeTheCell();
1197             processStartTag(WTFMove(token));
1198             return;
1199         }
1200         processStartTagForInBody(WTFMove(token));
1201         break;
1202     case InsertionMode::AfterBody:
1203     case InsertionMode::AfterAfterBody:
1204         if (token.name() == htmlTag) {
1205             processHtmlStartTagForInBody(WTFMove(token));
1206             return;
1207         }
1208         m_insertionMode = InsertionMode::InBody;
1209         processStartTag(WTFMove(token));
1210         break;
1211     case InsertionMode::InHeadNoscript:
1212         if (token.name() == htmlTag) {
1213             processHtmlStartTagForInBody(WTFMove(token));
1214             return;
1215         }
1216         if (token.name() == basefontTag
1217             || token.name() == bgsoundTag
1218             || token.name() == linkTag
1219             || token.name() == metaTag
1220             || token.name() == noframesTag
1221             || token.name() == styleTag) {
1222             bool didProcess = processStartTagForInHead(WTFMove(token));
1223             ASSERT_UNUSED(didProcess, didProcess);
1224             return;
1225         }
1226         if (token.name() == htmlTag || token.name() == noscriptTag) {
1227             parseError(token);
1228             return;
1229         }
1230         defaultForInHeadNoscript();
1231         processToken(WTFMove(token));
1232         break;
1233     case InsertionMode::InFrameset:
1234         if (token.name() == htmlTag) {
1235             processHtmlStartTagForInBody(WTFMove(token));
1236             return;
1237         }
1238         if (token.name() == framesetTag) {
1239             m_tree.insertHTMLElement(WTFMove(token));
1240             return;
1241         }
1242         if (token.name() == frameTag) {
1243             m_tree.insertSelfClosingHTMLElement(WTFMove(token));
1244             return;
1245         }
1246         if (token.name() == noframesTag) {
1247             processStartTagForInHead(WTFMove(token));
1248             return;
1249         }
1250         parseError(token);
1251         break;
1252     case InsertionMode::AfterFrameset:
1253     case InsertionMode::AfterAfterFrameset:
1254         if (token.name() == htmlTag) {
1255             processHtmlStartTagForInBody(WTFMove(token));
1256             return;
1257         }
1258         if (token.name() == noframesTag) {
1259             processStartTagForInHead(WTFMove(token));
1260             return;
1261         }
1262         parseError(token);
1263         break;
1264     case InsertionMode::InSelectInTable:
1265         if (token.name() == captionTag
1266             || token.name() == tableTag
1267             || isTableBodyContextTag(token.name())
1268             || token.name() == trTag
1269             || isTableCellContextTag(token.name())) {
1270             parseError(token);
1271             AtomicHTMLToken endSelect(HTMLToken::EndTag, selectTag->localName());
1272             processEndTag(WTFMove(endSelect));
1273             processStartTag(WTFMove(token));
1274             return;
1275         }
1276         FALLTHROUGH;
1277     case InsertionMode::InSelect:
1278         if (token.name() == htmlTag) {
1279             processHtmlStartTagForInBody(WTFMove(token));
1280             return;
1281         }
1282         if (token.name() == optionTag) {
1283             if (is<HTMLOptionElement>(m_tree.currentStackItem().node())) {
1284                 AtomicHTMLToken endOption(HTMLToken::EndTag, optionTag->localName());
1285                 processEndTag(WTFMove(endOption));
1286             }
1287             m_tree.insertHTMLElement(WTFMove(token));
1288             return;
1289         }
1290         if (token.name() == optgroupTag) {
1291             if (is<HTMLOptionElement>(m_tree.currentStackItem().node())) {
1292                 AtomicHTMLToken endOption(HTMLToken::EndTag, optionTag->localName());
1293                 processEndTag(WTFMove(endOption));
1294             }
1295             if (is<HTMLOptGroupElement>(m_tree.currentStackItem().node())) {
1296                 AtomicHTMLToken endOptgroup(HTMLToken::EndTag, optgroupTag->localName());
1297                 processEndTag(WTFMove(endOptgroup));
1298             }
1299             m_tree.insertHTMLElement(WTFMove(token));
1300             return;
1301         }
1302         if (token.name() == selectTag) {
1303             parseError(token);
1304             AtomicHTMLToken endSelect(HTMLToken::EndTag, selectTag->localName());
1305             processEndTag(WTFMove(endSelect));
1306             return;
1307         }
1308         if (token.name() == inputTag || token.name() == keygenTag || token.name() == textareaTag) {
1309             parseError(token);
1310             if (!m_tree.openElements().inSelectScope(selectTag)) {
1311                 ASSERT(isParsingFragment());
1312                 return;
1313             }
1314             AtomicHTMLToken endSelect(HTMLToken::EndTag, selectTag->localName());
1315             processEndTag(WTFMove(endSelect));
1316             processStartTag(WTFMove(token));
1317             return;
1318         }
1319         if (token.name() == scriptTag) {
1320             bool didProcess = processStartTagForInHead(WTFMove(token));
1321             ASSERT_UNUSED(didProcess, didProcess);
1322             return;
1323         }
1324         if (token.name() == templateTag) {
1325             processTemplateStartTag(WTFMove(token));
1326             return;
1327         }
1328         break;
1329     case InsertionMode::InTableText:
1330         defaultForInTableText();
1331         processStartTag(WTFMove(token));
1332         break;
1333     case InsertionMode::Text:
1334         ASSERT_NOT_REACHED();
1335         break;
1336     case InsertionMode::TemplateContents:
1337         if (token.name() == templateTag) {
1338             processTemplateStartTag(WTFMove(token));
1339             return;
1340         }
1341
1342         if (token.name() == linkTag
1343             || token.name() == scriptTag
1344             || token.name() == styleTag
1345             || token.name() == metaTag) {
1346             processStartTagForInHead(WTFMove(token));
1347             return;
1348         }
1349
1350         InsertionMode insertionMode = InsertionMode::TemplateContents;
1351         if (token.name() == colTag)
1352             insertionMode = InsertionMode::InColumnGroup;
1353         else if (isCaptionColOrColgroupTag(token.name()) || isTableBodyContextTag(token.name()))
1354             insertionMode = InsertionMode::InTable;
1355         else if (token.name() == trTag)
1356             insertionMode = InsertionMode::InTableBody;
1357         else if (isTableCellContextTag(token.name()))
1358             insertionMode = InsertionMode::InRow;
1359         else
1360             insertionMode = InsertionMode::InBody;
1361
1362         ASSERT(insertionMode != InsertionMode::TemplateContents);
1363         ASSERT(m_templateInsertionModes.last() == InsertionMode::TemplateContents);
1364         m_templateInsertionModes.last() = insertionMode;
1365         m_insertionMode = insertionMode;
1366
1367         processStartTag(WTFMove(token));
1368         break;
1369     }
1370 }
1371
1372 void HTMLTreeBuilder::processHtmlStartTagForInBody(AtomicHTMLToken&& token)
1373 {
1374     parseError(token);
1375     if (m_tree.openElements().hasTemplateInHTMLScope()) {
1376         ASSERT(isParsingTemplateContents());
1377         return;
1378     }
1379     m_tree.insertHTMLHtmlStartTagInBody(WTFMove(token));
1380 }
1381
1382 bool HTMLTreeBuilder::processBodyEndTagForInBody(AtomicHTMLToken&& token)
1383 {
1384     ASSERT(token.type() == HTMLToken::EndTag);
1385     ASSERT(token.name() == bodyTag);
1386     if (!m_tree.openElements().inScope(bodyTag->localName())) {
1387         parseError(token);
1388         return false;
1389     }
1390     notImplemented(); // Emit a more specific parse error based on stack contents.
1391     m_insertionMode = InsertionMode::AfterBody;
1392     return true;
1393 }
1394
1395 void HTMLTreeBuilder::processAnyOtherEndTagForInBody(AtomicHTMLToken&& token)
1396 {
1397     ASSERT(token.type() == HTMLToken::EndTag);
1398     for (auto* record = &m_tree.openElements().topRecord(); ; record = record->next()) {
1399         HTMLStackItem& item = record->stackItem();
1400         if (item.matchesHTMLTag(token.name())) {
1401             m_tree.generateImpliedEndTagsWithExclusion(token.name());
1402             if (!m_tree.currentStackItem().matchesHTMLTag(token.name()))
1403                 parseError(token);
1404             m_tree.openElements().popUntilPopped(item.element());
1405             return;
1406         }
1407         if (isSpecialNode(item)) {
1408             parseError(token);
1409             return;
1410         }
1411     }
1412 }
1413
1414 // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#parsing-main-inbody
1415 void HTMLTreeBuilder::callTheAdoptionAgency(AtomicHTMLToken& token)
1416 {
1417     // The adoption agency algorithm is N^2. We limit the number of iterations
1418     // to stop from hanging the whole browser. This limit is specified in the
1419     // adoption agency algorithm: 
1420     // http://www.whatwg.org/specs/web-apps/current-work/multipage/tree-construction.html#parsing-main-inbody
1421     static const int outerIterationLimit = 8;
1422     static const int innerIterationLimit = 3;
1423
1424     // 1, 2, 3 and 16 are covered by the for() loop.
1425     for (int i = 0; i < outerIterationLimit; ++i) {
1426         // 4.
1427         RefPtr<Element> formattingElement = m_tree.activeFormattingElements().closestElementInScopeWithName(token.name());
1428         // 4.a
1429         if (!formattingElement)
1430             return processAnyOtherEndTagForInBody(WTFMove(token));
1431         // 4.c
1432         if ((m_tree.openElements().contains(*formattingElement)) && !m_tree.openElements().inScope(*formattingElement)) {
1433             parseError(token);
1434             notImplemented(); // Check the stack of open elements for a more specific parse error.
1435             return;
1436         }
1437         // 4.b
1438         auto* formattingElementRecord = m_tree.openElements().find(*formattingElement);
1439         if (!formattingElementRecord) {
1440             parseError(token);
1441             m_tree.activeFormattingElements().remove(*formattingElement);
1442             return;
1443         }
1444         // 4.d
1445         if (formattingElement != &m_tree.currentElement())
1446             parseError(token);
1447         // 5.
1448         auto* furthestBlock = m_tree.openElements().furthestBlockForFormattingElement(*formattingElement);
1449         // 6.
1450         if (!furthestBlock) {
1451             m_tree.openElements().popUntilPopped(*formattingElement);
1452             m_tree.activeFormattingElements().remove(*formattingElement);
1453             return;
1454         }
1455         // 7.
1456         ASSERT(furthestBlock->isAbove(*formattingElementRecord));
1457         Ref<HTMLStackItem> commonAncestor = formattingElementRecord->next()->stackItem();
1458         // 8.
1459         HTMLFormattingElementList::Bookmark bookmark = m_tree.activeFormattingElements().bookmarkFor(*formattingElement);
1460         // 9.
1461         auto* node = furthestBlock;
1462         auto* nextNode = node->next();
1463         auto* lastNode = furthestBlock;
1464         // 9.1, 9.2, 9.3 and 9.11 are covered by the for() loop.
1465         for (int i = 0; i < innerIterationLimit; ++i) {
1466             // 9.4
1467             node = nextNode;
1468             ASSERT(node);
1469             nextNode = node->next(); // Save node->next() for the next iteration in case node is deleted in 9.5.
1470             // 9.5
1471             if (!m_tree.activeFormattingElements().contains(node->element())) {
1472                 m_tree.openElements().remove(node->element());
1473                 node = 0;
1474                 continue;
1475             }
1476             // 9.6
1477             if (node == formattingElementRecord)
1478                 break;
1479             // 9.7
1480             auto newItem = m_tree.createElementFromSavedToken(node->stackItem());
1481
1482             HTMLFormattingElementList::Entry* nodeEntry = m_tree.activeFormattingElements().find(node->element());
1483             nodeEntry->replaceElement(newItem.copyRef());
1484             node->replaceElement(WTFMove(newItem));
1485
1486             // 9.8
1487             if (lastNode == furthestBlock)
1488                 bookmark.moveToAfter(*nodeEntry);
1489             // 9.9
1490             m_tree.reparent(*node, *lastNode);
1491             // 9.10
1492             lastNode = node;
1493         }
1494         // 10.
1495         m_tree.insertAlreadyParsedChild(commonAncestor.get(), *lastNode);
1496         // 11.
1497         auto newItem = m_tree.createElementFromSavedToken(formattingElementRecord->stackItem());
1498         // 12. & 13.
1499         m_tree.takeAllChildrenAndReparent(newItem, *furthestBlock);
1500         // 14.
1501         m_tree.activeFormattingElements().swapTo(*formattingElement, newItem.copyRef(), bookmark);
1502         // 15.
1503         m_tree.openElements().remove(*formattingElement);
1504         m_tree.openElements().insertAbove(WTFMove(newItem), *furthestBlock);
1505     }
1506 }
1507
1508 void HTMLTreeBuilder::resetInsertionModeAppropriately()
1509 {
1510     // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#reset-the-insertion-mode-appropriately
1511     bool last = false;
1512     for (auto* record = &m_tree.openElements().topRecord(); ; record = record->next()) {
1513         RefPtr<HTMLStackItem> item = &record->stackItem();
1514         if (&item->node() == &m_tree.openElements().rootNode()) {
1515             last = true;
1516             bool shouldCreateItem = isParsingFragment();
1517             if (shouldCreateItem)
1518                 item = &m_fragmentContext.contextElementStackItem();
1519         }
1520
1521         if (item->hasTagName(templateTag)) {
1522             m_insertionMode = m_templateInsertionModes.last();
1523             return;
1524         }
1525
1526         if (item->hasTagName(selectTag)) {
1527             if (!last) {
1528                 while (&item->node() != &m_tree.openElements().rootNode() && !item->hasTagName(templateTag)) {
1529                     record = record->next();
1530                     item = &record->stackItem();
1531                     if (is<HTMLTableElement>(item->node())) {
1532                         m_insertionMode = InsertionMode::InSelectInTable;
1533                         return;
1534                     }
1535                 }
1536             }
1537             m_insertionMode = InsertionMode::InSelect;
1538             return;
1539         }
1540         if (item->hasTagName(tdTag) || item->hasTagName(thTag)) {
1541             m_insertionMode = InsertionMode::InCell;
1542             return;
1543         }
1544         if (item->hasTagName(trTag)) {
1545             m_insertionMode = InsertionMode::InRow;
1546             return;
1547         }
1548         if (item->hasTagName(tbodyTag) || item->hasTagName(theadTag) || item->hasTagName(tfootTag)) {
1549             m_insertionMode = InsertionMode::InTableBody;
1550             return;
1551         }
1552         if (item->hasTagName(captionTag)) {
1553             m_insertionMode = InsertionMode::InCaption;
1554             return;
1555         }
1556         if (item->hasTagName(colgroupTag)) {
1557             m_insertionMode = InsertionMode::InColumnGroup;
1558             return;
1559         }
1560         if (is<HTMLTableElement>(item->node())) {
1561             m_insertionMode = InsertionMode::InTable;
1562             return;
1563         }
1564         if (item->hasTagName(headTag)) {
1565             if (!m_fragmentContext.fragment() || &m_fragmentContext.contextElement() != &item->node()) {
1566                 m_insertionMode = InsertionMode::InHead;
1567                 return;
1568             }
1569             m_insertionMode = InsertionMode::InBody;
1570             return;
1571         }
1572         if (item->hasTagName(bodyTag)) {
1573             m_insertionMode = InsertionMode::InBody;
1574             return;
1575         }
1576         if (item->hasTagName(framesetTag)) {
1577             m_insertionMode = InsertionMode::InFrameset;
1578             return;
1579         }
1580         if (item->hasTagName(htmlTag)) {
1581             if (m_tree.headStackItem()) {
1582                 m_insertionMode = InsertionMode::AfterHead;
1583                 return;
1584             }
1585             ASSERT(isParsingFragment());
1586             m_insertionMode = InsertionMode::BeforeHead;
1587             return;
1588         }
1589         if (last) {
1590             ASSERT(isParsingFragment());
1591             m_insertionMode = InsertionMode::InBody;
1592             return;
1593         }
1594     }
1595 }
1596
1597 void HTMLTreeBuilder::processEndTagForInTableBody(AtomicHTMLToken&& token)
1598 {
1599     ASSERT(token.type() == HTMLToken::EndTag);
1600     if (isTableBodyContextTag(token.name())) {
1601         if (!m_tree.openElements().inTableScope(token.name())) {
1602             parseError(token);
1603             return;
1604         }
1605         m_tree.openElements().popUntilTableBodyScopeMarker();
1606         m_tree.openElements().pop();
1607         m_insertionMode = InsertionMode::InTable;
1608         return;
1609     }
1610     if (token.name() == tableTag) {
1611         // FIXME: This is slow.
1612         if (!m_tree.openElements().inTableScope(tbodyTag) && !m_tree.openElements().inTableScope(theadTag) && !m_tree.openElements().inTableScope(tfootTag)) {
1613             ASSERT(isParsingFragmentOrTemplateContents());
1614             parseError(token);
1615             return;
1616         }
1617         m_tree.openElements().popUntilTableBodyScopeMarker();
1618         ASSERT(isTableBodyContextTag(m_tree.currentStackItem().localName()));
1619         processFakeEndTag(m_tree.currentStackItem().localName());
1620         processEndTag(WTFMove(token));
1621         return;
1622     }
1623     if (token.name() == bodyTag
1624         || isCaptionColOrColgroupTag(token.name())
1625         || token.name() == htmlTag
1626         || isTableCellContextTag(token.name())
1627         || token.name() == trTag) {
1628         parseError(token);
1629         return;
1630     }
1631     processEndTagForInTable(WTFMove(token));
1632 }
1633
1634 void HTMLTreeBuilder::processEndTagForInRow(AtomicHTMLToken&& token)
1635 {
1636     ASSERT(token.type() == HTMLToken::EndTag);
1637     if (token.name() == trTag) {
1638         processTrEndTagForInRow();
1639         return;
1640     }
1641     if (token.name() == tableTag) {
1642         if (!processTrEndTagForInRow()) {
1643             ASSERT(isParsingFragmentOrTemplateContents());
1644             return;
1645         }
1646         ASSERT(m_insertionMode == InsertionMode::InTableBody);
1647         processEndTag(WTFMove(token));
1648         return;
1649     }
1650     if (isTableBodyContextTag(token.name())) {
1651         if (!m_tree.openElements().inTableScope(token.name())) {
1652             parseError(token);
1653             return;
1654         }
1655         processFakeEndTag(trTag);
1656         ASSERT(m_insertionMode == InsertionMode::InTableBody);
1657         processEndTag(WTFMove(token));
1658         return;
1659     }
1660     if (token.name() == bodyTag
1661         || isCaptionColOrColgroupTag(token.name())
1662         || token.name() == htmlTag
1663         || isTableCellContextTag(token.name())) {
1664         parseError(token);
1665         return;
1666     }
1667     processEndTagForInTable(WTFMove(token));
1668 }
1669
1670 void HTMLTreeBuilder::processEndTagForInCell(AtomicHTMLToken&& token)
1671 {
1672     ASSERT(token.type() == HTMLToken::EndTag);
1673     if (isTableCellContextTag(token.name())) {
1674         if (!m_tree.openElements().inTableScope(token.name())) {
1675             parseError(token);
1676             return;
1677         }
1678         m_tree.generateImpliedEndTags();
1679         if (!m_tree.currentStackItem().matchesHTMLTag(token.name()))
1680             parseError(token);
1681         m_tree.openElements().popUntilPopped(token.name());
1682         m_tree.activeFormattingElements().clearToLastMarker();
1683         m_insertionMode = InsertionMode::InRow;
1684         return;
1685     }
1686     if (token.name() == bodyTag
1687         || isCaptionColOrColgroupTag(token.name())
1688         || token.name() == htmlTag) {
1689         parseError(token);
1690         return;
1691     }
1692     if (token.name() == tableTag
1693         || token.name() == trTag
1694         || isTableBodyContextTag(token.name())) {
1695         if (!m_tree.openElements().inTableScope(token.name())) {
1696             ASSERT(isTableBodyContextTag(token.name()) || m_tree.openElements().inTableScope(templateTag) || isParsingFragment());
1697             parseError(token);
1698             return;
1699         }
1700         closeTheCell();
1701         processEndTag(WTFMove(token));
1702         return;
1703     }
1704     processEndTagForInBody(WTFMove(token));
1705 }
1706
1707 void HTMLTreeBuilder::processEndTagForInBody(AtomicHTMLToken&& token)
1708 {
1709     ASSERT(token.type() == HTMLToken::EndTag);
1710     if (token.name() == bodyTag) {
1711         processBodyEndTagForInBody(WTFMove(token));
1712         return;
1713     }
1714     if (token.name() == htmlTag) {
1715         AtomicHTMLToken endBody(HTMLToken::EndTag, bodyTag->localName());
1716         if (processBodyEndTagForInBody(WTFMove(endBody)))
1717             processEndTag(WTFMove(token));
1718         return;
1719     }
1720     if (token.name() == addressTag
1721         || token.name() == articleTag
1722         || token.name() == asideTag
1723         || token.name() == blockquoteTag
1724         || token.name() == buttonTag
1725         || token.name() == centerTag
1726         || token.name() == detailsTag
1727         || token.name() == dirTag
1728         || token.name() == divTag
1729         || token.name() == dlTag
1730         || token.name() == fieldsetTag
1731         || token.name() == figcaptionTag
1732         || token.name() == figureTag
1733         || token.name() == footerTag
1734         || token.name() == headerTag
1735         || token.name() == hgroupTag
1736         || token.name() == listingTag
1737         || token.name() == mainTag
1738         || token.name() == menuTag
1739         || token.name() == navTag
1740         || token.name() == olTag
1741         || token.name() == preTag
1742         || token.name() == sectionTag
1743         || token.name() == summaryTag
1744         || token.name() == ulTag) {
1745         if (!m_tree.openElements().inScope(token.name())) {
1746             parseError(token);
1747             return;
1748         }
1749         m_tree.generateImpliedEndTags();
1750         if (!m_tree.currentStackItem().matchesHTMLTag(token.name()))
1751             parseError(token);
1752         m_tree.openElements().popUntilPopped(token.name());
1753         return;
1754     }
1755     if (token.name() == formTag) {
1756         if (!isParsingTemplateContents()) {
1757             RefPtr<Element> formElement = m_tree.takeForm();
1758             if (!formElement || !m_tree.openElements().inScope(*formElement)) {
1759                 parseError(token);
1760                 return;
1761             }
1762             m_tree.generateImpliedEndTags();
1763             if (&m_tree.currentNode() != formElement.get())
1764                 parseError(token);
1765             m_tree.openElements().remove(*formElement);
1766         } else {
1767             if (!m_tree.openElements().inScope(token.name())) {
1768                 parseError(token);
1769                 return;
1770             }
1771             m_tree.generateImpliedEndTags();
1772             if (!m_tree.currentNode().hasTagName(formTag))
1773                 parseError(token);
1774             m_tree.openElements().popUntilPopped(token.name());
1775         }
1776     }
1777     if (token.name() == pTag) {
1778         if (!m_tree.openElements().inButtonScope(token.name())) {
1779             parseError(token);
1780             processFakeStartTag(pTag);
1781             ASSERT(m_tree.openElements().inScope(token.name()));
1782             processEndTag(WTFMove(token));
1783             return;
1784         }
1785         m_tree.generateImpliedEndTagsWithExclusion(token.name());
1786         if (!m_tree.currentStackItem().matchesHTMLTag(token.name()))
1787             parseError(token);
1788         m_tree.openElements().popUntilPopped(token.name());
1789         return;
1790     }
1791     if (token.name() == liTag) {
1792         if (!m_tree.openElements().inListItemScope(token.name())) {
1793             parseError(token);
1794             return;
1795         }
1796         m_tree.generateImpliedEndTagsWithExclusion(token.name());
1797         if (!m_tree.currentStackItem().matchesHTMLTag(token.name()))
1798             parseError(token);
1799         m_tree.openElements().popUntilPopped(token.name());
1800         return;
1801     }
1802     if (token.name() == ddTag || token.name() == dtTag) {
1803         if (!m_tree.openElements().inScope(token.name())) {
1804             parseError(token);
1805             return;
1806         }
1807         m_tree.generateImpliedEndTagsWithExclusion(token.name());
1808         if (!m_tree.currentStackItem().matchesHTMLTag(token.name()))
1809             parseError(token);
1810         m_tree.openElements().popUntilPopped(token.name());
1811         return;
1812     }
1813     if (isNumberedHeaderTag(token.name())) {
1814         if (!m_tree.openElements().hasNumberedHeaderElementInScope()) {
1815             parseError(token);
1816             return;
1817         }
1818         m_tree.generateImpliedEndTags();
1819         if (!m_tree.currentStackItem().matchesHTMLTag(token.name()))
1820             parseError(token);
1821         m_tree.openElements().popUntilNumberedHeaderElementPopped();
1822         return;
1823     }
1824     if (HTMLConstructionSite::isFormattingTag(token.name())) {
1825         callTheAdoptionAgency(token);
1826         return;
1827     }
1828     if (token.name() == appletTag || token.name() == marqueeTag || token.name() == objectTag) {
1829         if (!m_tree.openElements().inScope(token.name())) {
1830             parseError(token);
1831             return;
1832         }
1833         m_tree.generateImpliedEndTags();
1834         if (!m_tree.currentStackItem().matchesHTMLTag(token.name()))
1835             parseError(token);
1836         m_tree.openElements().popUntilPopped(token.name());
1837         m_tree.activeFormattingElements().clearToLastMarker();
1838         return;
1839     }
1840     if (token.name() == brTag) {
1841         parseError(token);
1842         processFakeStartTag(brTag);
1843         return;
1844     }
1845     if (token.name() == templateTag) {
1846         processTemplateEndTag(WTFMove(token));
1847         return;
1848     }
1849     processAnyOtherEndTagForInBody(WTFMove(token));
1850 }
1851
1852 bool HTMLTreeBuilder::processCaptionEndTagForInCaption()
1853 {
1854     if (!m_tree.openElements().inTableScope(captionTag->localName())) {
1855         ASSERT(isParsingFragment());
1856         // FIXME: parse error
1857         return false;
1858     }
1859     m_tree.generateImpliedEndTags();
1860     // FIXME: parse error if (!m_tree.currentStackItem().hasTagName(captionTag))
1861     m_tree.openElements().popUntilPopped(captionTag->localName());
1862     m_tree.activeFormattingElements().clearToLastMarker();
1863     m_insertionMode = InsertionMode::InTable;
1864     return true;
1865 }
1866
1867 bool HTMLTreeBuilder::processTrEndTagForInRow()
1868 {
1869     if (!m_tree.openElements().inTableScope(trTag)) {
1870         ASSERT(isParsingFragmentOrTemplateContents());
1871         // FIXME: parse error
1872         return false;
1873     }
1874     m_tree.openElements().popUntilTableRowScopeMarker();
1875     ASSERT(m_tree.currentStackItem().hasTagName(trTag));
1876     m_tree.openElements().pop();
1877     m_insertionMode = InsertionMode::InTableBody;
1878     return true;
1879 }
1880
1881 bool HTMLTreeBuilder::processTableEndTagForInTable()
1882 {
1883     if (!m_tree.openElements().inTableScope(tableTag)) {
1884         ASSERT(isParsingFragmentOrTemplateContents());
1885         // FIXME: parse error.
1886         return false;
1887     }
1888     m_tree.openElements().popUntilPopped(tableTag->localName());
1889     resetInsertionModeAppropriately();
1890     return true;
1891 }
1892
1893 void HTMLTreeBuilder::processEndTagForInTable(AtomicHTMLToken&& token)
1894 {
1895     ASSERT(token.type() == HTMLToken::EndTag);
1896     if (token.name() == tableTag) {
1897         processTableEndTagForInTable();
1898         return;
1899     }
1900     if (token.name() == bodyTag
1901         || isCaptionColOrColgroupTag(token.name())
1902         || token.name() == htmlTag
1903         || isTableBodyContextTag(token.name())
1904         || isTableCellContextTag(token.name())
1905         || token.name() == trTag) {
1906         parseError(token);
1907         return;
1908     }
1909     parseError(token);
1910     // Is this redirection necessary here?
1911     HTMLConstructionSite::RedirectToFosterParentGuard redirecter(m_tree);
1912     processEndTagForInBody(WTFMove(token));
1913 }
1914
1915 void HTMLTreeBuilder::processEndTag(AtomicHTMLToken&& token)
1916 {
1917     ASSERT(token.type() == HTMLToken::EndTag);
1918     switch (m_insertionMode) {
1919     case InsertionMode::Initial:
1920         defaultForInitial();
1921         ASSERT(m_insertionMode == InsertionMode::BeforeHTML);
1922         FALLTHROUGH;
1923     case InsertionMode::BeforeHTML:
1924         if (token.name() != headTag && token.name() != bodyTag && token.name() != htmlTag && token.name() != brTag) {
1925             parseError(token);
1926             return;
1927         }
1928         defaultForBeforeHTML();
1929         ASSERT(m_insertionMode == InsertionMode::BeforeHead);
1930         FALLTHROUGH;
1931     case InsertionMode::BeforeHead:
1932         if (token.name() != headTag && token.name() != bodyTag && token.name() != htmlTag && token.name() != brTag) {
1933             parseError(token);
1934             return;
1935         }
1936         defaultForBeforeHead();
1937         ASSERT(m_insertionMode == InsertionMode::InHead);
1938         FALLTHROUGH;
1939     case InsertionMode::InHead:
1940         // FIXME: This case should be broken out into processEndTagForInHead,
1941         // because other end tag cases now refer to it ("process the token for using the rules of the "in head" insertion mode").
1942         // but because the logic falls through to InsertionMode::AfterHead, that gets a little messy.
1943         if (token.name() == templateTag) {
1944             processTemplateEndTag(WTFMove(token));
1945             return;
1946         }
1947         if (token.name() == headTag) {
1948             m_tree.openElements().popHTMLHeadElement();
1949             m_insertionMode = InsertionMode::AfterHead;
1950             return;
1951         }
1952         if (token.name() != bodyTag && token.name() != htmlTag && token.name() != brTag) {
1953             parseError(token);
1954             return;
1955         }
1956         defaultForInHead();
1957         ASSERT(m_insertionMode == InsertionMode::AfterHead);
1958         FALLTHROUGH;
1959     case InsertionMode::AfterHead:
1960         if (token.name() != bodyTag && token.name() != htmlTag && token.name() != brTag) {
1961             parseError(token);
1962             return;
1963         }
1964         defaultForAfterHead();
1965         ASSERT(m_insertionMode == InsertionMode::InBody);
1966         FALLTHROUGH;
1967     case InsertionMode::InBody:
1968         processEndTagForInBody(WTFMove(token));
1969         break;
1970     case InsertionMode::InTable:
1971         processEndTagForInTable(WTFMove(token));
1972         break;
1973     case InsertionMode::InCaption:
1974         if (token.name() == captionTag) {
1975             processCaptionEndTagForInCaption();
1976             return;
1977         }
1978         if (token.name() == tableTag) {
1979             parseError(token);
1980             if (!processCaptionEndTagForInCaption()) {
1981                 ASSERT(isParsingFragment());
1982                 return;
1983             }
1984             processEndTag(WTFMove(token));
1985             return;
1986         }
1987         if (token.name() == bodyTag
1988             || token.name() == colTag
1989             || token.name() == colgroupTag
1990             || token.name() == htmlTag
1991             || isTableBodyContextTag(token.name())
1992             || isTableCellContextTag(token.name())
1993             || token.name() == trTag) {
1994             parseError(token);
1995             return;
1996         }
1997         processEndTagForInBody(WTFMove(token));
1998         break;
1999     case InsertionMode::InColumnGroup:
2000         if (token.name() == colgroupTag) {
2001             processColgroupEndTagForInColumnGroup();
2002             return;
2003         }
2004         if (token.name() == colTag) {
2005             parseError(token);
2006             return;
2007         }
2008         if (token.name() == templateTag) {
2009             processTemplateEndTag(WTFMove(token));
2010             return;
2011         }
2012         if (!processColgroupEndTagForInColumnGroup()) {
2013             ASSERT(isParsingFragmentOrTemplateContents());
2014             return;
2015         }
2016         processEndTag(WTFMove(token));
2017         break;
2018     case InsertionMode::InRow:
2019         processEndTagForInRow(WTFMove(token));
2020         break;
2021     case InsertionMode::InCell:
2022         processEndTagForInCell(WTFMove(token));
2023         break;
2024     case InsertionMode::InTableBody:
2025         processEndTagForInTableBody(WTFMove(token));
2026         break;
2027     case InsertionMode::AfterBody:
2028         if (token.name() == htmlTag) {
2029             if (isParsingFragment()) {
2030                 parseError(token);
2031                 return;
2032             }
2033             m_insertionMode = InsertionMode::AfterAfterBody;
2034             return;
2035         }
2036         FALLTHROUGH;
2037     case InsertionMode::AfterAfterBody:
2038         ASSERT(m_insertionMode == InsertionMode::AfterBody || m_insertionMode == InsertionMode::AfterAfterBody);
2039         parseError(token);
2040         m_insertionMode = InsertionMode::InBody;
2041         processEndTag(WTFMove(token));
2042         break;
2043     case InsertionMode::InHeadNoscript:
2044         if (token.name() == noscriptTag) {
2045             ASSERT(m_tree.currentStackItem().hasTagName(noscriptTag));
2046             m_tree.openElements().pop();
2047             ASSERT(m_tree.currentStackItem().hasTagName(headTag));
2048             m_insertionMode = InsertionMode::InHead;
2049             return;
2050         }
2051         if (token.name() != brTag) {
2052             parseError(token);
2053             return;
2054         }
2055         defaultForInHeadNoscript();
2056         processToken(WTFMove(token));
2057         break;
2058     case InsertionMode::Text:
2059         if (token.name() == scriptTag) {
2060             // Pause ourselves so that parsing stops until the script can be processed by the caller.
2061             ASSERT(m_tree.currentStackItem().hasTagName(scriptTag));
2062             if (scriptingContentIsAllowed(m_tree.parserContentPolicy()))
2063                 m_scriptToProcess = &downcast<HTMLScriptElement>(m_tree.currentElement());
2064             m_tree.openElements().pop();
2065             m_insertionMode = m_originalInsertionMode;
2066
2067             // This token will not have been created by the tokenizer if a
2068             // self-closing script tag was encountered and pre-HTML5 parser
2069             // quirks are enabled. We must set the tokenizer's state to
2070             // DataState explicitly if the tokenizer didn't have a chance to.
2071             ASSERT(m_parser.tokenizer().isInDataState() || m_options.usePreHTML5ParserQuirks);
2072             m_parser.tokenizer().setDataState();
2073             return;
2074         }
2075         m_tree.openElements().pop();
2076         m_insertionMode = m_originalInsertionMode;
2077         break;
2078     case InsertionMode::InFrameset:
2079         if (token.name() == framesetTag) {
2080             bool ignoreFramesetForFragmentParsing  = m_tree.currentIsRootNode() || m_tree.openElements().hasTemplateInHTMLScope();
2081             if (ignoreFramesetForFragmentParsing) {
2082                 ASSERT(isParsingFragmentOrTemplateContents());
2083                 parseError(token);
2084                 return;
2085             }
2086             m_tree.openElements().pop();
2087             if (!isParsingFragment() && !m_tree.currentStackItem().hasTagName(framesetTag))
2088                 m_insertionMode = InsertionMode::AfterFrameset;
2089             return;
2090         }
2091         break;
2092     case InsertionMode::AfterFrameset:
2093         if (token.name() == htmlTag) {
2094             m_insertionMode = InsertionMode::AfterAfterFrameset;
2095             return;
2096         }
2097         FALLTHROUGH;
2098     case InsertionMode::AfterAfterFrameset:
2099         ASSERT(m_insertionMode == InsertionMode::AfterFrameset || m_insertionMode == InsertionMode::AfterAfterFrameset);
2100         parseError(token);
2101         break;
2102     case InsertionMode::InSelectInTable:
2103         if (token.name() == captionTag
2104             || token.name() == tableTag
2105             || isTableBodyContextTag(token.name())
2106             || token.name() == trTag
2107             || isTableCellContextTag(token.name())) {
2108             parseError(token);
2109             if (m_tree.openElements().inTableScope(token.name())) {
2110                 AtomicHTMLToken endSelect(HTMLToken::EndTag, selectTag->localName());
2111                 processEndTag(WTFMove(endSelect));
2112                 processEndTag(WTFMove(token));
2113             }
2114             return;
2115         }
2116         FALLTHROUGH;
2117     case InsertionMode::InSelect:
2118         ASSERT(m_insertionMode == InsertionMode::InSelect || m_insertionMode == InsertionMode::InSelectInTable);
2119         if (token.name() == optgroupTag) {
2120             if (is<HTMLOptionElement>(m_tree.currentStackItem().node()) && m_tree.oneBelowTop() && is<HTMLOptGroupElement>(m_tree.oneBelowTop()->node()))
2121                 processFakeEndTag(optionTag);
2122             if (is<HTMLOptGroupElement>(m_tree.currentStackItem().node())) {
2123                 m_tree.openElements().pop();
2124                 return;
2125             }
2126             parseError(token);
2127             return;
2128         }
2129         if (token.name() == optionTag) {
2130             if (is<HTMLOptionElement>(m_tree.currentStackItem().node())) {
2131                 m_tree.openElements().pop();
2132                 return;
2133             }
2134             parseError(token);
2135             return;
2136         }
2137         if (token.name() == selectTag) {
2138             if (!m_tree.openElements().inSelectScope(token.name())) {
2139                 ASSERT(isParsingFragment());
2140                 parseError(token);
2141                 return;
2142             }
2143             m_tree.openElements().popUntilPopped(selectTag->localName());
2144             resetInsertionModeAppropriately();
2145             return;
2146         }
2147         if (token.name() == templateTag) {
2148             processTemplateEndTag(WTFMove(token));
2149             return;
2150         }
2151         break;
2152     case InsertionMode::InTableText:
2153         defaultForInTableText();
2154         processEndTag(WTFMove(token));
2155         break;
2156     case InsertionMode::TemplateContents:
2157         if (token.name() == templateTag) {
2158             processTemplateEndTag(WTFMove(token));
2159             return;
2160         }
2161         break;
2162     }
2163 }
2164
2165 void HTMLTreeBuilder::processComment(AtomicHTMLToken&& token)
2166 {
2167     ASSERT(token.type() == HTMLToken::Comment);
2168     if (m_insertionMode == InsertionMode::Initial
2169         || m_insertionMode == InsertionMode::BeforeHTML
2170         || m_insertionMode == InsertionMode::AfterAfterBody
2171         || m_insertionMode == InsertionMode::AfterAfterFrameset) {
2172         m_tree.insertCommentOnDocument(WTFMove(token));
2173         return;
2174     }
2175     if (m_insertionMode == InsertionMode::AfterBody) {
2176         m_tree.insertCommentOnHTMLHtmlElement(WTFMove(token));
2177         return;
2178     }
2179     if (m_insertionMode == InsertionMode::InTableText) {
2180         defaultForInTableText();
2181         processComment(WTFMove(token));
2182         return;
2183     }
2184     m_tree.insertComment(WTFMove(token));
2185 }
2186
2187 void HTMLTreeBuilder::processCharacter(AtomicHTMLToken&& token)
2188 {
2189     ASSERT(token.type() == HTMLToken::Character);
2190     ExternalCharacterTokenBuffer buffer(token);
2191     processCharacterBuffer(buffer);
2192 }
2193
2194 #if ENABLE(TELEPHONE_NUMBER_DETECTION) && PLATFORM(IOS_FAMILY)
2195
2196 // FIXME: Extract the following iOS-specific code into a separate file.
2197 // From the string 4089961010, creates a link of the form <a href="tel:4089961010">4089961010</a> and inserts it.
2198 void HTMLTreeBuilder::insertPhoneNumberLink(const String& string)
2199 {
2200     Vector<Attribute> attributes;
2201     attributes.append(Attribute(HTMLNames::hrefAttr, makeString("tel:"_s, string)));
2202
2203     const AtomicString& aTagLocalName = aTag->localName();
2204     AtomicHTMLToken aStartToken(HTMLToken::StartTag, aTagLocalName, WTFMove(attributes));
2205     AtomicHTMLToken aEndToken(HTMLToken::EndTag, aTagLocalName);
2206
2207     processStartTag(WTFMove(aStartToken));
2208     m_tree.executeQueuedTasks();
2209     m_tree.insertTextNode(string);
2210     processEndTag(WTFMove(aEndToken));
2211 }
2212
2213 // Locates the phone numbers in the string and deals with it
2214 // 1. Appends the text before the phone number as a text node.
2215 // 2. Wraps the phone number in a tel: link.
2216 // 3. Goes back to step 1 if a phone number is found in the rest of the string.
2217 // 4. Appends the rest of the string as a text node.
2218 void HTMLTreeBuilder::linkifyPhoneNumbers(const String& string)
2219 {
2220     ASSERT(TelephoneNumberDetector::isSupported());
2221
2222     // relativeStartPosition and relativeEndPosition are the endpoints of the phone number range,
2223     // relative to the scannerPosition
2224     unsigned length = string.length();
2225     unsigned scannerPosition = 0;
2226     int relativeStartPosition = 0;
2227     int relativeEndPosition = 0;
2228
2229     auto characters = StringView(string).upconvertedCharacters();
2230
2231     // While there's a phone number in the rest of the string...
2232     while (scannerPosition < length && TelephoneNumberDetector::find(&characters[scannerPosition], length - scannerPosition, &relativeStartPosition, &relativeEndPosition)) {
2233         // The convention in the Data Detectors framework is that the end position is the first character NOT in the phone number
2234         // (that is, the length of the range is relativeEndPosition - relativeStartPosition). So substract 1 to get the same
2235         // convention as the old WebCore phone number parser (so that the rest of the code is still valid if we want to go back
2236         // to the old parser).
2237         --relativeEndPosition;
2238
2239         ASSERT(scannerPosition + relativeEndPosition < length);
2240
2241         m_tree.insertTextNode(string.substring(scannerPosition, relativeStartPosition));
2242         insertPhoneNumberLink(string.substring(scannerPosition + relativeStartPosition, relativeEndPosition - relativeStartPosition + 1));
2243
2244         scannerPosition += relativeEndPosition + 1;
2245     }
2246
2247     // Append the rest as a text node.
2248     if (scannerPosition > 0) {
2249         if (scannerPosition < length) {
2250             String after = string.substring(scannerPosition, length - scannerPosition);
2251             m_tree.insertTextNode(after);
2252         }
2253     } else
2254         m_tree.insertTextNode(string);
2255 }
2256
2257 // Looks at the ancestors of the element to determine whether we're inside an element which disallows parsing phone numbers.
2258 static inline bool disallowTelephoneNumberParsing(const ContainerNode& node)
2259 {
2260     return node.isLink()
2261         || node.hasTagName(scriptTag)
2262         || is<HTMLFormControlElement>(node)
2263         || node.hasTagName(styleTag)
2264         || node.hasTagName(ttTag)
2265         || node.hasTagName(preTag)
2266         || node.hasTagName(codeTag);
2267 }
2268
2269 static inline bool shouldParseTelephoneNumbersInNode(const ContainerNode& node)
2270 {
2271     for (const ContainerNode* ancestor = &node; ancestor; ancestor = ancestor->parentNode()) {
2272         if (disallowTelephoneNumberParsing(*ancestor))
2273             return false;
2274     }
2275     return true;
2276 }
2277
2278 #endif // ENABLE(TELEPHONE_NUMBER_DETECTION) && PLATFORM(IOS_FAMILY)
2279
2280 void HTMLTreeBuilder::processCharacterBuffer(ExternalCharacterTokenBuffer& buffer)
2281 {
2282 ReprocessBuffer:
2283     // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#parsing-main-inbody
2284     // Note that this logic is different than the generic \r\n collapsing
2285     // handled in the input stream preprocessor. This logic is here as an
2286     // "authoring convenience" so folks can write:
2287     //
2288     // <pre>
2289     // lorem ipsum
2290     // lorem ipsum
2291     // </pre>
2292     //
2293     // without getting an extra newline at the start of their <pre> element.
2294     if (m_shouldSkipLeadingNewline) {
2295         m_shouldSkipLeadingNewline = false;
2296         buffer.skipAtMostOneLeadingNewline();
2297         if (buffer.isEmpty())
2298             return;
2299     }
2300
2301     switch (m_insertionMode) {
2302     case InsertionMode::Initial:
2303         buffer.skipLeadingWhitespace();
2304         if (buffer.isEmpty())
2305             return;
2306         defaultForInitial();
2307         ASSERT(m_insertionMode == InsertionMode::BeforeHTML);
2308         FALLTHROUGH;
2309     case InsertionMode::BeforeHTML:
2310         buffer.skipLeadingWhitespace();
2311         if (buffer.isEmpty())
2312             return;
2313         defaultForBeforeHTML();
2314         ASSERT(m_insertionMode == InsertionMode::BeforeHead);
2315         FALLTHROUGH;
2316     case InsertionMode::BeforeHead:
2317         buffer.skipLeadingWhitespace();
2318         if (buffer.isEmpty())
2319             return;
2320         defaultForBeforeHead();
2321         ASSERT(m_insertionMode == InsertionMode::InHead);
2322         FALLTHROUGH;
2323     case InsertionMode::InHead: {
2324         String leadingWhitespace = buffer.takeLeadingWhitespace();
2325         if (!leadingWhitespace.isEmpty())
2326             m_tree.insertTextNode(leadingWhitespace, AllWhitespace);
2327         if (buffer.isEmpty())
2328             return;
2329         defaultForInHead();
2330         ASSERT(m_insertionMode == InsertionMode::AfterHead);
2331         FALLTHROUGH;
2332     }
2333     case InsertionMode::AfterHead: {
2334         String leadingWhitespace = buffer.takeLeadingWhitespace();
2335         if (!leadingWhitespace.isEmpty())
2336             m_tree.insertTextNode(leadingWhitespace, AllWhitespace);
2337         if (buffer.isEmpty())
2338             return;
2339         defaultForAfterHead();
2340         ASSERT(m_insertionMode == InsertionMode::InBody);
2341         FALLTHROUGH;
2342     }
2343     case InsertionMode::InBody:
2344     case InsertionMode::InCaption:
2345     case InsertionMode::InCell:
2346     case InsertionMode::TemplateContents:
2347         processCharacterBufferForInBody(buffer);
2348         break;
2349     case InsertionMode::InTable:
2350     case InsertionMode::InTableBody:
2351     case InsertionMode::InRow:
2352         ASSERT(m_pendingTableCharacters.isEmpty());
2353         if (is<HTMLTableElement>(m_tree.currentStackItem().node())
2354             || m_tree.currentStackItem().hasTagName(HTMLNames::tbodyTag)
2355             || m_tree.currentStackItem().hasTagName(HTMLNames::tfootTag)
2356             || m_tree.currentStackItem().hasTagName(HTMLNames::theadTag)
2357             || m_tree.currentStackItem().hasTagName(HTMLNames::trTag)) {
2358
2359             m_originalInsertionMode = m_insertionMode;
2360             m_insertionMode = InsertionMode::InTableText;
2361             // Note that we fall through to the InsertionMode::InTableText case below.
2362         } else {
2363             HTMLConstructionSite::RedirectToFosterParentGuard redirecter(m_tree);
2364             processCharacterBufferForInBody(buffer);
2365             break;
2366         }
2367         FALLTHROUGH;
2368     case InsertionMode::InTableText:
2369         buffer.giveRemainingTo(m_pendingTableCharacters);
2370         break;
2371     case InsertionMode::InColumnGroup: {
2372         String leadingWhitespace = buffer.takeLeadingWhitespace();
2373         if (!leadingWhitespace.isEmpty())
2374             m_tree.insertTextNode(leadingWhitespace, AllWhitespace);
2375         if (buffer.isEmpty())
2376             return;
2377         if (!processColgroupEndTagForInColumnGroup()) {
2378             ASSERT(isParsingFragmentOrTemplateContents());
2379             // The spec tells us to drop these characters on the floor.
2380             buffer.skipLeadingNonWhitespace();
2381             if (buffer.isEmpty())
2382                 return;
2383         }
2384         goto ReprocessBuffer;
2385     }
2386     case InsertionMode::AfterBody:
2387     case InsertionMode::AfterAfterBody:
2388         // FIXME: parse error
2389         m_insertionMode = InsertionMode::InBody;
2390         goto ReprocessBuffer;
2391     case InsertionMode::Text:
2392         m_tree.insertTextNode(buffer.takeRemaining());
2393         break;
2394     case InsertionMode::InHeadNoscript: {
2395         String leadingWhitespace = buffer.takeLeadingWhitespace();
2396         if (!leadingWhitespace.isEmpty())
2397             m_tree.insertTextNode(leadingWhitespace, AllWhitespace);
2398         if (buffer.isEmpty())
2399             return;
2400         defaultForInHeadNoscript();
2401         goto ReprocessBuffer;
2402     }
2403     case InsertionMode::InFrameset:
2404     case InsertionMode::AfterFrameset: {
2405         String leadingWhitespace = buffer.takeRemainingWhitespace();
2406         if (!leadingWhitespace.isEmpty())
2407             m_tree.insertTextNode(leadingWhitespace, AllWhitespace);
2408         // FIXME: We should generate a parse error if we skipped over any
2409         // non-whitespace characters.
2410         break;
2411     }
2412     case InsertionMode::InSelectInTable:
2413     case InsertionMode::InSelect:
2414         m_tree.insertTextNode(buffer.takeRemaining());
2415         break;
2416     case InsertionMode::AfterAfterFrameset: {
2417         String leadingWhitespace = buffer.takeRemainingWhitespace();
2418         if (!leadingWhitespace.isEmpty()) {
2419             m_tree.reconstructTheActiveFormattingElements();
2420             m_tree.insertTextNode(leadingWhitespace, AllWhitespace);
2421         }
2422         // FIXME: We should generate a parse error if we skipped over any
2423         // non-whitespace characters.
2424         break;
2425     }
2426     }
2427 }
2428
2429 void HTMLTreeBuilder::processCharacterBufferForInBody(ExternalCharacterTokenBuffer& buffer)
2430 {
2431     m_tree.reconstructTheActiveFormattingElements();
2432     String characters = buffer.takeRemaining();
2433 #if ENABLE(TELEPHONE_NUMBER_DETECTION) && PLATFORM(IOS_FAMILY)
2434     if (!isParsingFragment() && m_tree.isTelephoneNumberParsingEnabled() && shouldParseTelephoneNumbersInNode(m_tree.currentNode()) && TelephoneNumberDetector::isSupported())
2435         linkifyPhoneNumbers(characters);
2436     else
2437         m_tree.insertTextNode(characters);
2438 #else
2439     m_tree.insertTextNode(characters);
2440 #endif
2441     if (m_framesetOk && !isAllWhitespaceOrReplacementCharacters(characters))
2442         m_framesetOk = false;
2443 }
2444
2445 void HTMLTreeBuilder::processEndOfFile(AtomicHTMLToken&& token)
2446 {
2447     ASSERT(token.type() == HTMLToken::EndOfFile);
2448     switch (m_insertionMode) {
2449     case InsertionMode::Initial:
2450         defaultForInitial();
2451         ASSERT(m_insertionMode == InsertionMode::BeforeHTML);
2452         FALLTHROUGH;
2453     case InsertionMode::BeforeHTML:
2454         defaultForBeforeHTML();
2455         ASSERT(m_insertionMode == InsertionMode::BeforeHead);
2456         FALLTHROUGH;
2457     case InsertionMode::BeforeHead:
2458         defaultForBeforeHead();
2459         ASSERT(m_insertionMode == InsertionMode::InHead);
2460         FALLTHROUGH;
2461     case InsertionMode::InHead:
2462         defaultForInHead();
2463         ASSERT(m_insertionMode == InsertionMode::AfterHead);
2464         FALLTHROUGH;
2465     case InsertionMode::AfterHead:
2466         defaultForAfterHead();
2467         ASSERT(m_insertionMode == InsertionMode::InBody);
2468         FALLTHROUGH;
2469     case InsertionMode::InBody:
2470     case InsertionMode::InCell:
2471     case InsertionMode::InCaption:
2472     case InsertionMode::InRow:
2473         notImplemented(); // Emit parse error based on what elements are still open.
2474         if (!m_templateInsertionModes.isEmpty()) {
2475             if (processEndOfFileForInTemplateContents(WTFMove(token)))
2476                 return;
2477         }
2478         break;
2479     case InsertionMode::AfterBody:
2480     case InsertionMode::AfterAfterBody:
2481         break;
2482     case InsertionMode::InHeadNoscript:
2483         defaultForInHeadNoscript();
2484         processEndOfFile(WTFMove(token));
2485         return;
2486     case InsertionMode::AfterFrameset:
2487     case InsertionMode::AfterAfterFrameset:
2488         break;
2489     case InsertionMode::InColumnGroup:
2490         if (m_tree.currentIsRootNode()) {
2491             ASSERT(isParsingFragment());
2492             return; // FIXME: Should we break here instead of returning?
2493         }
2494         ASSERT(m_tree.currentNode().hasTagName(colgroupTag) || m_tree.currentNode().hasTagName(templateTag));
2495         processColgroupEndTagForInColumnGroup();
2496         FALLTHROUGH;
2497     case InsertionMode::InFrameset:
2498     case InsertionMode::InTable:
2499     case InsertionMode::InTableBody:
2500     case InsertionMode::InSelectInTable:
2501     case InsertionMode::InSelect:
2502         ASSERT(m_insertionMode == InsertionMode::InSelect || m_insertionMode == InsertionMode::InSelectInTable || m_insertionMode == InsertionMode::InTable || m_insertionMode == InsertionMode::InFrameset || m_insertionMode == InsertionMode::InTableBody || m_insertionMode == InsertionMode::InColumnGroup);
2503         if (&m_tree.currentNode() != &m_tree.openElements().rootNode())
2504             parseError(token);
2505         if (!m_templateInsertionModes.isEmpty()) {
2506             if (processEndOfFileForInTemplateContents(WTFMove(token)))
2507                 return;
2508         }
2509         break;
2510     case InsertionMode::InTableText:
2511         defaultForInTableText();
2512         processEndOfFile(WTFMove(token));
2513         return;
2514     case InsertionMode::Text:
2515         parseError(token);
2516         if (m_tree.currentStackItem().hasTagName(scriptTag))
2517             notImplemented(); // mark the script element as "already started".
2518         m_tree.openElements().pop();
2519         ASSERT(m_originalInsertionMode != InsertionMode::Text);
2520         m_insertionMode = m_originalInsertionMode;
2521         processEndOfFile(WTFMove(token));
2522         return;
2523     case InsertionMode::TemplateContents:
2524         if (processEndOfFileForInTemplateContents(WTFMove(token)))
2525             return;
2526         break;
2527     }
2528     m_tree.openElements().popAll();
2529 }
2530
2531 void HTMLTreeBuilder::defaultForInitial()
2532 {
2533     notImplemented();
2534     m_tree.setDefaultCompatibilityMode();
2535     // FIXME: parse error
2536     m_insertionMode = InsertionMode::BeforeHTML;
2537 }
2538
2539 void HTMLTreeBuilder::defaultForBeforeHTML()
2540 {
2541     AtomicHTMLToken startHTML(HTMLToken::StartTag, htmlTag->localName());
2542     m_tree.insertHTMLHtmlStartTagBeforeHTML(WTFMove(startHTML));
2543     m_insertionMode = InsertionMode::BeforeHead;
2544 }
2545
2546 void HTMLTreeBuilder::defaultForBeforeHead()
2547 {
2548     AtomicHTMLToken startHead(HTMLToken::StartTag, headTag->localName());
2549     processStartTag(WTFMove(startHead));
2550 }
2551
2552 void HTMLTreeBuilder::defaultForInHead()
2553 {
2554     AtomicHTMLToken endHead(HTMLToken::EndTag, headTag->localName());
2555     processEndTag(WTFMove(endHead));
2556 }
2557
2558 void HTMLTreeBuilder::defaultForInHeadNoscript()
2559 {
2560     AtomicHTMLToken endNoscript(HTMLToken::EndTag, noscriptTag->localName());
2561     processEndTag(WTFMove(endNoscript));
2562 }
2563
2564 void HTMLTreeBuilder::defaultForAfterHead()
2565 {
2566     AtomicHTMLToken startBody(HTMLToken::StartTag, bodyTag->localName());
2567     processStartTag(WTFMove(startBody));
2568     m_framesetOk = true;
2569 }
2570
2571 void HTMLTreeBuilder::defaultForInTableText()
2572 {
2573     String characters = m_pendingTableCharacters.toString();
2574     m_pendingTableCharacters.clear();
2575     if (!isAllWhitespace(characters)) {
2576         // FIXME: parse error
2577         HTMLConstructionSite::RedirectToFosterParentGuard redirecter(m_tree);
2578         m_tree.reconstructTheActiveFormattingElements();
2579         m_tree.insertTextNode(characters, NotAllWhitespace);
2580         m_framesetOk = false;
2581         m_insertionMode = m_originalInsertionMode;
2582         return;
2583     }
2584     m_tree.insertTextNode(characters);
2585     m_insertionMode = m_originalInsertionMode;
2586 }
2587
2588 bool HTMLTreeBuilder::processStartTagForInHead(AtomicHTMLToken&& token)
2589 {
2590     ASSERT(token.type() == HTMLToken::StartTag);
2591     if (token.name() == htmlTag) {
2592         processHtmlStartTagForInBody(WTFMove(token));
2593         return true;
2594     }
2595     if (token.name() == baseTag
2596         || token.name() == basefontTag
2597         || token.name() == bgsoundTag
2598         || token.name() == commandTag
2599         || token.name() == linkTag
2600         || token.name() == metaTag) {
2601         m_tree.insertSelfClosingHTMLElement(WTFMove(token));
2602         // Note: The custom processing for the <meta> tag is done in HTMLMetaElement::process().
2603         return true;
2604     }
2605     if (token.name() == titleTag) {
2606         processGenericRCDATAStartTag(WTFMove(token));
2607         return true;
2608     }
2609     if (token.name() == noscriptTag) {
2610         if (m_options.scriptEnabled) {
2611             processGenericRawTextStartTag(WTFMove(token));
2612             return true;
2613         }
2614         m_tree.insertHTMLElement(WTFMove(token));
2615         m_insertionMode = InsertionMode::InHeadNoscript;
2616         return true;
2617     }
2618     if (token.name() == noframesTag || token.name() == styleTag) {
2619         processGenericRawTextStartTag(WTFMove(token));
2620         return true;
2621     }
2622     if (token.name() == scriptTag) {
2623         bool isSelfClosing = token.selfClosing();
2624         processScriptStartTag(WTFMove(token));
2625         if (m_options.usePreHTML5ParserQuirks && isSelfClosing)
2626             processFakeEndTag(scriptTag);
2627         return true;
2628     }
2629     if (token.name() == templateTag) {
2630         m_framesetOk = false;
2631         processTemplateStartTag(WTFMove(token));
2632         return true;
2633     }
2634     if (token.name() == headTag) {
2635         parseError(token);
2636         return true;
2637     }
2638     return false;
2639 }
2640
2641 void HTMLTreeBuilder::processGenericRCDATAStartTag(AtomicHTMLToken&& token)
2642 {
2643     ASSERT(token.type() == HTMLToken::StartTag);
2644     m_tree.insertHTMLElement(WTFMove(token));
2645     m_parser.tokenizer().setRCDATAState();
2646     m_originalInsertionMode = m_insertionMode;
2647     m_insertionMode = InsertionMode::Text;
2648 }
2649
2650 void HTMLTreeBuilder::processGenericRawTextStartTag(AtomicHTMLToken&& token)
2651 {
2652     ASSERT(token.type() == HTMLToken::StartTag);
2653     m_tree.insertHTMLElement(WTFMove(token));
2654     m_parser.tokenizer().setRAWTEXTState();
2655     m_originalInsertionMode = m_insertionMode;
2656     m_insertionMode = InsertionMode::Text;
2657 }
2658
2659 void HTMLTreeBuilder::processScriptStartTag(AtomicHTMLToken&& token)
2660 {
2661     ASSERT(token.type() == HTMLToken::StartTag);
2662     m_tree.insertScriptElement(WTFMove(token));
2663     m_parser.tokenizer().setScriptDataState();
2664     m_originalInsertionMode = m_insertionMode;
2665
2666     TextPosition position = m_parser.textPosition();
2667
2668     m_scriptToProcessStartPosition = position;
2669
2670     m_insertionMode = InsertionMode::Text;
2671 }
2672
2673 // http://www.whatwg.org/specs/web-apps/current-work/#adjusted-current-node
2674 HTMLStackItem& HTMLTreeBuilder::adjustedCurrentStackItem() const
2675 {
2676     ASSERT(!m_tree.isEmpty());
2677     if (isParsingFragment() && m_tree.openElements().hasOnlyOneElement())
2678         return m_fragmentContext.contextElementStackItem();
2679
2680     return m_tree.currentStackItem();
2681 }
2682
2683 // http://www.whatwg.org/specs/web-apps/current-work/multipage/tree-construction.html#tree-construction
2684 bool HTMLTreeBuilder::shouldProcessTokenInForeignContent(const AtomicHTMLToken& token)
2685 {
2686     if (m_tree.isEmpty())
2687         return false;
2688     HTMLStackItem& adjustedCurrentNode = adjustedCurrentStackItem();
2689     if (isInHTMLNamespace(adjustedCurrentNode))
2690         return false;
2691     if (HTMLElementStack::isMathMLTextIntegrationPoint(adjustedCurrentNode)) {
2692         if (token.type() == HTMLToken::StartTag
2693             && token.name() != MathMLNames::mglyphTag
2694             && token.name() != MathMLNames::malignmarkTag)
2695             return false;
2696         if (token.type() == HTMLToken::Character)
2697             return false;
2698     }
2699     if (adjustedCurrentNode.hasTagName(MathMLNames::annotation_xmlTag)
2700         && token.type() == HTMLToken::StartTag
2701         && token.name() == SVGNames::svgTag)
2702         return false;
2703     if (HTMLElementStack::isHTMLIntegrationPoint(adjustedCurrentNode)) {
2704         if (token.type() == HTMLToken::StartTag)
2705             return false;
2706         if (token.type() == HTMLToken::Character)
2707             return false;
2708     }
2709     if (token.type() == HTMLToken::EndOfFile)
2710         return false;
2711     return true;
2712 }
2713
2714 static bool hasAttribute(const AtomicHTMLToken& token, const QualifiedName& name)
2715 {
2716     return findAttribute(token.attributes(), name);
2717 }
2718
2719 void HTMLTreeBuilder::processTokenInForeignContent(AtomicHTMLToken&& token)
2720 {
2721     HTMLStackItem& adjustedCurrentNode = adjustedCurrentStackItem();
2722     
2723     switch (token.type()) {
2724     case HTMLToken::Uninitialized:
2725         ASSERT_NOT_REACHED();
2726         break;
2727     case HTMLToken::DOCTYPE:
2728         parseError(token);
2729         break;
2730     case HTMLToken::StartTag: {
2731         if (token.name() == bTag
2732             || token.name() == bigTag
2733             || token.name() == blockquoteTag
2734             || token.name() == bodyTag
2735             || token.name() == brTag
2736             || token.name() == centerTag
2737             || token.name() == codeTag
2738             || token.name() == ddTag
2739             || token.name() == divTag
2740             || token.name() == dlTag
2741             || token.name() == dtTag
2742             || token.name() == emTag
2743             || token.name() == embedTag
2744             || isNumberedHeaderTag(token.name())
2745             || token.name() == headTag
2746             || token.name() == hrTag
2747             || token.name() == iTag
2748             || token.name() == imgTag
2749             || token.name() == liTag
2750             || token.name() == listingTag
2751             || token.name() == menuTag
2752             || token.name() == metaTag
2753             || token.name() == nobrTag
2754             || token.name() == olTag
2755             || token.name() == pTag
2756             || token.name() == preTag
2757             || token.name() == rubyTag
2758             || token.name() == sTag
2759             || token.name() == smallTag
2760             || token.name() == spanTag
2761             || token.name() == strongTag
2762             || token.name() == strikeTag
2763             || token.name() == subTag
2764             || token.name() == supTag
2765             || token.name() == tableTag
2766             || token.name() == ttTag
2767             || token.name() == uTag
2768             || token.name() == ulTag
2769             || token.name() == varTag
2770             || (token.name() == fontTag && (hasAttribute(token, colorAttr) || hasAttribute(token, faceAttr) || hasAttribute(token, sizeAttr)))) {
2771             parseError(token);
2772             m_tree.openElements().popUntilForeignContentScopeMarker();
2773             processStartTag(WTFMove(token));
2774             return;
2775         }
2776         const AtomicString& currentNamespace = adjustedCurrentNode.namespaceURI();
2777         if (currentNamespace == MathMLNames::mathmlNamespaceURI)
2778             adjustMathMLAttributes(token);
2779         if (currentNamespace == SVGNames::svgNamespaceURI) {
2780             adjustSVGTagNameCase(token);
2781             adjustSVGAttributes(token);
2782         }
2783         adjustForeignAttributes(token);
2784         m_tree.insertForeignElement(WTFMove(token), currentNamespace);
2785         break;
2786     }
2787     case HTMLToken::EndTag: {
2788         if (adjustedCurrentNode.namespaceURI() == SVGNames::svgNamespaceURI)
2789             adjustSVGTagNameCase(token);
2790
2791         if (token.name() == SVGNames::scriptTag && m_tree.currentStackItem().hasTagName(SVGNames::scriptTag)) {
2792             if (scriptingContentIsAllowed(m_tree.parserContentPolicy()))
2793                 m_scriptToProcess = &downcast<SVGScriptElement>(m_tree.currentElement());
2794             m_tree.openElements().pop();
2795             return;
2796         }
2797         if (!isInHTMLNamespace(m_tree.currentStackItem())) {
2798             // FIXME: This code just wants an Element* iterator, instead of an ElementRecord*
2799             auto* nodeRecord = &m_tree.openElements().topRecord();
2800             if (nodeRecord->stackItem().localName() != token.name())
2801                 parseError(token);
2802             while (1) {
2803                 if (nodeRecord->stackItem().localName() == token.name()) {
2804                     m_tree.openElements().popUntilPopped(nodeRecord->element());
2805                     return;
2806                 }
2807                 nodeRecord = nodeRecord->next();
2808
2809                 if (isInHTMLNamespace(nodeRecord->stackItem()))
2810                     break;
2811             }
2812         }
2813         // Otherwise, process the token according to the rules given in the section corresponding to the current insertion mode in HTML content.
2814         processEndTag(WTFMove(token));
2815         break;
2816     }
2817     case HTMLToken::Comment:
2818         m_tree.insertComment(WTFMove(token));
2819         return;
2820     case HTMLToken::Character: {
2821         String characters = String(token.characters(), token.charactersLength());
2822         m_tree.insertTextNode(characters);
2823         if (m_framesetOk && !isAllWhitespaceOrReplacementCharacters(characters))
2824             m_framesetOk = false;
2825         break;
2826     }
2827     case HTMLToken::EndOfFile:
2828         ASSERT_NOT_REACHED();
2829         break;
2830     }
2831 }
2832
2833 void HTMLTreeBuilder::finished()
2834 {
2835     ASSERT(!m_destroyed);
2836
2837     if (isParsingFragment())
2838         return;
2839
2840     ASSERT(m_templateInsertionModes.isEmpty());
2841
2842     m_tree.finishedParsing();
2843     // The tree builder might have been destroyed as an indirect result of finishing the parsing.
2844 }
2845
2846 inline void HTMLTreeBuilder::parseError(const AtomicHTMLToken&)
2847 {
2848 }
2849
2850 }