2 * Copyright (C) 2010 Google, Inc. All Rights Reserved.
3 * Copyright (C) 2011 Apple Inc. All rights reserved.
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
14 * THIS SOFTWARE IS PROVIDED BY GOOGLE INC. ``AS IS'' AND ANY
15 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GOOGLE INC. OR
18 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 #include "HTMLTreeBuilder.h"
31 #include "DocumentFragment.h"
32 #include "DocumentType.h"
35 #include "HTMLDocument.h"
36 #include "HTMLElementFactory.h"
37 #include "HTMLFormElement.h"
38 #include "HTMLHtmlElement.h"
39 #include "HTMLNames.h"
40 #include "HTMLScriptElement.h"
41 #include "HTMLToken.h"
42 #include "HTMLTokenizer.h"
43 #include "LocalizedStrings.h"
45 #include "MathMLNames.h"
47 #include "NotImplemented.h"
51 #include "ScriptController.h"
54 #include <wtf/UnusedParam.h>
58 using namespace HTMLNames;
// Returns true when |node| carries one of the tag names tested below:
// dd, dt, li, option, optgroup, p, rp or rt. The generateImpliedEndTags*
// methods in this file use it to decide whether to pop the current node.
62 bool hasImpliedEndTag(ContainerNode* node)
64 return node->hasTagName(ddTag)
65 || node->hasTagName(dtTag)
66 || node->hasTagName(liTag)
67 || node->hasTagName(optionTag)
68 || node->hasTagName(optgroupTag)
69 || node->hasTagName(pTag)
70 || node->hasTagName(rpTag)
71 || node->hasTagName(rtTag);
// Returns whether |tagName| is one of the table-related tags compared below
// (table, tbody, tfoot and thead are the ones visible here).
// shouldFosterParent() applies this to the current element's tag.
// NOTE(review): the expression is not terminated in this view, so the list
// presumably continues with at least one more tag -- confirm in the full file.
74 bool causesFosterParenting(const QualifiedName& tagName)
76 return tagName == tableTag
77 || tagName == tbodyTag
78 || tagName == tfootTag
79 || tagName == theadTag
// Inserts |prpChild| as a child of |parent| and returns it.
// When shouldFosterParent() is true, the child is routed through
// fosterParent() instead and |parent| is not used. Otherwise the child is
// added with parserAddChild(); if it has no parent afterwards it is returned
// without further work.
85 template<typename ChildType>
86 PassRefPtr<ChildType> HTMLConstructionSite::attach(ContainerNode* parent, PassRefPtr<ChildType> prpChild)
88 RefPtr<ChildType> child = prpChild;
90 // FIXME: It's confusing that HTMLConstructionSite::attach does the magic
91 // redirection to the foster parent but HTMLConstructionSite::attachAtSite
92 // doesn't. It feels like we're missing a concept somehow.
93 if (shouldFosterParent()) {
94 fosterParent(child.get());
95 ASSERT(child->attached() || !child->parentNode() || !child->parentNode()->attached());
96 return child.release();
99 parent->parserAddChild(child);
101 // An event handler (DOM Mutation, beforeload, et al.) could have removed
102 // the child, in which case we shouldn't try attaching it.
103 if (!child->parentNode())
104 return child.release();
106 // It's slightly unfortunate that we need to hold a reference to child
107 // here to call attach(). We should investigate whether we can rely on
108 // |parent| to hold a ref at this point. In the common case (at least
109 // for elements), however, we'll get to use this ref in the stack of
// NOTE(review): the sentence above is cut off in this view, and the statement
// guarded by the condition below is not visible either. Presumably it calls
// attach() on the child, as the comment above suggests -- confirm in the full file.
111 if (parent->attached() && !child->attached())
113 return child.release();
// Inserts |prpChild| under site.parent, using site.nextChild as the
// insert-before reference when parserInsertBefore() is the call taken.
// Unlike attach(), this does no foster-parent redirection (see the FIXME
// in attach()).
// NOTE(review): the parserInsertBefore() and parserAddChild() calls appear
// back to back here; the condition that picks one of them is not visible in
// this view (presumably whether site.nextChild is set) -- confirm.
116 void HTMLConstructionSite::attachAtSite(const AttachmentSite& site, PassRefPtr<Node> prpChild)
118 // FIXME: It's unfortunate that we need to hold a reference to child
119 // here to call attach(). We should investigate whether we can rely on
120 // |site.parent| to hold a ref at this point.
121 RefPtr<Node> child = prpChild;
124 site.parent->parserInsertBefore(child, site.nextChild);
126 site.parent->parserAddChild(child);
128 // JavaScript run from beforeload (or DOM Mutation or event handlers)
129 // might have removed the child, in which case we should not attach it.
// NOTE(review): the statement guarded by this condition is not visible here.
130 if (child->parentNode() && site.parent->attached() && !child->attached())
// Builds a construction site that inserts directly into |document|: the
// document serves as both m_document and the attachment root, fragment
// scripting is allowed, and the fragment-parsing and foster-parent
// redirection flags start out false.
134 HTMLConstructionSite::HTMLConstructionSite(Document* document)
135 : m_document(document)
136 , m_attachmentRoot(document)
137 , m_fragmentScriptingPermission(FragmentScriptingAllowed)
138 , m_isParsingFragment(false)
139 , m_redirectAttachToFosterParent(false)
// Builds a construction site for fragment parsing: |fragment| is the
// attachment root, m_document is the fragment's owning document,
// |scriptingPermission| is stored as the fragment scripting permission,
// m_isParsingFragment is set, and foster-parent redirection starts out off.
143 HTMLConstructionSite::HTMLConstructionSite(DocumentFragment* fragment, FragmentScriptingPermission scriptingPermission)
144 : m_document(fragment->document())
145 , m_attachmentRoot(fragment)
146 , m_fragmentScriptingPermission(scriptingPermission)
147 , m_isParsingFragment(true)
148 , m_redirectAttachToFosterParent(false)
// Destructor. NOTE(review): no body statements are visible in this view.
152 HTMLConstructionSite::~HTMLConstructionSite()
// Resets m_attachmentRoot to 0 so this site no longer points at its root.
// NOTE(review): other cleanup statements may exist in the full source; only
// the assignment below is visible here.
156 void HTMLConstructionSite::detach()
159 m_attachmentRoot = 0;
// Supplies the form element for this construction site.
// NOTE(review): the body is not visible in this view; presumably it stores
// |form| in m_form (the member read by takeForm()) -- confirm.
162 void HTMLConstructionSite::setForm(HTMLFormElement* form)
164 // This method should only be needed for HTMLTreeBuilder in the fragment case.
// Hands the current form element to the caller by calling release() on m_form.
169 PassRefPtr<HTMLFormElement> HTMLConstructionSite::takeForm()
171 return m_form.release();
// Tells the frame's loader that the document element is available.
// Does nothing when the document has no frame or when parsing a fragment.
174 void HTMLConstructionSite::dispatchDocumentElementAvailableIfNeeded()
177 if (m_document->frame() && !m_isParsingFragment)
178 m_document->frame()->loader()->dispatchDocumentElementAvailable();
// Creates the <html> element for m_document, gives it the token's attributes,
// attaches it to the attachment root and pushes it onto the open-element
// stack as the html element. Finishes by dispatching the
// document-element-available notification if needed.
181 void HTMLConstructionSite::insertHTMLHtmlStartTagBeforeHTML(AtomicHTMLToken& token)
183 RefPtr<HTMLHtmlElement> element = HTMLHtmlElement::create(m_document);
184 element->setAttributeMap(token.takeAtributes(), m_fragmentScriptingPermission);
185 m_openElements.pushHTMLHtmlElement(attach<Element>(m_attachmentRoot, element.get()));
// insertedByParser() is only called when OFFLINE_WEB_APPLICATIONS is enabled.
// NOTE(review): the matching #endif is not visible in this view.
186 #if ENABLE(OFFLINE_WEB_APPLICATIONS)
187 element->insertedByParser();
189 dispatchDocumentElementAvailableIfNeeded();
// Copies onto |element| every attribute of |token| whose name |element| does
// not already have; attributes already present on the element are left alone.
192 void HTMLConstructionSite::mergeAttributesFromTokenIntoElement(AtomicHTMLToken& token, Element* element)
// NOTE(review): the statement guarded by this check is not visible here;
// presumably an early return when the token has no attributes -- confirm.
194 if (!token.attributes())
197 NamedNodeMap* attributes = element->attributes(false);
198 for (unsigned i = 0; i < token.attributes()->length(); ++i) {
199 Attribute* attribute = token.attributes()->attributeItem(i);
// Only add the attribute when the element has none with this name.
200 if (!attributes->getAttributeItem(attribute->name()))
201 element->setAttribute(attribute->name(), attribute->value());
// Handles an additional <html> start tag by merging the token's attributes
// into the html element already on the open-element stack.
205 void HTMLConstructionSite::insertHTMLHtmlStartTagInBody(AtomicHTMLToken& token)
207 // FIXME: parse error
209 // Fragments do not have a root HTML element, so any additional HTML elements
210 // encountered during fragment parsing should be ignored.
// NOTE(review): the statement guarded by this check is not visible here;
// per the comment above it presumably returns without merging -- confirm.
211 if (m_isParsingFragment)
214 mergeAttributesFromTokenIntoElement(token, m_openElements.htmlElement());
// Handles an additional <body> start tag by merging the token's attributes
// into the body element already on the open-element stack.
217 void HTMLConstructionSite::insertHTMLBodyStartTagInBody(AtomicHTMLToken& token)
219 // FIXME: parse error
220 mergeAttributesFromTokenIntoElement(token, m_openElements.bodyElement());
// Creates a DocumentType node from the DOCTYPE token's name, public
// identifier and system identifier, attaches it to the attachment root, and
// then updates the document's compatibility mode.
223 void HTMLConstructionSite::insertDoctype(AtomicHTMLToken& token)
225 ASSERT(token.type() == HTMLToken::DOCTYPE);
226 attach(m_attachmentRoot, DocumentType::create(m_document, token.name(), String::adopt(token.publicIdentifier()), String::adopt(token.systemIdentifier())));
228 // DOCTYPE nodes are only processed when parsing fragments w/o contextElements, which
229 // never occurs. However, if we ever chose to support such, this code is subtly wrong,
230 // because context-less fragments can determine their own quirks mode, and thus change
231 // parsing rules (like <p> inside <table>). For now we ASSERT that we never hit this code
232 // in a fragment, as changing the owning document's compatibility mode would be wrong.
233 ASSERT(!m_isParsingFragment);
// NOTE(review): the statement guarded by this check is not visible here;
// presumably a release-build bail-out matching the ASSERT above -- confirm.
234 if (m_isParsingFragment)
// A force-quirks token puts the document into QuirksMode.
// NOTE(review): the keyword linking the next two statements is not visible
// here; presumably setCompatibilityModeFromDoctype() is the else branch -- confirm.
237 if (token.forceQuirks())
238 m_document->setCompatibilityMode(Document::QuirksMode);
240 m_document->setCompatibilityModeFromDoctype();
// Creates a Comment node from the token's comment text, in the current
// node's document, and attaches it to the current node.
243 void HTMLConstructionSite::insertComment(AtomicHTMLToken& token)
245 ASSERT(token.type() == HTMLToken::Comment);
246 attach(currentNode(), Comment::create(currentNode()->document(), token.comment()));
// Creates a Comment node from the token's comment text and attaches it to
// the attachment root rather than to the current node.
249 void HTMLConstructionSite::insertCommentOnDocument(AtomicHTMLToken& token)
251 ASSERT(token.type() == HTMLToken::Comment);
252 attach(m_attachmentRoot, Comment::create(m_document, token.comment()));
// Creates a Comment node from the token's comment text and attaches it to
// the open-element stack's root node, using that node's document.
255 void HTMLConstructionSite::insertCommentOnHTMLHtmlElement(AtomicHTMLToken& token)
257 ASSERT(token.type() == HTMLToken::Comment);
258 ContainerNode* parent = m_openElements.rootNode();
259 attach(parent, Comment::create(parent->document(), token.comment()));
// Convenience wrapper: attaches |child| to currentNode() via attach() and
// returns attach()'s result.
262 PassRefPtr<Element> HTMLConstructionSite::attachToCurrent(PassRefPtr<Element> child)
264 return attach(currentNode(), child);
// Creates the head element from |token|, attaches it to the current node,
// remembers it in m_head, and pushes it onto the open-element stack as the
// head element. Foster parenting must not be in effect (asserted).
267 void HTMLConstructionSite::insertHTMLHeadElement(AtomicHTMLToken& token)
269 ASSERT(!shouldFosterParent());
270 m_head = attachToCurrent(createHTMLElement(token));
271 m_openElements.pushHTMLHeadElement(m_head);
// Creates the body element from |token|, attaches it to the current node and
// pushes it onto the open-element stack as the body element. Foster
// parenting must not be in effect (asserted).
274 void HTMLConstructionSite::insertHTMLBodyElement(AtomicHTMLToken& token)
276 ASSERT(!shouldFosterParent());
277 m_openElements.pushHTMLBodyElement(attachToCurrent(createHTMLElement(token)));
// Creates a form element from |token|, marks it with |isDemoted|, attaches it
// to the current node and pushes it onto the open-element stack. The new
// current element is then recorded in m_form.
280 void HTMLConstructionSite::insertHTMLFormElement(AtomicHTMLToken& token, bool isDemoted)
282 RefPtr<Element> element = createHTMLElement(token);
283 ASSERT(element->hasTagName(formTag));
// The assert above is what justifies the downcast to HTMLFormElement.
284 RefPtr<HTMLFormElement> form = static_pointer_cast<HTMLFormElement>(element.release());
285 form->setDemoted(isDemoted);
286 m_openElements.push(attachToCurrent(form.release()));
// |form| was released above, so the element is re-read from the stack.
287 ASSERT(currentElement()->isHTMLElement());
288 ASSERT(currentElement()->hasTagName(formTag));
289 m_form = static_cast<HTMLFormElement*>(currentElement());
// Creates an HTML element from |token|, attaches it to the current node and
// pushes it onto the open-element stack.
292 void HTMLConstructionSite::insertHTMLElement(AtomicHTMLToken& token)
294 m_openElements.push(attachToCurrent(createHTMLElement(token)));
// Creates an HTML element from the start-tag |token| and attaches it to the
// current node without pushing it onto the open-element stack. It then calls
// finishParsingChildren() on the element directly.
297 void HTMLConstructionSite::insertSelfClosingHTMLElement(AtomicHTMLToken& token)
299 ASSERT(token.type() == HTMLToken::StartTag);
300 RefPtr<Element> element = attachToCurrent(createHTMLElement(token));
301 // Normally HTMLElementStack is responsible for calling finishParsingChildren,
302 // but self-closing elements are never in the element stack so the stack
303 // doesn't get a chance to tell them that we're done parsing their children.
304 element->finishParsingChildren();
305 // FIXME: Do we want to acknowledge the token's self-closing flag?
306 // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#acknowledge-self-closing-flag
// Inserts an HTML element for |token| via insertHTMLElement() and then
// appends the resulting current element to m_activeFormattingElements.
309 void HTMLConstructionSite::insertFormattingElement(AtomicHTMLToken& token)
311 // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#the-stack-of-open-elements
312 // Possible active formatting elements include:
313 // a, b, big, code, em, font, i, nobr, s, small, strike, strong, tt, and u.
314 insertHTMLElement(token);
315 m_activeFormattingElements.append(currentElement());
// Creates a script element in the current node's document, attaches it to
// the current node and pushes it onto the open-element stack.
318 void HTMLConstructionSite::insertScriptElement(AtomicHTMLToken& token)
320 RefPtr<HTMLScriptElement> element = HTMLScriptElement::create(scriptTag, currentNode()->document(), true);
// The token's attributes are applied only when fragment scripting is allowed;
// otherwise the script element is inserted without them.
321 if (m_fragmentScriptingPermission == FragmentScriptingAllowed)
322 element->setAttributeMap(token.takeAtributes(), m_fragmentScriptingPermission);
323 m_openElements.push(attachToCurrent(element.release()));
// Creates an element named by the start-tag |token| in |namespaceURI| and
// attaches it to the current node. It is pushed onto the open-element stack
// only when the token is not self-closing.
326 void HTMLConstructionSite::insertForeignElement(AtomicHTMLToken& token, const AtomicString& namespaceURI)
328 ASSERT(token.type() == HTMLToken::StartTag);
329 notImplemented(); // parseError when xmlns or xmlns:xlink are wrong.
331 RefPtr<Element> element = attachToCurrent(createElement(token, namespaceURI));
332 if (!token.selfClosing())
333 m_openElements.push(element);
// Inserts |characters| as text at the current insertion point. The site
// defaults to the current node and is redirected by findFosterSite() when
// foster parenting applies. If the node just before the insertion point is a
// text node, the characters are appended to it; a new Text node is created
// at the site by the last statement.
// NOTE(review): the declaration and initialisation of |site|, and whatever
// follows the append (presumably an early return so that no second node is
// created), are not visible in this view -- confirm in the full file.
336 void HTMLConstructionSite::insertTextNode(const String& characters)
339 site.parent = currentNode();
341 if (shouldFosterParent())
342 findFosterSite(site);
// The node preceding the insertion point: the sibling before site.nextChild,
// or the parent's last child when there is no nextChild.
344 Node* previousChild = site.nextChild ? site.nextChild->previousSibling() : site.parent->lastChild();
345 if (previousChild && previousChild->isTextNode()) {
346 // FIXME: We're only supposed to append to this text node if it
347 // was the last text node inserted by the parser.
348 CharacterData* textNode = static_cast<CharacterData*>(previousChild);
349 textNode->parserAppendData(characters);
353 attachAtSite(site, Text::create(site.parent->document(), characters));
// Creates (without attaching) an element whose qualified name is built from
// the token's name and |namespaceURI|, in the current node's document, and
// gives it the token's attributes.
356 PassRefPtr<Element> HTMLConstructionSite::createElement(AtomicHTMLToken& token, const AtomicString& namespaceURI)
358 QualifiedName tagName(nullAtom, token.name(), namespaceURI);
359 RefPtr<Element> element = currentNode()->document()->createElement(tagName, true);
360 element->setAttributeMap(token.takeAtributes(), m_fragmentScriptingPermission);
361 return element.release();
// Creates (without attaching) an HTML element named by |token| in the XHTML
// namespace through HTMLElementFactory, passing the current form(), and
// gives it the token's attributes.
364 PassRefPtr<Element> HTMLConstructionSite::createHTMLElement(AtomicHTMLToken& token)
366 QualifiedName tagName(nullAtom, token.name(), xhtmlNamespaceURI);
367 // FIXME: This can't use HTMLConstructionSite::createElement because we
368 // have to pass the current form element. We should rework form association
369 // to occur after construction to allow better code sharing here.
370 RefPtr<Element> element = HTMLElementFactory::createHTMLElement(tagName, currentNode()->document(), form(), true);
371 element->setAttributeMap(token.takeAtributes(), m_fragmentScriptingPermission);
372 ASSERT(element->isHTMLElement());
373 return element.release();
// Creates a new HTML element modelled on the element held by |record|;
// forwards to createHTMLElementFromSavedElement().
376 PassRefPtr<Element> HTMLConstructionSite::createHTMLElementFromElementRecord(HTMLElementStack::ElementRecord* record)
378 return createHTMLElementFromSavedElement(record->element());
// Builds a new NamedNodeMap holding a copy (same name and value, made with
// Attribute::createMapped) of each attribute in |element|'s attribute map.
// NOTE(review): lines between fetching |attributes| and creating the new map
// are not visible in this view; presumably a null check on |attributes| -- confirm.
383 PassRefPtr<NamedNodeMap> cloneAttributes(Element* element)
385 NamedNodeMap* attributes = element->attributes(true);
389 RefPtr<NamedNodeMap> newAttributes = NamedNodeMap::create();
390 for (size_t i = 0; i < attributes->length(); ++i) {
391 Attribute* attribute = attributes->attributeItem(i);
392 RefPtr<Attribute> clone = Attribute::createMapped(attribute->name(), attribute->value());
393 newAttributes->addAttribute(clone);
395 return newAttributes.release();
// Creates a new HTML element with the same local name as |element| and a
// clone of its current attributes. It does so by synthesising a start-tag
// token and passing it to createHTMLElement(). |element| must be an HTML
// element (asserted).
400 PassRefPtr<Element> HTMLConstructionSite::createHTMLElementFromSavedElement(Element* element)
402 // FIXME: This method is wrong. We should be using the original token.
403 // Using an Element* causes us to fail examples like this:
404 // <b id="1"><p><script>document.getElementById("1").id = "2"</script></p>TEXT</b>
405 // When reconstructTheActiveFormattingElements calls this method to open
406 // a second <b> tag to wrap TEXT, it will have id "2", even though the HTML5
407 // spec implies it should be "1". Minefield matches the HTML5 spec here.
409 ASSERT(element->isHTMLElement()); // otherwise localName() might be wrong.
410 AtomicHTMLToken fakeToken(HTMLToken::StartTag, element->localName(), cloneAttributes(element));
411 return createHTMLElement(fakeToken);
// Finds the index in m_activeFormattingElements of the first entry that
// needs reopening and stores it in |firstUnopenElementIndex|.
// When an entry is a marker or is already on the open-element stack, the
// result is the index just after it, and the function returns whether that
// index is still inside the list.
// NOTE(review): the loop construct that walks |index|, and the return
// statements for the empty-list and fall-through cases, are not visible in
// this view. Starting at size() suggests a backwards scan -- confirm.
414 bool HTMLConstructionSite::indexOfFirstUnopenFormattingElement(unsigned& firstUnopenElementIndex) const
416 if (m_activeFormattingElements.isEmpty())
418 unsigned index = m_activeFormattingElements.size();
421 const HTMLFormattingElementList::Entry& entry = m_activeFormattingElements.at(index);
422 if (entry.isMarker() || m_openElements.contains(entry.element())) {
423 firstUnopenElementIndex = index + 1;
424 return firstUnopenElementIndex < m_activeFormattingElements.size();
427 firstUnopenElementIndex = index;
// Reopens active formatting elements. Starting at the index reported by
// indexOfFirstUnopenFormattingElement(), each entry through the end of the
// list gets a fresh element created from the saved one, attached to the
// current node and pushed onto the open-element stack. The list entry is
// then updated to refer to the new element.
431 void HTMLConstructionSite::reconstructTheActiveFormattingElements()
433 unsigned firstUnopenElementIndex;
// NOTE(review): the statement guarded by this check is not visible here;
// presumably an early return when there is nothing to reopen -- confirm.
434 if (!indexOfFirstUnopenFormattingElement(firstUnopenElementIndex))
437 unsigned unopenEntryIndex = firstUnopenElementIndex;
438 ASSERT(unopenEntryIndex < m_activeFormattingElements.size());
439 for (; unopenEntryIndex < m_activeFormattingElements.size(); ++unopenEntryIndex) {
440 HTMLFormattingElementList::Entry& unopenedEntry = m_activeFormattingElements.at(unopenEntryIndex);
441 RefPtr<Element> reconstructed = createHTMLElementFromSavedElement(unopenedEntry.element());
442 m_openElements.push(attachToCurrent(reconstructed.release()));
// After the push, currentElement() is the element that was just attached.
443 unopenedEntry.replaceElement(currentElement());
// Pops the open-element stack while the current node satisfies
// hasImpliedEndTag(), stopping early at a node whose local name is |tagName|.
447 void HTMLConstructionSite::generateImpliedEndTagsWithExclusion(const AtomicString& tagName)
449 while (hasImpliedEndTag(currentNode()) && !currentNode()->hasLocalName(tagName))
450 m_openElements.pop();
// Pops the open-element stack for as long as the current node satisfies
// hasImpliedEndTag().
453 void HTMLConstructionSite::generateImpliedEndTags()
455 while (hasImpliedEndTag(currentNode()))
456 m_openElements.pop();
// Fills in |site| with the place where a foster-parented node should go,
// based on the topmost <table> on the open-element stack:
//  - if that table has a parent node, the site is that parent, with the
//    table itself as nextChild (so insertion happens before the table);
//  - the site parent can instead be the element of the stack record that
//    follows the table's record;
//  - or it can be the stack's root node.
// NOTE(review): the else/return structure separating these cases, and any
// assignments to site.nextChild in the last two, are not visible in this
// view. Presumably the second case applies when the table has no parent and
// the third when no table is on the stack -- confirm.
459 void HTMLConstructionSite::findFosterSite(AttachmentSite& site)
461 HTMLElementStack::ElementRecord* lastTableElementRecord = m_openElements.topmost(tableTag.localName());
462 if (lastTableElementRecord) {
463 Element* lastTableElement = lastTableElementRecord->element();
464 if (ContainerNode* parent = lastTableElement->parentNode()) {
465 site.parent = parent;
466 site.nextChild = lastTableElement;
469 site.parent = lastTableElementRecord->next()->element();
474 site.parent = m_openElements.rootNode(); // DocumentFragment
// Returns true when attach() should redirect to the foster parent. That
// requires the redirect flag to be set, the current node to be an element,
// and that element's tag to satisfy causesFosterParenting().
478 bool HTMLConstructionSite::shouldFosterParent() const
480 return m_redirectAttachToFosterParent
481 && currentNode()->isElementNode()
482 && causesFosterParenting(currentElement()->tagQName());
// Inserts |node| at the foster-parent location: findFosterSite() computes
// the site and attachAtSite() performs the insertion.
// NOTE(review): the declaration of |site| is not visible in this view.
485 void HTMLConstructionSite::fosterParent(Node* node)
488 findFosterSite(site);
489 attachAtSite(site, node);