Fix assert when foster parenting self-closing elements
[WebKit-https.git] / Source / WebCore / html / parser / HTMLConstructionSite.cpp
1 /*
2  * Copyright (C) 2010 Google, Inc. All Rights Reserved.
3  * Copyright (C) 2011 Apple Inc. All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY GOOGLE INC. ``AS IS'' AND ANY
15  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL GOOGLE INC. OR
18  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  */
26
27 #include "config.h"
28 #include "HTMLTreeBuilder.h"
29
30 #include "Comment.h"
31 #include "DocumentFragment.h"
32 #include "DocumentType.h"
33 #include "Element.h"
34 #include "Frame.h"
35 #include "HTMLDocument.h"
36 #include "HTMLElementFactory.h"
37 #include "HTMLFormElement.h"
38 #include "HTMLHtmlElement.h"
39 #include "HTMLNames.h"
40 #include "HTMLParserIdioms.h"
41 #include "HTMLScriptElement.h"
42 #include "HTMLToken.h"
43 #include "HTMLTokenizer.h"
44 #include "LocalizedStrings.h"
45 #if ENABLE(MATHML)
46 #include "MathMLNames.h"
47 #endif
48 #include "NotImplemented.h"
49 #if ENABLE(SVG)
50 #include "SVGNames.h"
51 #endif
52 #include "Settings.h"
53 #include "Text.h"
54 #include <wtf/UnusedParam.h>
55
56 namespace WebCore {
57
58 using namespace HTMLNames;
59
60 namespace {
61
62 bool hasImpliedEndTag(ContainerNode* node)
63 {
64     return node->hasTagName(ddTag)
65         || node->hasTagName(dtTag)
66         || node->hasTagName(liTag)
67         || node->hasTagName(optionTag)
68         || node->hasTagName(optgroupTag)
69         || node->hasTagName(pTag)
70         || node->hasTagName(rpTag)
71         || node->hasTagName(rtTag);
72 }
73
74 bool causesFosterParenting(const QualifiedName& tagName)
75 {
76     return tagName == tableTag
77         || tagName == tbodyTag
78         || tagName == tfootTag
79         || tagName == theadTag
80         || tagName == trTag;
81 }
82
83 inline bool isAllWhitespace(const String& string)
84 {
85     return string.isAllSpecialCharacters<isHTMLSpace>();
86 }
87
88 } // namespace
89
90 static inline void executeTask(HTMLConstructionSiteTask& task)
91 {
92     if (task.nextChild)
93         task.parent->parserInsertBefore(task.child.get(), task.nextChild.get());
94     else
95         task.parent->parserAddChild(task.child.get());
96
97     // JavaScript run from beforeload (or DOM Mutation or event handlers)
98     // might have removed the child, in which case we should not attach it.
99
100     if (task.child->parentNode() && task.parent->attached() && !task.child->attached())
101         task.child->attach();
102
103     task.child->beginParsingChildren();
104
105     if (task.selfClosing)
106         task.child->finishParsingChildren();
107 }
108
109 // FIXME: This function should return void. Callers should keep a pointer to
110 // the child if they want one.
111 template<typename ChildType>
112 PassRefPtr<ChildType> HTMLConstructionSite::attach(ContainerNode* parent, PassRefPtr<ChildType> prpChild)
113 {
114     RefPtr<ChildType> child = prpChild;
115
116     HTMLConstructionSiteTask task;
117     task.parent = parent;
118     task.child = child.get();
119
120     if (shouldFosterParent()) {
121         fosterParent(task.child);
122         return child.release();
123     }
124
125     // Add as a sibling of the parent if we have reached the maximum depth allowed.
126     if (m_openElements.stackDepth() > m_maximumDOMTreeDepth)
127         task.parent = task.parent->parentNode();
128
129     m_attachmentQueue.append(task);
130     return child.release();
131 }
132
133 void HTMLConstructionSite::executeQueuedTasks()
134 {
135     const size_t size = m_attachmentQueue.size();
136     if (!size)
137         return;
138
139     // Copy the task queue into a local variable in case executeTask
140     // re-enters the parser.
141     AttachmentQueue queue;
142     queue.swap(m_attachmentQueue);
143
144     for (size_t i = 0; i < size; ++i)
145         executeTask(queue[i]);
146
147     // We might be detached now.
148 }
149
150 HTMLConstructionSite::HTMLConstructionSite(Document* document, unsigned maximumDOMTreeDepth)
151     : m_document(document)
152     , m_attachmentRoot(document)
153     , m_fragmentScriptingPermission(FragmentScriptingAllowed)
154     , m_isParsingFragment(false)
155     , m_redirectAttachToFosterParent(false)
156     , m_maximumDOMTreeDepth(maximumDOMTreeDepth)
157 {
158 }
159
160 HTMLConstructionSite::HTMLConstructionSite(DocumentFragment* fragment, FragmentScriptingPermission scriptingPermission, unsigned maximumDOMTreeDepth)
161     : m_document(fragment->document())
162     , m_attachmentRoot(fragment)
163     , m_fragmentScriptingPermission(scriptingPermission)
164     , m_isParsingFragment(true)
165     , m_redirectAttachToFosterParent(false)
166     , m_maximumDOMTreeDepth(maximumDOMTreeDepth)
167 {
168 }
169
170 HTMLConstructionSite::~HTMLConstructionSite()
171 {
172 }
173
174 void HTMLConstructionSite::detach()
175 {
176     m_document = 0;
177     m_attachmentRoot = 0;
178 }
179
180 void HTMLConstructionSite::setForm(HTMLFormElement* form)
181 {
182     // This method should only be needed for HTMLTreeBuilder in the fragment case.
183     ASSERT(!m_form);
184     m_form = form;
185 }
186
187 PassRefPtr<HTMLFormElement> HTMLConstructionSite::takeForm()
188 {
189     return m_form.release();
190 }
191
192 void HTMLConstructionSite::dispatchDocumentElementAvailableIfNeeded()
193 {
194     ASSERT(m_document);
195     if (m_document->frame() && !m_isParsingFragment)
196         m_document->frame()->loader()->dispatchDocumentElementAvailable();
197 }
198
199 void HTMLConstructionSite::insertHTMLHtmlStartTagBeforeHTML(AtomicHTMLToken& token)
200 {
201     RefPtr<HTMLHtmlElement> element = HTMLHtmlElement::create(m_document);
202     element->parserSetAttributeMap(token.takeAttributes(), m_fragmentScriptingPermission);
203     m_openElements.pushHTMLHtmlElement(attach<Element>(m_attachmentRoot, element.get()));
204     element->insertedByParser();
205     dispatchDocumentElementAvailableIfNeeded();
206 }
207
208 void HTMLConstructionSite::mergeAttributesFromTokenIntoElement(AtomicHTMLToken& token, Element* element)
209 {
210     if (!token.attributes())
211         return;
212
213     NamedNodeMap* attributes = element->attributes(false);
214     for (unsigned i = 0; i < token.attributes()->length(); ++i) {
215         Attribute* attribute = token.attributes()->attributeItem(i);
216         if (!attributes->getAttributeItem(attribute->name()))
217             element->setAttribute(attribute->name(), attribute->value());
218     }
219 }
220
221 void HTMLConstructionSite::insertHTMLHtmlStartTagInBody(AtomicHTMLToken& token)
222 {
223     // FIXME: parse error
224     
225     // Fragments do not have a root HTML element, so any additional HTML elements
226     // encountered during fragment parsing should be ignored.
227     if (m_isParsingFragment)
228         return;
229
230     mergeAttributesFromTokenIntoElement(token, m_openElements.htmlElement());
231 }
232
233 void HTMLConstructionSite::insertHTMLBodyStartTagInBody(AtomicHTMLToken& token)
234 {
235     // FIXME: parse error
236     mergeAttributesFromTokenIntoElement(token, m_openElements.bodyElement());
237 }
238
239 void HTMLConstructionSite::insertDoctype(AtomicHTMLToken& token)
240 {
241     ASSERT(token.type() == HTMLTokenTypes::DOCTYPE);
242     attach(m_attachmentRoot, DocumentType::create(m_document, token.name(), String::adopt(token.publicIdentifier()), String::adopt(token.systemIdentifier())));
243
244     // DOCTYPE nodes are only processed when parsing fragments w/o contextElements, which
245     // never occurs.  However, if we ever chose to support such, this code is subtly wrong,
246     // because context-less fragments can determine their own quirks mode, and thus change
247     // parsing rules (like <p> inside <table>).  For now we ASSERT that we never hit this code
248     // in a fragment, as changing the owning document's compatibility mode would be wrong.
249     ASSERT(!m_isParsingFragment);
250     if (m_isParsingFragment)
251         return;
252
253     if (token.forceQuirks())
254         m_document->setCompatibilityMode(Document::QuirksMode);
255     else {
256         // We need to actually add the Doctype node to the DOM.
257         executeQueuedTasks();
258         m_document->setCompatibilityModeFromDoctype();
259     }
260 }
261
262 void HTMLConstructionSite::insertComment(AtomicHTMLToken& token)
263 {
264     ASSERT(token.type() == HTMLTokenTypes::Comment);
265     attach(currentNode(), Comment::create(currentNode()->document(), token.comment()));
266 }
267
268 void HTMLConstructionSite::insertCommentOnDocument(AtomicHTMLToken& token)
269 {
270     ASSERT(token.type() == HTMLTokenTypes::Comment);
271     attach(m_attachmentRoot, Comment::create(m_document, token.comment()));
272 }
273
274 void HTMLConstructionSite::insertCommentOnHTMLHtmlElement(AtomicHTMLToken& token)
275 {
276     ASSERT(token.type() == HTMLTokenTypes::Comment);
277     ContainerNode* parent = m_openElements.rootNode();
278     attach(parent, Comment::create(parent->document(), token.comment()));
279 }
280
281 PassRefPtr<Element> HTMLConstructionSite::attachToCurrent(PassRefPtr<Element> child)
282 {
283     return attach(currentNode(), child);
284 }
285
286 void HTMLConstructionSite::insertHTMLHeadElement(AtomicHTMLToken& token)
287 {
288     ASSERT(!shouldFosterParent());
289     m_head = attachToCurrent(createHTMLElement(token));
290     m_openElements.pushHTMLHeadElement(m_head);
291 }
292
293 void HTMLConstructionSite::insertHTMLBodyElement(AtomicHTMLToken& token)
294 {
295     ASSERT(!shouldFosterParent());
296     m_openElements.pushHTMLBodyElement(attachToCurrent(createHTMLElement(token)));
297 }
298
299 void HTMLConstructionSite::insertHTMLFormElement(AtomicHTMLToken& token, bool isDemoted)
300 {
301     RefPtr<Element> element = createHTMLElement(token);
302     ASSERT(element->hasTagName(formTag));
303     RefPtr<HTMLFormElement> form = static_pointer_cast<HTMLFormElement>(element.release());
304     form->setDemoted(isDemoted);
305     m_openElements.push(attachToCurrent(form.release()));
306     ASSERT(currentElement()->isHTMLElement());
307     ASSERT(currentElement()->hasTagName(formTag));
308     m_form = static_cast<HTMLFormElement*>(currentElement());
309 }
310
311 void HTMLConstructionSite::insertHTMLElement(AtomicHTMLToken& token)
312 {
313     m_openElements.push(attachToCurrent(createHTMLElement(token)));
314 }
315
316 void HTMLConstructionSite::insertSelfClosingHTMLElement(AtomicHTMLToken& token)
317 {
318     ASSERT(token.type() == HTMLTokenTypes::StartTag);
319     RefPtr<Element> element = attachToCurrent(createHTMLElement(token));
320     // Normally HTMLElementStack is responsible for calling finishParsingChildren,
321     // but self-closing elements are never in the element stack so the stack
322     // doesn't get a chance to tell them that we're done parsing their children.
323     m_attachmentQueue.last().selfClosing = true;
324     // FIXME: Do we want to acknowledge the token's self-closing flag?
325     // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#acknowledge-self-closing-flag
326 }
327
328 void HTMLConstructionSite::insertFormattingElement(AtomicHTMLToken& token)
329 {
330     // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#the-stack-of-open-elements
331     // Possible active formatting elements include:
332     // a, b, big, code, em, font, i, nobr, s, small, strike, strong, tt, and u.
333     insertHTMLElement(token);
334     m_activeFormattingElements.append(currentElement());
335 }
336
337 void HTMLConstructionSite::insertScriptElement(AtomicHTMLToken& token)
338 {
339     RefPtr<HTMLScriptElement> element = HTMLScriptElement::create(scriptTag, currentNode()->document(), true);
340     if (m_fragmentScriptingPermission == FragmentScriptingAllowed)
341         element->parserSetAttributeMap(token.takeAttributes(), m_fragmentScriptingPermission);
342     m_openElements.push(attachToCurrent(element.release()));
343 }
344
345 void HTMLConstructionSite::insertForeignElement(AtomicHTMLToken& token, const AtomicString& namespaceURI)
346 {
347     ASSERT(token.type() == HTMLTokenTypes::StartTag);
348     notImplemented(); // parseError when xmlns or xmlns:xlink are wrong.
349
350     RefPtr<Element> element = attachToCurrent(createElement(token, namespaceURI));
351     if (!token.selfClosing())
352         m_openElements.push(element);
353 }
354
355 void HTMLConstructionSite::insertTextNode(const String& characters, WhitespaceMode whitespaceMode)
356 {
357     HTMLConstructionSiteTask task;
358     task.parent = currentNode();
359     task.nextChild = 0;
360     if (shouldFosterParent())
361         findFosterSite(task);
362
363     // Strings composed entirely of whitespace are likely to be repeated.
364     // Turn them into AtomicString so we share a single string for each.
365     bool shouldUseAtomicString = whitespaceMode == AllWhitespace
366         || (whitespaceMode == WhitespaceUnknown && isAllWhitespace(characters));
367
368     unsigned currentPosition = 0;
369
370     // FIXME: Splitting text nodes into smaller chunks contradicts HTML5 spec, but is currently necessary
371     // for performance, see <https://bugs.webkit.org/show_bug.cgi?id=55898>.
372
373     Node* previousChild = task.nextChild ? task.nextChild->previousSibling() : task.parent->lastChild();
374     if (previousChild && previousChild->isTextNode()) {
375         // FIXME: We're only supposed to append to this text node if it
376         // was the last text node inserted by the parser.
377         CharacterData* textNode = static_cast<CharacterData*>(previousChild);
378         currentPosition = textNode->parserAppendData(characters.characters(), characters.length(), Text::defaultLengthLimit);
379     }
380
381     while (currentPosition < characters.length()) {
382         RefPtr<Text> textNode = Text::createWithLengthLimit(task.parent->document(), shouldUseAtomicString ? AtomicString(characters).string() : characters, currentPosition);
383         // If we have a whole string of unbreakable characters the above could lead to an infinite loop. Exceeding the length limit is the lesser evil.
384         if (!textNode->length()) {
385             String substring = characters.substring(currentPosition);
386             textNode = Text::create(task.parent->document(), shouldUseAtomicString ? AtomicString(substring).string() : substring);
387         }
388
389         currentPosition += textNode->length();
390         ASSERT(currentPosition <= characters.length());
391         task.child = textNode.release();
392         executeTask(task);
393     }
394 }
395
396 PassRefPtr<Element> HTMLConstructionSite::createElement(AtomicHTMLToken& token, const AtomicString& namespaceURI)
397 {
398     QualifiedName tagName(nullAtom, token.name(), namespaceURI);
399     RefPtr<Element> element = currentNode()->document()->createElement(tagName, true);
400     element->parserSetAttributeMap(token.takeAttributes(), m_fragmentScriptingPermission);
401     return element.release();
402 }
403
404 PassRefPtr<Element> HTMLConstructionSite::createHTMLElement(AtomicHTMLToken& token)
405 {
406     QualifiedName tagName(nullAtom, token.name(), xhtmlNamespaceURI);
407     // FIXME: This can't use HTMLConstructionSite::createElement because we
408     // have to pass the current form element.  We should rework form association
409     // to occur after construction to allow better code sharing here.
410     RefPtr<Element> element = HTMLElementFactory::createHTMLElement(tagName, currentNode()->document(), form(), true);
411     element->parserSetAttributeMap(token.takeAttributes(), m_fragmentScriptingPermission);
412     ASSERT(element->isHTMLElement());
413     return element.release();
414 }
415
416 PassRefPtr<Element> HTMLConstructionSite::createHTMLElementFromElementRecord(HTMLElementStack::ElementRecord* record)
417 {
418     return createHTMLElementFromSavedElement(record->element());
419 }
420
421 namespace {
422
423 PassRefPtr<NamedNodeMap> cloneAttributes(Element* element)
424 {
425     NamedNodeMap* attributes = element->attributes(true);
426     if (!attributes)
427         return 0;
428
429     RefPtr<NamedNodeMap> newAttributes = NamedNodeMap::create();
430     for (size_t i = 0; i < attributes->length(); ++i) {
431         Attribute* attribute = attributes->attributeItem(i);
432         RefPtr<Attribute> clone = Attribute::createMapped(attribute->name(), attribute->value());
433         newAttributes->addAttribute(clone);
434     }
435     return newAttributes.release();
436 }
437
438 }
439
440 PassRefPtr<Element> HTMLConstructionSite::createHTMLElementFromSavedElement(Element* element)
441 {
442     // FIXME: This method is wrong.  We should be using the original token.
443     // Using an Element* causes us to fail examples like this:
444     // <b id="1"><p><script>document.getElementById("1").id = "2"</script></p>TEXT</b>
445     // When reconstructTheActiveFormattingElements calls this method to open
446     // a second <b> tag to wrap TEXT, it will have id "2", even though the HTML5
447     // spec implies it should be "1".  Minefield matches the HTML5 spec here.
448
449     ASSERT(element->isHTMLElement()); // otherwise localName() might be wrong.
450     AtomicHTMLToken fakeToken(HTMLTokenTypes::StartTag, element->localName(), cloneAttributes(element));
451     return createHTMLElement(fakeToken);
452 }
453
454 bool HTMLConstructionSite::indexOfFirstUnopenFormattingElement(unsigned& firstUnopenElementIndex) const
455 {
456     if (m_activeFormattingElements.isEmpty())
457         return false;
458     unsigned index = m_activeFormattingElements.size();
459     do {
460         --index;
461         const HTMLFormattingElementList::Entry& entry = m_activeFormattingElements.at(index);
462         if (entry.isMarker() || m_openElements.contains(entry.element())) {
463             firstUnopenElementIndex = index + 1;
464             return firstUnopenElementIndex < m_activeFormattingElements.size();
465         }
466     } while (index);
467     firstUnopenElementIndex = index;
468     return true;
469 }
470
471 void HTMLConstructionSite::reconstructTheActiveFormattingElements()
472 {
473     unsigned firstUnopenElementIndex;
474     if (!indexOfFirstUnopenFormattingElement(firstUnopenElementIndex))
475         return;
476
477     unsigned unopenEntryIndex = firstUnopenElementIndex;
478     ASSERT(unopenEntryIndex < m_activeFormattingElements.size());
479     for (; unopenEntryIndex < m_activeFormattingElements.size(); ++unopenEntryIndex) {
480         HTMLFormattingElementList::Entry& unopenedEntry = m_activeFormattingElements.at(unopenEntryIndex);
481         RefPtr<Element> reconstructed = createHTMLElementFromSavedElement(unopenedEntry.element());
482         m_openElements.push(attachToCurrent(reconstructed.release()));
483         unopenedEntry.replaceElement(currentElement());
484     }
485 }
486
487 void HTMLConstructionSite::generateImpliedEndTagsWithExclusion(const AtomicString& tagName)
488 {
489     while (hasImpliedEndTag(currentNode()) && !currentNode()->hasLocalName(tagName))
490         m_openElements.pop();
491 }
492
493 void HTMLConstructionSite::generateImpliedEndTags()
494 {
495     while (hasImpliedEndTag(currentNode()))
496         m_openElements.pop();
497 }
498
499 void HTMLConstructionSite::findFosterSite(HTMLConstructionSiteTask& task)
500 {
501     HTMLElementStack::ElementRecord* lastTableElementRecord = m_openElements.topmost(tableTag.localName());
502     if (lastTableElementRecord) {
503         Element* lastTableElement = lastTableElementRecord->element();
504         if (ContainerNode* parent = lastTableElement->parentNode()) {
505             task.parent = parent;
506             task.nextChild = lastTableElement;
507             return;
508         }
509         task.parent = lastTableElementRecord->next()->element();
510         return;
511     }
512     // Fragment case
513     task.parent = m_openElements.rootNode(); // DocumentFragment
514 }
515
516 bool HTMLConstructionSite::shouldFosterParent() const
517 {
518     return m_redirectAttachToFosterParent
519         && currentNode()->isElementNode()
520         && causesFosterParenting(currentElement()->tagQName());
521 }
522
523 void HTMLConstructionSite::fosterParent(PassRefPtr<Node> node)
524 {
525     HTMLConstructionSiteTask task;
526     task.nextChild = 0;
527     findFosterSite(task);
528     task.child = node;
529     m_attachmentQueue.append(task);
530 }
531
532 }