Use the original token to create an element in "reconstruct the active formatting...
[WebKit-https.git] / Source / WebCore / html / parser / HTMLConstructionSite.cpp
1 /*
2  * Copyright (C) 2010 Google, Inc. All Rights Reserved.
3  * Copyright (C) 2011 Apple Inc. All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY GOOGLE INC. ``AS IS'' AND ANY
15  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL GOOGLE INC. OR
18  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  */
26
27 #include "config.h"
28 #include "HTMLTreeBuilder.h"
29
30 #include "Comment.h"
31 #include "DocumentFragment.h"
32 #include "DocumentType.h"
33 #include "Element.h"
34 #include "Frame.h"
35 #include "HTMLDocument.h"
36 #include "HTMLElementFactory.h"
37 #include "HTMLFormElement.h"
38 #include "HTMLHtmlElement.h"
39 #include "HTMLNames.h"
40 #include "HTMLParserIdioms.h"
41 #include "HTMLScriptElement.h"
42 #include "HTMLStackItem.h"
43 #include "HTMLToken.h"
44 #include "HTMLTokenizer.h"
45 #include "LocalizedStrings.h"
46 #if ENABLE(MATHML)
47 #include "MathMLNames.h"
48 #endif
49 #include "NotImplemented.h"
50 #if ENABLE(SVG)
51 #include "SVGNames.h"
52 #endif
53 #include "Settings.h"
54 #include "Text.h"
55 #include <wtf/UnusedParam.h>
56
57 namespace WebCore {
58
59 using namespace HTMLNames;
60
61 namespace {
62
63 bool hasImpliedEndTag(ContainerNode* node)
64 {
65     return node->hasTagName(ddTag)
66         || node->hasTagName(dtTag)
67         || node->hasTagName(liTag)
68         || node->hasTagName(optionTag)
69         || node->hasTagName(optgroupTag)
70         || node->hasTagName(pTag)
71         || node->hasTagName(rpTag)
72         || node->hasTagName(rtTag);
73 }
74
75 bool causesFosterParenting(const QualifiedName& tagName)
76 {
77     return tagName == tableTag
78         || tagName == tbodyTag
79         || tagName == tfootTag
80         || tagName == theadTag
81         || tagName == trTag;
82 }
83
84 inline bool isAllWhitespace(const String& string)
85 {
86     return string.isAllSpecialCharacters<isHTMLSpace>();
87 }
88
89 } // namespace
90
91 static inline void executeTask(HTMLConstructionSiteTask& task)
92 {
93     if (task.nextChild)
94         task.parent->parserInsertBefore(task.child.get(), task.nextChild.get());
95     else
96         task.parent->parserAddChild(task.child.get());
97
98     // JavaScript run from beforeload (or DOM Mutation or event handlers)
99     // might have removed the child, in which case we should not attach it.
100
101     if (task.child->parentNode() && task.parent->attached() && !task.child->attached())
102         task.child->attach();
103
104     task.child->beginParsingChildren();
105
106     if (task.selfClosing)
107         task.child->finishParsingChildren();
108 }
109
110 void HTMLConstructionSite::attachLater(ContainerNode* parent, PassRefPtr<Node> prpChild, bool selfClosing)
111 {
112     HTMLConstructionSiteTask task;
113     task.parent = parent;
114     task.child = prpChild;
115     task.selfClosing = selfClosing;
116
117     if (shouldFosterParent()) {
118         fosterParent(task.child);
119         return;
120     }
121
122     // Add as a sibling of the parent if we have reached the maximum depth allowed.
123     if (m_openElements.stackDepth() > m_maximumDOMTreeDepth && task.parent->parentNode())
124         task.parent = task.parent->parentNode();
125
126     ASSERT(task.parent);
127     m_attachmentQueue.append(task);
128 }
129
130 void HTMLConstructionSite::executeQueuedTasks()
131 {
132     const size_t size = m_attachmentQueue.size();
133     if (!size)
134         return;
135
136     // Copy the task queue into a local variable in case executeTask
137     // re-enters the parser.
138     AttachmentQueue queue;
139     queue.swap(m_attachmentQueue);
140
141     for (size_t i = 0; i < size; ++i)
142         executeTask(queue[i]);
143
144     // We might be detached now.
145 }
146
// Document-parsing constructor: nodes are attached with |document| itself as
// the attachment root, fragment mode is off and scripting content is allowed.
// |maximumDOMTreeDepth| is the depth limit consulted by attachLater().
HTMLConstructionSite::HTMLConstructionSite(Document* document, unsigned maximumDOMTreeDepth)
    : m_document(document)
    , m_attachmentRoot(document)
    , m_fragmentScriptingPermission(AllowScriptingContent)
    , m_isParsingFragment(false)
    , m_redirectAttachToFosterParent(false)
    , m_maximumDOMTreeDepth(maximumDOMTreeDepth)
{
}
156
// Fragment-parsing constructor: nodes are attached with |fragment| as the
// attachment root, the owning document is taken from the fragment, and
// |scriptingPermission| is stored for use when elements are created.
// |maximumDOMTreeDepth| is the depth limit consulted by attachLater().
HTMLConstructionSite::HTMLConstructionSite(DocumentFragment* fragment, FragmentScriptingPermission scriptingPermission, unsigned maximumDOMTreeDepth)
    : m_document(fragment->document())
    , m_attachmentRoot(fragment)
    , m_fragmentScriptingPermission(scriptingPermission)
    , m_isParsingFragment(true)
    , m_redirectAttachToFosterParent(false)
    , m_maximumDOMTreeDepth(maximumDOMTreeDepth)
{
}
166
// No explicit cleanup is performed here; members are destroyed implicitly.
HTMLConstructionSite::~HTMLConstructionSite()
{
}
170
// Clears the document and attachment-root members so this construction site
// no longer points at them.
void HTMLConstructionSite::detach()
{
    m_document = 0;
    m_attachmentRoot = 0;
}
176
// Records |form| as the current form element. A form must not already be set
// (asserted in debug builds).
void HTMLConstructionSite::setForm(HTMLFormElement* form)
{
    // This method should only be needed for HTMLTreeBuilder in the fragment case.
    ASSERT(!m_form);
    m_form = form;
}
183
184 PassRefPtr<HTMLFormElement> HTMLConstructionSite::takeForm()
185 {
186     return m_form.release();
187 }
188
189 void HTMLConstructionSite::dispatchDocumentElementAvailableIfNeeded()
190 {
191     ASSERT(m_document);
192     if (m_document->frame() && !m_isParsingFragment)
193         m_document->frame()->loader()->dispatchDocumentElementAvailable();
194 }
195
196 void HTMLConstructionSite::insertHTMLHtmlStartTagBeforeHTML(AtomicHTMLToken* token)
197 {
198     RefPtr<HTMLHtmlElement> element = HTMLHtmlElement::create(m_document);
199     element->parserSetAttributes(token->attributes(), m_fragmentScriptingPermission);
200     attachLater(m_attachmentRoot, element);
201     m_openElements.pushHTMLHtmlElement(HTMLStackItem::create(element, token));
202
203     executeQueuedTasks();
204     element->insertedByParser();
205     dispatchDocumentElementAvailableIfNeeded();
206 }
207
208 void HTMLConstructionSite::mergeAttributesFromTokenIntoElement(AtomicHTMLToken* token, Element* element)
209 {
210     if (token->attributes().isEmpty())
211         return;
212
213     ElementAttributeData* elementAttributeData = element->ensureAttributeData();
214
215     for (unsigned i = 0; i < token->attributes().size(); ++i) {
216         const Attribute& tokenAttribute = token->attributes().at(i);
217         if (!elementAttributeData->getAttributeItem(tokenAttribute.name()))
218             element->setAttribute(tokenAttribute.name(), tokenAttribute.value());
219     }
220 }
221
222 void HTMLConstructionSite::insertHTMLHtmlStartTagInBody(AtomicHTMLToken* token)
223 {
224     // Fragments do not have a root HTML element, so any additional HTML elements
225     // encountered during fragment parsing should be ignored.
226     if (m_isParsingFragment)
227         return;
228
229     mergeAttributesFromTokenIntoElement(token, m_openElements.htmlElement());
230 }
231
232 void HTMLConstructionSite::insertHTMLBodyStartTagInBody(AtomicHTMLToken* token)
233 {
234     mergeAttributesFromTokenIntoElement(token, m_openElements.bodyElement());
235 }
236
237 void HTMLConstructionSite::insertDoctype(AtomicHTMLToken* token)
238 {
239     ASSERT(token->type() == HTMLTokenTypes::DOCTYPE);
240
241     RefPtr<DocumentType> doctype = DocumentType::create(m_document, token->name(), String::adopt(token->publicIdentifier()), String::adopt(token->systemIdentifier()));
242     attachLater(m_attachmentRoot, doctype.release());
243
244     // DOCTYPE nodes are only processed when parsing fragments w/o contextElements, which
245     // never occurs.  However, if we ever chose to support such, this code is subtly wrong,
246     // because context-less fragments can determine their own quirks mode, and thus change
247     // parsing rules (like <p> inside <table>).  For now we ASSERT that we never hit this code
248     // in a fragment, as changing the owning document's compatibility mode would be wrong.
249     ASSERT(!m_isParsingFragment);
250     if (m_isParsingFragment)
251         return;
252
253     if (token->forceQuirks())
254         m_document->setCompatibilityMode(Document::QuirksMode);
255     else {
256         // We need to actually add the Doctype node to the DOM.
257         executeQueuedTasks();
258         m_document->setCompatibilityModeFromDoctype();
259     }
260 }
261
262 void HTMLConstructionSite::insertComment(AtomicHTMLToken* token)
263 {
264     ASSERT(token->type() == HTMLTokenTypes::Comment);
265     attachLater(currentNode(), Comment::create(currentNode()->document(), token->comment()));
266 }
267
268 void HTMLConstructionSite::insertCommentOnDocument(AtomicHTMLToken* token)
269 {
270     ASSERT(token->type() == HTMLTokenTypes::Comment);
271     attachLater(m_attachmentRoot, Comment::create(m_document, token->comment()));
272 }
273
274 void HTMLConstructionSite::insertCommentOnHTMLHtmlElement(AtomicHTMLToken* token)
275 {
276     ASSERT(token->type() == HTMLTokenTypes::Comment);
277     ContainerNode* parent = m_openElements.rootNode();
278     attachLater(parent, Comment::create(parent->document(), token->comment()));
279 }
280
281 void HTMLConstructionSite::insertHTMLHeadElement(AtomicHTMLToken* token)
282 {
283     ASSERT(!shouldFosterParent());
284     m_head = createHTMLElement(token);
285     attachLater(currentNode(), m_head);
286     m_openElements.pushHTMLHeadElement(HTMLStackItem::create(m_head, token));
287 }
288
289 void HTMLConstructionSite::insertHTMLBodyElement(AtomicHTMLToken* token)
290 {
291     ASSERT(!shouldFosterParent());
292     RefPtr<Element> body = createHTMLElement(token);
293     attachLater(currentNode(), body);
294     m_openElements.pushHTMLBodyElement(HTMLStackItem::create(body.release(), token));
295 }
296
297 void HTMLConstructionSite::insertHTMLFormElement(AtomicHTMLToken* token, bool isDemoted)
298 {
299     RefPtr<Element> element = createHTMLElement(token);
300     ASSERT(element->hasTagName(formTag));
301     m_form = static_pointer_cast<HTMLFormElement>(element.release());
302     m_form->setDemoted(isDemoted);
303     attachLater(currentNode(), m_form);
304     m_openElements.push(HTMLStackItem::create(m_form, token));
305 }
306
307 void HTMLConstructionSite::insertHTMLElement(AtomicHTMLToken* token)
308 {
309     RefPtr<Element> element = createHTMLElement(token);
310     attachLater(currentNode(), element);
311     m_openElements.push(HTMLStackItem::create(element.release(), token));
312 }
313
314 void HTMLConstructionSite::insertSelfClosingHTMLElement(AtomicHTMLToken* token)
315 {
316     ASSERT(token->type() == HTMLTokenTypes::StartTag);
317     // Normally HTMLElementStack is responsible for calling finishParsingChildren,
318     // but self-closing elements are never in the element stack so the stack
319     // doesn't get a chance to tell them that we're done parsing their children.
320     attachLater(currentNode(), createHTMLElement(token), true);
321     // FIXME: Do we want to acknowledge the token's self-closing flag?
322     // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#acknowledge-self-closing-flag
323 }
324
325 void HTMLConstructionSite::insertFormattingElement(AtomicHTMLToken* token)
326 {
327     // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#the-stack-of-open-elements
328     // Possible active formatting elements include:
329     // a, b, big, code, em, font, i, nobr, s, small, strike, strong, tt, and u.
330     insertHTMLElement(token);
331     m_activeFormattingElements.append(currentElementRecord()->stackItem());
332 }
333
334 void HTMLConstructionSite::insertScriptElement(AtomicHTMLToken* token)
335 {
336     // http://www.whatwg.org/specs/web-apps/current-work/multipage/scripting-1.html#already-started
337     // http://html5.org/specs/dom-parsing.html#dom-range-createcontextualfragment
338     // For createContextualFragment, the specifications say to mark it parser-inserted and already-started and later unmark them.
339     // However, we short circuit that logic to avoid the subtree traversal to find script elements since scripts can never see
340     // those flags or effects thereof.
341     const bool parserInserted = m_fragmentScriptingPermission != AllowScriptingContentAndDoNotMarkAlreadyStarted;
342     const bool alreadyStarted = m_isParsingFragment && parserInserted;
343     RefPtr<HTMLScriptElement> element = HTMLScriptElement::create(scriptTag, currentNode()->document(), parserInserted, alreadyStarted);
344     if (m_fragmentScriptingPermission != DisallowScriptingContent)
345         element->parserSetAttributes(token->attributes(), m_fragmentScriptingPermission);
346     attachLater(currentNode(), element);
347     m_openElements.push(HTMLStackItem::create(element.release(), token));
348 }
349
350 void HTMLConstructionSite::insertForeignElement(AtomicHTMLToken* token, const AtomicString& namespaceURI)
351 {
352     ASSERT(token->type() == HTMLTokenTypes::StartTag);
353     notImplemented(); // parseError when xmlns or xmlns:xlink are wrong.
354
355     RefPtr<Element> element = createElement(token, namespaceURI);
356     attachLater(currentNode(), element, token->selfClosing());
357     if (!token->selfClosing())
358         m_openElements.push(HTMLStackItem::create(element.release(), token, namespaceURI));
359 }
360
361 void HTMLConstructionSite::insertTextNode(const String& characters, WhitespaceMode whitespaceMode)
362 {
363     HTMLConstructionSiteTask task;
364     task.parent = currentNode();
365
366     if (shouldFosterParent())
367         findFosterSite(task);
368
369     // Strings composed entirely of whitespace are likely to be repeated.
370     // Turn them into AtomicString so we share a single string for each.
371     bool shouldUseAtomicString = whitespaceMode == AllWhitespace
372         || (whitespaceMode == WhitespaceUnknown && isAllWhitespace(characters));
373
374     unsigned currentPosition = 0;
375
376     // FIXME: Splitting text nodes into smaller chunks contradicts HTML5 spec, but is currently necessary
377     // for performance, see <https://bugs.webkit.org/show_bug.cgi?id=55898>.
378
379     Node* previousChild = task.nextChild ? task.nextChild->previousSibling() : task.parent->lastChild();
380     if (previousChild && previousChild->isTextNode()) {
381         // FIXME: We're only supposed to append to this text node if it
382         // was the last text node inserted by the parser.
383         CharacterData* textNode = static_cast<CharacterData*>(previousChild);
384         currentPosition = textNode->parserAppendData(characters.characters(), characters.length(), Text::defaultLengthLimit);
385     }
386
387     while (currentPosition < characters.length()) {
388         RefPtr<Text> textNode = Text::createWithLengthLimit(task.parent->document(), shouldUseAtomicString ? AtomicString(characters).string() : characters, currentPosition);
389         // If we have a whole string of unbreakable characters the above could lead to an infinite loop. Exceeding the length limit is the lesser evil.
390         if (!textNode->length()) {
391             String substring = characters.substring(currentPosition);
392             textNode = Text::create(task.parent->document(), shouldUseAtomicString ? AtomicString(substring).string() : substring);
393         }
394
395         currentPosition += textNode->length();
396         ASSERT(currentPosition <= characters.length());
397         task.child = textNode.release();
398         executeTask(task);
399     }
400 }
401
402 PassRefPtr<Element> HTMLConstructionSite::createElement(AtomicHTMLToken* token, const AtomicString& namespaceURI)
403 {
404     QualifiedName tagName(nullAtom, token->name(), namespaceURI);
405     RefPtr<Element> element = currentNode()->document()->createElement(tagName, true);
406     element->parserSetAttributes(token->attributes(), m_fragmentScriptingPermission);
407     return element.release();
408 }
409
410 PassRefPtr<Element> HTMLConstructionSite::createHTMLElement(AtomicHTMLToken* token)
411 {
412     QualifiedName tagName(nullAtom, token->name(), xhtmlNamespaceURI);
413     // FIXME: This can't use HTMLConstructionSite::createElement because we
414     // have to pass the current form element.  We should rework form association
415     // to occur after construction to allow better code sharing here.
416     RefPtr<Element> element = HTMLElementFactory::createHTMLElement(tagName, currentNode()->document(), form(), true);
417     element->parserSetAttributes(token->attributes(), m_fragmentScriptingPermission);
418     ASSERT(element->isHTMLElement());
419     return element.release();
420 }
421
422 PassRefPtr<HTMLStackItem> HTMLConstructionSite::createElementFromSavedToken(HTMLStackItem* item)
423 {
424     RefPtr<Element> element;
425     if (item->namespaceURI() == HTMLNames::xhtmlNamespaceURI)
426         element = createHTMLElement(item->token());
427     else
428         element = createElement(item->token(), item->namespaceURI());
429     return HTMLStackItem::create(element.release(), item->token(), item->namespaceURI());
430 }
431
432 bool HTMLConstructionSite::indexOfFirstUnopenFormattingElement(unsigned& firstUnopenElementIndex) const
433 {
434     if (m_activeFormattingElements.isEmpty())
435         return false;
436     unsigned index = m_activeFormattingElements.size();
437     do {
438         --index;
439         const HTMLFormattingElementList::Entry& entry = m_activeFormattingElements.at(index);
440         if (entry.isMarker() || m_openElements.contains(entry.element())) {
441             firstUnopenElementIndex = index + 1;
442             return firstUnopenElementIndex < m_activeFormattingElements.size();
443         }
444     } while (index);
445     firstUnopenElementIndex = index;
446     return true;
447 }
448
449 void HTMLConstructionSite::reconstructTheActiveFormattingElements()
450 {
451     unsigned firstUnopenElementIndex;
452     if (!indexOfFirstUnopenFormattingElement(firstUnopenElementIndex))
453         return;
454
455     unsigned unopenEntryIndex = firstUnopenElementIndex;
456     ASSERT(unopenEntryIndex < m_activeFormattingElements.size());
457     for (; unopenEntryIndex < m_activeFormattingElements.size(); ++unopenEntryIndex) {
458         HTMLFormattingElementList::Entry& unopenedEntry = m_activeFormattingElements.at(unopenEntryIndex);
459         RefPtr<HTMLStackItem> reconstructed = createElementFromSavedToken(unopenedEntry.stackItem().get());
460         attachLater(currentNode(), reconstructed->node());
461         m_openElements.push(reconstructed);
462         unopenedEntry.replaceElement(reconstructed.release());
463     }
464 }
465
466 void HTMLConstructionSite::generateImpliedEndTagsWithExclusion(const AtomicString& tagName)
467 {
468     while (hasImpliedEndTag(currentNode()) && !currentNode()->hasLocalName(tagName))
469         m_openElements.pop();
470 }
471
472 void HTMLConstructionSite::generateImpliedEndTags()
473 {
474     while (hasImpliedEndTag(currentNode()))
475         m_openElements.pop();
476 }
477
478 void HTMLConstructionSite::findFosterSite(HTMLConstructionSiteTask& task)
479 {
480     HTMLElementStack::ElementRecord* lastTableElementRecord = m_openElements.topmost(tableTag.localName());
481     if (lastTableElementRecord) {
482         Element* lastTableElement = lastTableElementRecord->element();
483         if (ContainerNode* parent = lastTableElement->parentNode()) {
484             task.parent = parent;
485             task.nextChild = lastTableElement;
486             return;
487         }
488         task.parent = lastTableElementRecord->next()->element();
489         return;
490     }
491     // Fragment case
492     task.parent = m_openElements.rootNode(); // DocumentFragment
493 }
494
495 bool HTMLConstructionSite::shouldFosterParent() const
496 {
497     return m_redirectAttachToFosterParent
498         && currentNode()->isElementNode()
499         && causesFosterParenting(currentElement()->tagQName());
500 }
501
502 void HTMLConstructionSite::fosterParent(PassRefPtr<Node> node)
503 {
504     HTMLConstructionSiteTask task;
505     findFosterSite(task);
506     task.child = node;
507     ASSERT(task.parent);
508     m_attachmentQueue.append(task);
509 }
510
511 }