Move URL from WebCore to WTF
[WebKit-https.git] / Source / WebCore / editing / markup.cpp
1 /*
2  * Copyright (C) 2004-2017 Apple Inc. All rights reserved.
3  * Copyright (C) 2008, 2009, 2010, 2011 Google Inc. All rights reserved.
4  * Copyright (C) 2011 Igalia S.L.
5  * Copyright (C) 2011 Motorola Mobility. All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
17  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
20  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
21  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
22  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
23  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
24  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
27  */
28
29 #include "config.h"
30 #include "markup.h"
31
32 #include "ArchiveResource.h"
33 #include "CSSPrimitiveValue.h"
34 #include "CSSPropertyNames.h"
35 #include "CSSValue.h"
36 #include "CSSValueKeywords.h"
37 #include "CacheStorageProvider.h"
38 #include "ChildListMutationScope.h"
39 #include "Comment.h"
40 #include "ComposedTreeIterator.h"
41 #include "DocumentFragment.h"
42 #include "DocumentLoader.h"
43 #include "DocumentType.h"
44 #include "Editing.h"
45 #include "Editor.h"
46 #include "EditorClient.h"
47 #include "ElementIterator.h"
48 #include "EmptyClients.h"
49 #include "File.h"
50 #include "Frame.h"
51 #include "FrameLoader.h"
52 #include "HTMLAttachmentElement.h"
53 #include "HTMLBRElement.h"
54 #include "HTMLBodyElement.h"
55 #include "HTMLDivElement.h"
56 #include "HTMLHeadElement.h"
57 #include "HTMLHtmlElement.h"
58 #include "HTMLImageElement.h"
59 #include "HTMLNames.h"
60 #include "HTMLStyleElement.h"
61 #include "HTMLTableElement.h"
62 #include "HTMLTextAreaElement.h"
63 #include "HTMLTextFormControlElement.h"
64 #include "LibWebRTCProvider.h"
65 #include "MarkupAccumulator.h"
66 #include "NodeList.h"
67 #include "Page.h"
68 #include "PageConfiguration.h"
69 #include "Range.h"
70 #include "RenderBlock.h"
71 #include "RuntimeEnabledFeatures.h"
72 #include "Settings.h"
73 #include "SocketProvider.h"
74 #include "StyleProperties.h"
75 #include "TextIterator.h"
76 #include "TypedElementDescendantIterator.h"
77 #include "VisibleSelection.h"
78 #include "VisibleUnits.h"
79 #include <wtf/StdLibExtras.h>
80 #include <wtf/URL.h>
81 #include <wtf/URLParser.h>
82 #include <wtf/text/StringBuilder.h>
83
84 namespace WebCore {
85
86 using namespace HTMLNames;
87
88 static bool propertyMissingOrEqualToNone(StyleProperties*, CSSPropertyID);
89
90 class AttributeChange {
91 public:
92     AttributeChange()
93         : m_name(nullAtom(), nullAtom(), nullAtom())
94     {
95     }
96
97     AttributeChange(Element* element, const QualifiedName& name, const String& value)
98         : m_element(element), m_name(name), m_value(value)
99     {
100     }
101
102     void apply()
103     {
104         m_element->setAttribute(m_name, m_value);
105     }
106
107 private:
108     RefPtr<Element> m_element;
109     QualifiedName m_name;
110     String m_value;
111 };
112
113 static void completeURLs(DocumentFragment* fragment, const String& baseURL)
114 {
115     Vector<AttributeChange> changes;
116
117     URL parsedBaseURL({ }, baseURL);
118
119     for (auto& element : descendantsOfType<Element>(*fragment)) {
120         if (!element.hasAttributes())
121             continue;
122         for (const Attribute& attribute : element.attributesIterator()) {
123             if (element.attributeContainsURL(attribute) && !attribute.value().isEmpty())
124                 changes.append(AttributeChange(&element, attribute.name(), element.completeURLsInAttributeValue(parsedBaseURL, attribute)));
125         }
126     }
127
128     for (auto& change : changes)
129         change.apply();
130 }
131
132 void replaceSubresourceURLs(Ref<DocumentFragment>&& fragment, HashMap<AtomicString, AtomicString>&& replacementMap)
133 {
134     Vector<AttributeChange> changes;
135     for (auto& element : descendantsOfType<Element>(fragment)) {
136         if (!element.hasAttributes())
137             continue;
138         for (const Attribute& attribute : element.attributesIterator()) {
139             // FIXME: This won't work for srcset.
140             if (element.attributeContainsURL(attribute) && !attribute.value().isEmpty()) {
141                 auto replacement = replacementMap.get(attribute.value());
142                 if (!replacement.isNull())
143                     changes.append({ &element, attribute.name(), replacement });
144             }
145         }
146     }
147     for (auto& change : changes)
148         change.apply();
149 }
150
151 struct ElementAttribute {
152     Ref<Element> element;
153     QualifiedName attributeName;
154 };
155
156 void removeSubresourceURLAttributes(Ref<DocumentFragment>&& fragment, WTF::Function<bool(const URL&)> shouldRemoveURL)
157 {
158     Vector<ElementAttribute> attributesToRemove;
159     for (auto& element : descendantsOfType<Element>(fragment)) {
160         if (!element.hasAttributes())
161             continue;
162         for (const Attribute& attribute : element.attributesIterator()) {
163             // FIXME: This won't work for srcset.
164             if (element.attributeContainsURL(attribute) && !attribute.value().isEmpty()) {
165                 URL url({ }, attribute.value());
166                 if (shouldRemoveURL(url))
167                     attributesToRemove.append({ element, attribute.name() });
168             }
169         }
170     }
171     for (auto& item : attributesToRemove)
172         item.element->removeAttribute(item.attributeName);
173 }
174
175 std::unique_ptr<Page> createPageForSanitizingWebContent()
176 {
177     auto pageConfiguration = pageConfigurationWithEmptyClients();
178     
179     auto page = std::make_unique<Page>(WTFMove(pageConfiguration));
180     page->settings().setMediaEnabled(false);
181     page->settings().setScriptEnabled(false);
182     page->settings().setPluginsEnabled(false);
183     page->settings().setAcceleratedCompositingEnabled(false);
184
185     Frame& frame = page->mainFrame();
186     frame.setView(FrameView::create(frame));
187     frame.init();
188
189     FrameLoader& loader = frame.loader();
190     static char markup[] = "<!DOCTYPE html><html><body></body></html>";
191     ASSERT(loader.activeDocumentLoader());
192     auto& writer = loader.activeDocumentLoader()->writer();
193     writer.setMIMEType("text/html");
194     writer.begin();
195     writer.insertDataSynchronously(String(markup));
196     writer.end();
197     RELEASE_ASSERT(page->mainFrame().document()->body());
198
199     return page;
200 }
201
202 String sanitizeMarkup(const String& rawHTML, MSOListQuirks msoListQuirks, std::optional<WTF::Function<void(DocumentFragment&)>> fragmentSanitizer)
203 {
204     auto page = createPageForSanitizingWebContent();
205     Document* stagingDocument = page->mainFrame().document();
206     ASSERT(stagingDocument);
207
208     auto fragment = createFragmentFromMarkup(*stagingDocument, rawHTML, emptyString(), DisallowScriptingAndPluginContent);
209
210     if (fragmentSanitizer)
211         (*fragmentSanitizer)(fragment);
212
213     return sanitizedMarkupForFragmentInDocument(WTFMove(fragment), *stagingDocument, msoListQuirks, rawHTML);
214 }
215
216 enum class MSOListMode { Preserve, DoNotPreserve };
217 class StyledMarkupAccumulator final : public MarkupAccumulator {
218 public:
219     enum RangeFullySelectsNode { DoesFullySelectNode, DoesNotFullySelectNode };
220
221     StyledMarkupAccumulator(const Position& start, const Position& end, Vector<Node*>* nodes, ResolveURLs, SerializeComposedTree,
222         AnnotateForInterchange, MSOListMode, bool needsPositionStyleConversion, Node* highestNodeToBeSerialized = nullptr);
223
224     Node* serializeNodes(const Position& start, const Position& end);
225     void wrapWithNode(Node&, bool convertBlocksToInlines = false, RangeFullySelectsNode = DoesFullySelectNode);
226     void wrapWithStyleNode(StyleProperties*, Document&, bool isBlock = false);
227     String takeResults();
228     
229     bool needRelativeStyleWrapper() const { return m_needRelativeStyleWrapper; }
230     bool needClearingDiv() const { return m_needClearingDiv; }
231
232     using MarkupAccumulator::appendString;
233
234     ContainerNode* parentNode(Node& node)
235     {
236         if (UNLIKELY(m_useComposedTree))
237             return node.parentInComposedTree();
238         return node.parentOrShadowHostNode();
239     }
240
241 private:
242     void appendStyleNodeOpenTag(StringBuilder&, StyleProperties*, Document&, bool isBlock = false);
243     const String& styleNodeCloseTag(bool isBlock = false);
244
245     String renderedTextRespectingRange(const Text&);
246     String textContentRespectingRange(const Text&);
247
248     bool shouldPreserveMSOListStyleForElement(const Element&);
249
250     void appendStartTag(StringBuilder& out, const Element&, bool addDisplayInline, RangeFullySelectsNode);
251     void appendEndTag(StringBuilder& out, const Element&) override;
252     void appendCustomAttributes(StringBuilder&, const Element&, Namespaces*) override;
253
254     void appendText(StringBuilder& out, const Text&) override;
255     void appendStartTag(StringBuilder& out, const Element& element, Namespaces*) override
256     {
257         appendStartTag(out, element, false, DoesFullySelectNode);
258     }
259
260     Node* firstChild(Node& node)
261     {
262         if (UNLIKELY(m_useComposedTree))
263             return firstChildInComposedTreeIgnoringUserAgentShadow(node);
264         return node.firstChild();
265     }
266
267     Node* nextSibling(Node& node)
268     {
269         if (UNLIKELY(m_useComposedTree))
270             return nextSiblingInComposedTreeIgnoringUserAgentShadow(node);
271         return node.nextSibling();
272     }
273     
274     Node* nextSkippingChildren(Node& node)
275     {
276         if (UNLIKELY(m_useComposedTree))
277             return nextSkippingChildrenInComposedTreeIgnoringUserAgentShadow(node);
278         return NodeTraversal::nextSkippingChildren(node);
279     }
280
281     bool hasChildNodes(Node& node)
282     {
283         if (UNLIKELY(m_useComposedTree))
284             return firstChildInComposedTreeIgnoringUserAgentShadow(node);
285         return node.hasChildNodes();
286     }
287
288     bool isDescendantOf(Node& node, Node& possibleAncestor)
289     {
290         if (UNLIKELY(m_useComposedTree))
291             return node.isDescendantOrShadowDescendantOf(&possibleAncestor);
292         return node.isDescendantOf(&possibleAncestor);
293     }
294
295     enum class NodeTraversalMode { EmitString, DoNotEmitString };
296     Node* traverseNodesForSerialization(Node* startNode, Node* pastEnd, NodeTraversalMode);
297
298     bool appendNodeToPreserveMSOList(Node&);
299
300     bool shouldAnnotate()
301     {
302         return m_annotate == AnnotateForInterchange::Yes;
303     }
304
305     bool shouldApplyWrappingStyle(const Node& node) const
306     {
307         return m_highestNodeToBeSerialized && m_highestNodeToBeSerialized->parentNode() == node.parentNode() && m_wrappingStyle && m_wrappingStyle->style();
308     }
309
310     Position m_start;
311     Position m_end;
312     Vector<String> m_reversedPrecedingMarkup;
313     const AnnotateForInterchange m_annotate;
314     RefPtr<Node> m_highestNodeToBeSerialized;
315     RefPtr<EditingStyle> m_wrappingStyle;
316     bool m_useComposedTree;
317     bool m_needsPositionStyleConversion;
318     bool m_needRelativeStyleWrapper { false };
319     bool m_needClearingDiv { false };
320     bool m_shouldPreserveMSOList;
321     bool m_inMSOList { false };
322 };
323
324 inline StyledMarkupAccumulator::StyledMarkupAccumulator(const Position& start, const Position& end, Vector<Node*>* nodes, ResolveURLs urlsToResolve, SerializeComposedTree serializeComposedTree,
325     AnnotateForInterchange annotate, MSOListMode msoListMode, bool needsPositionStyleConversion, Node* highestNodeToBeSerialized)
326     : MarkupAccumulator(nodes, urlsToResolve)
327     , m_start(start)
328     , m_end(end)
329     , m_annotate(annotate)
330     , m_highestNodeToBeSerialized(highestNodeToBeSerialized)
331     , m_useComposedTree(serializeComposedTree == SerializeComposedTree::Yes)
332     , m_needsPositionStyleConversion(needsPositionStyleConversion)
333     , m_shouldPreserveMSOList(msoListMode == MSOListMode::Preserve)
334 {
335 }
336
337 void StyledMarkupAccumulator::wrapWithNode(Node& node, bool convertBlocksToInlines, RangeFullySelectsNode rangeFullySelectsNode)
338 {
339     StringBuilder markup;
340     if (is<Element>(node))
341         appendStartTag(markup, downcast<Element>(node), convertBlocksToInlines && isBlock(&node), rangeFullySelectsNode);
342     else
343         appendNonElementNode(markup, node, nullptr);
344     m_reversedPrecedingMarkup.append(markup.toString());
345     endAppendingNode(node);
346     if (m_nodes)
347         m_nodes->append(&node);
348 }
349
350 void StyledMarkupAccumulator::wrapWithStyleNode(StyleProperties* style, Document& document, bool isBlock)
351 {
352     StringBuilder openTag;
353     appendStyleNodeOpenTag(openTag, style, document, isBlock);
354     m_reversedPrecedingMarkup.append(openTag.toString());
355     appendString(styleNodeCloseTag(isBlock));
356 }
357
358 void StyledMarkupAccumulator::appendStyleNodeOpenTag(StringBuilder& out, StyleProperties* style, Document& document, bool isBlock)
359 {
360     // wrappingStyleForSerialization should have removed -webkit-text-decorations-in-effect
361     ASSERT(propertyMissingOrEqualToNone(style, CSSPropertyWebkitTextDecorationsInEffect));
362     if (isBlock)
363         out.appendLiteral("<div style=\"");
364     else
365         out.appendLiteral("<span style=\"");
366     appendAttributeValue(out, style->asText(), document.isHTMLDocument());
367     out.appendLiteral("\">");
368 }
369
370 const String& StyledMarkupAccumulator::styleNodeCloseTag(bool isBlock)
371 {
372     static NeverDestroyed<const String> divClose(MAKE_STATIC_STRING_IMPL("</div>"));
373     static NeverDestroyed<const String> styleSpanClose(MAKE_STATIC_STRING_IMPL("</span>"));
374     return isBlock ? divClose : styleSpanClose;
375 }
376
377 String StyledMarkupAccumulator::takeResults()
378 {
379     StringBuilder result;
380     result.reserveCapacity(totalLength(m_reversedPrecedingMarkup) + length());
381
382     for (size_t i = m_reversedPrecedingMarkup.size(); i > 0; --i)
383         result.append(m_reversedPrecedingMarkup[i - 1]);
384
385     concatenateMarkup(result);
386
387     // We remove '\0' characters because they are not visibly rendered to the user.
388     return result.toString().replaceWithLiteral('\0', "");
389 }
390
391 void StyledMarkupAccumulator::appendText(StringBuilder& out, const Text& text)
392 {    
393     const bool parentIsTextarea = is<HTMLTextAreaElement>(text.parentElement());
394     const bool wrappingSpan = shouldApplyWrappingStyle(text) && !parentIsTextarea;
395     if (wrappingSpan) {
396         RefPtr<EditingStyle> wrappingStyle = m_wrappingStyle->copy();
397         // FIXME: <rdar://problem/5371536> Style rules that match pasted content can change it's appearance
398         // Make sure spans are inline style in paste side e.g. span { display: block }.
399         wrappingStyle->forceInline();
400         // FIXME: Should this be included in forceInline?
401         wrappingStyle->style()->setProperty(CSSPropertyFloat, CSSValueNone);
402
403         appendStyleNodeOpenTag(out, wrappingStyle->style(), text.document());
404     }
405
406     if (!shouldAnnotate() || parentIsTextarea) {
407         auto content = textContentRespectingRange(text);
408         appendCharactersReplacingEntities(out, content, 0, content.length(), entityMaskForText(text));
409     } else {
410         const bool useRenderedText = !enclosingElementWithTag(firstPositionInNode(const_cast<Text*>(&text)), selectTag);
411         String content = useRenderedText ? renderedTextRespectingRange(text) : textContentRespectingRange(text);
412         StringBuilder buffer;
413         appendCharactersReplacingEntities(buffer, content, 0, content.length(), EntityMaskInPCDATA);
414         out.append(convertHTMLTextToInterchangeFormat(buffer.toString(), &text));
415     }
416
417     if (wrappingSpan)
418         out.append(styleNodeCloseTag());
419 }
420     
421 String StyledMarkupAccumulator::renderedTextRespectingRange(const Text& text)
422 {
423     TextIteratorBehavior behavior = TextIteratorDefaultBehavior;
424     Position start = &text == m_start.containerNode() ? m_start : firstPositionInNode(const_cast<Text*>(&text));
425     Position end;
426     if (&text == m_end.containerNode())
427         end = m_end;
428     else {
429         end = lastPositionInNode(const_cast<Text*>(&text));
430         if (!m_end.isNull())
431             behavior = TextIteratorBehavesAsIfNodesFollowing;
432     }
433
434     return plainText(Range::create(text.document(), start, end).ptr(), behavior);
435 }
436
437 String StyledMarkupAccumulator::textContentRespectingRange(const Text& text)
438 {
439     if (m_start.isNull() && m_end.isNull())
440         return text.data();
441
442     unsigned start = 0;
443     unsigned end = std::numeric_limits<unsigned>::max();
444     if (&text == m_start.containerNode())
445         start = m_start.offsetInContainerNode();
446     if (&text == m_end.containerNode())
447         end = m_end.offsetInContainerNode();
448     ASSERT(start < end);
449     return text.data().substring(start, end - start);
450 }
451
452 void StyledMarkupAccumulator::appendCustomAttributes(StringBuilder& out, const Element& element, Namespaces* namespaces)
453 {
454 #if ENABLE(ATTACHMENT_ELEMENT)
455     if (!RuntimeEnabledFeatures::sharedFeatures().attachmentElementEnabled())
456         return;
457     
458     if (is<HTMLAttachmentElement>(element)) {
459         auto& attachment = downcast<HTMLAttachmentElement>(element);
460         appendAttribute(out, element, { webkitattachmentidAttr, attachment.uniqueIdentifier() }, namespaces);
461         if (auto* file = attachment.file()) {
462             // These attributes are only intended for File deserialization, and are removed from the generated attachment
463             // element after we've deserialized and set its backing File, in restoreAttachmentElementsInFragment.
464             appendAttribute(out, element, { webkitattachmentpathAttr, file->path() }, namespaces);
465             appendAttribute(out, element, { webkitattachmentbloburlAttr, file->url().string() }, namespaces);
466         }
467     } else if (is<HTMLImageElement>(element)) {
468         if (auto attachment = downcast<HTMLImageElement>(element).attachmentElement())
469             appendAttribute(out, element, { webkitattachmentidAttr, attachment->uniqueIdentifier() }, namespaces);
470     }
471 #else
472     UNUSED_PARAM(out);
473     UNUSED_PARAM(element);
474     UNUSED_PARAM(namespaces);
475 #endif
476 }
477
478 bool StyledMarkupAccumulator::shouldPreserveMSOListStyleForElement(const Element& element)
479 {
480     if (m_inMSOList)
481         return true;
482     if (m_shouldPreserveMSOList) {
483         auto style = element.getAttribute(styleAttr);
484         return style.startsWith("mso-list:") || style.contains(";mso-list:") || style.contains("\nmso-list:");
485     }
486     return false;
487 }
488
489 void StyledMarkupAccumulator::appendStartTag(StringBuilder& out, const Element& element, bool addDisplayInline, RangeFullySelectsNode rangeFullySelectsNode)
490 {
491     const bool documentIsHTML = element.document().isHTMLDocument();
492     const bool isSlotElement = is<HTMLSlotElement>(element);
493     if (UNLIKELY(isSlotElement))
494         out.append("<span");
495     else
496         appendOpenTag(out, element, nullptr);
497
498     appendCustomAttributes(out, element, nullptr);
499
500     const bool shouldAnnotateOrForceInline = element.isHTMLElement() && (shouldAnnotate() || addDisplayInline);
501     bool shouldOverrideStyleAttr = (shouldAnnotateOrForceInline || shouldApplyWrappingStyle(element) || isSlotElement) && !shouldPreserveMSOListStyleForElement(element);
502     if (element.hasAttributes()) {
503         for (const Attribute& attribute : element.attributesIterator()) {
504             // We'll handle the style attribute separately, below.
505             if (attribute.name() == styleAttr && shouldOverrideStyleAttr)
506                 continue;
507             if (element.isEventHandlerAttribute(attribute) || element.isJavaScriptURLAttribute(attribute))
508                 continue;
509             appendAttribute(out, element, attribute, 0);
510         }
511     }
512
513     if (shouldOverrideStyleAttr) {
514         RefPtr<EditingStyle> newInlineStyle;
515
516         if (shouldApplyWrappingStyle(element)) {
517             newInlineStyle = m_wrappingStyle->copy();
518             newInlineStyle->removePropertiesInElementDefaultStyle(*const_cast<Element*>(&element));
519             newInlineStyle->removeStyleConflictingWithStyleOfNode(*const_cast<Element*>(&element));
520         } else
521             newInlineStyle = EditingStyle::create();
522
523         if (isSlotElement)
524             newInlineStyle->addDisplayContents();
525
526         if (is<StyledElement>(element) && downcast<StyledElement>(element).inlineStyle())
527             newInlineStyle->overrideWithStyle(*downcast<StyledElement>(element).inlineStyle());
528
529         if (shouldAnnotateOrForceInline) {
530             if (shouldAnnotate())
531                 newInlineStyle->mergeStyleFromRulesForSerialization(downcast<HTMLElement>(*const_cast<Element*>(&element)));
532
533             if (addDisplayInline)
534                 newInlineStyle->forceInline();
535             
536             if (m_needsPositionStyleConversion) {
537                 m_needRelativeStyleWrapper |= newInlineStyle->convertPositionStyle();
538                 m_needClearingDiv |= newInlineStyle->isFloating();
539             }
540
541             // If the node is not fully selected by the range, then we don't want to keep styles that affect its relationship to the nodes around it
542             // only the ones that affect it and the nodes within it.
543             if (rangeFullySelectsNode == DoesNotFullySelectNode && newInlineStyle->style())
544                 newInlineStyle->style()->removeProperty(CSSPropertyFloat);
545         }
546
547         if (!newInlineStyle->isEmpty()) {
548             out.appendLiteral(" style=\"");
549             appendAttributeValue(out, newInlineStyle->style()->asText(), documentIsHTML);
550             out.append('\"');
551         }
552     }
553
554     appendCloseTag(out, element);
555 }
556
557 void StyledMarkupAccumulator::appendEndTag(StringBuilder& out, const Element& element)
558 {
559     if (UNLIKELY(is<HTMLSlotElement>(element)))
560         out.append("</span>");
561     else
562         MarkupAccumulator::appendEndTag(out, element);
563 }
564
565 Node* StyledMarkupAccumulator::serializeNodes(const Position& start, const Position& end)
566 {
567     ASSERT(comparePositions(start, end) <= 0);
568     auto startNode = start.firstNode();
569     Node* pastEnd = end.computeNodeAfterPosition();
570     if (!pastEnd && end.containerNode())
571         pastEnd = nextSkippingChildren(*end.containerNode());
572
573     if (!m_highestNodeToBeSerialized) {
574         Node* lastClosed = traverseNodesForSerialization(startNode.get(), pastEnd, NodeTraversalMode::DoNotEmitString);
575         m_highestNodeToBeSerialized = lastClosed;
576     }
577
578     if (m_highestNodeToBeSerialized && m_highestNodeToBeSerialized->parentNode())
579         m_wrappingStyle = EditingStyle::wrappingStyleForSerialization(*m_highestNodeToBeSerialized->parentNode(), shouldAnnotate());
580
581     return traverseNodesForSerialization(startNode.get(), pastEnd, NodeTraversalMode::EmitString);
582 }
583
584 Node* StyledMarkupAccumulator::traverseNodesForSerialization(Node* startNode, Node* pastEnd, NodeTraversalMode traversalMode)
585 {
586     const bool shouldEmit = traversalMode == NodeTraversalMode::EmitString;
587
588     m_inMSOList = false;
589
590     unsigned depth = 0;
591     auto enterNode = [&] (Node& node) {
592         if (UNLIKELY(m_shouldPreserveMSOList) && shouldEmit) {
593             if (appendNodeToPreserveMSOList(node))
594                 return false;
595         }
596
597         bool isDisplayContents = is<Element>(node) && downcast<Element>(node).hasDisplayContents();
598         if (!node.renderer() && !isDisplayContents && !enclosingElementWithTag(firstPositionInOrBeforeNode(&node), selectTag))
599             return false;
600
601         ++depth;
602         if (shouldEmit)
603             startAppendingNode(node);
604
605         return true;
606     };
607
608     Node* lastClosed = nullptr;
609     auto exitNode = [&] (Node& node) {
610         bool closing = depth;
611         if (depth)
612             --depth;
613         if (shouldEmit) {
614             if (closing)
615                 endAppendingNode(node);
616             else
617                 wrapWithNode(node);
618         }
619         lastClosed = &node;
620     };
621
622     Node* lastNode = nullptr;
623     Node* next = nullptr;
624     for (auto* n = startNode; n != pastEnd; lastNode = n, n = next) {
625
626         Vector<Node*, 8> exitedAncestors;
627         next = nullptr;
628         if (auto* child = firstChild(*n))
629             next = child;
630         else if (auto* sibling = nextSibling(*n))
631             next = sibling;
632         else {
633             for (auto* ancestor = parentNode(*n); ancestor; ancestor = parentNode(*ancestor)) {
634                 exitedAncestors.append(ancestor);
635                 if (auto* sibling = nextSibling(*ancestor)) {
636                     next = sibling;
637                     break;
638                 }
639             }
640         }
641         ASSERT(next || !pastEnd);
642
643         if (isBlock(n) && canHaveChildrenForEditing(*n) && next == pastEnd) {
644             // Don't write out empty block containers that aren't fully selected.
645             continue;
646         }
647
648         if (!enterNode(*n)) {
649             next = nextSkippingChildren(*n);
650             // Don't skip over pastEnd.
651             if (pastEnd && isDescendantOf(*pastEnd, *n))
652                 next = pastEnd;
653             ASSERT(next || !pastEnd);
654         } else {
655             if (!hasChildNodes(*n))
656                 exitNode(*n);
657         }
658
659         for (auto* ancestor : exitedAncestors) {
660             if (!depth && next == pastEnd)
661                 break;
662             exitNode(*ancestor);
663         }
664     }
665     
666     ASSERT(lastNode || !depth);
667     if (depth) {
668         for (auto* ancestor = parentNode(pastEnd ? *pastEnd : *lastNode); ancestor && depth; ancestor = parentNode(*ancestor))
669             exitNode(*ancestor);
670     }
671
672     return lastClosed;
673 }
674
675 bool StyledMarkupAccumulator::appendNodeToPreserveMSOList(Node& node)
676 {
677     if (is<Comment>(node)) {
678         auto& commentNode = downcast<Comment>(node);
679         if (!m_inMSOList && commentNode.data() == "[if !supportLists]")
680             m_inMSOList = true;
681         else if (m_inMSOList && commentNode.data() == "[endif]")
682             m_inMSOList = false;
683         else
684             return false;
685         startAppendingNode(commentNode);
686         return true;
687     }
688     if (is<HTMLStyleElement>(node)) {
689         auto* firstChild = node.firstChild();
690         if (!is<Text>(firstChild))
691             return false;
692
693         auto& textChild = downcast<Text>(*firstChild);
694         auto& styleContent = textChild.data();
695
696         const auto msoStyleDefinitionsStart = styleContent.find("/* Style Definitions */");
697         const auto msoListDefinitionsStart = styleContent.find("/* List Definitions */");
698         const auto lastListItem = styleContent.reverseFind("\n@list");
699         if (msoListDefinitionsStart == notFound || lastListItem == notFound)
700             return false;
701         const auto start = msoStyleDefinitionsStart != notFound && msoStyleDefinitionsStart < msoListDefinitionsStart ? msoStyleDefinitionsStart : msoListDefinitionsStart;
702
703         const auto msoListDefinitionsEnd = styleContent.find(";}\n", lastListItem);
704         if (msoListDefinitionsEnd == notFound || start >= msoListDefinitionsEnd)
705             return false;
706
707         appendString("<head><style class=\"" WebKitMSOListQuirksStyle "\">\n<!--\n");
708         appendStringView(StringView(textChild.data()).substring(start, msoListDefinitionsEnd - start + 3));
709         appendString("\n-->\n</style></head>");
710
711         return true;
712     }
713     return false;
714 }
715
716 static Node* ancestorToRetainStructureAndAppearanceForBlock(Node* commonAncestorBlock)
717 {
718     if (!commonAncestorBlock)
719         return nullptr;
720
721     if (commonAncestorBlock->hasTagName(tbodyTag) || commonAncestorBlock->hasTagName(trTag)) {
722         ContainerNode* table = commonAncestorBlock->parentNode();
723         while (table && !is<HTMLTableElement>(*table))
724             table = table->parentNode();
725
726         return table;
727     }
728
729     if (isNonTableCellHTMLBlockElement(commonAncestorBlock))
730         return commonAncestorBlock;
731
732     return nullptr;
733 }
734
735 static inline Node* ancestorToRetainStructureAndAppearance(Node* commonAncestor)
736 {
737     return ancestorToRetainStructureAndAppearanceForBlock(enclosingBlock(commonAncestor));
738 }
739
740 static bool propertyMissingOrEqualToNone(StyleProperties* style, CSSPropertyID propertyID)
741 {
742     if (!style)
743         return false;
744     RefPtr<CSSValue> value = style->getPropertyCSSValue(propertyID);
745     if (!value)
746         return true;
747     if (!is<CSSPrimitiveValue>(*value))
748         return false;
749     return downcast<CSSPrimitiveValue>(*value).valueID() == CSSValueNone;
750 }
751
752 static bool needInterchangeNewlineAfter(const VisiblePosition& v)
753 {
754     VisiblePosition next = v.next();
755     Node* upstreamNode = next.deepEquivalent().upstream().deprecatedNode();
756     Node* downstreamNode = v.deepEquivalent().downstream().deprecatedNode();
757     // Add an interchange newline if a paragraph break is selected and a br won't already be added to the markup to represent it.
758     return isEndOfParagraph(v) && isStartOfParagraph(next) && !(upstreamNode->hasTagName(brTag) && upstreamNode == downstreamNode);
759 }
760
761 static RefPtr<EditingStyle> styleFromMatchedRulesAndInlineDecl(Node& node)
762 {
763     if (!is<HTMLElement>(node))
764         return nullptr;
765
766     auto& element = downcast<HTMLElement>(node);
767     RefPtr<EditingStyle> style = EditingStyle::create(element.inlineStyle());
768     style->mergeStyleFromRules(element);
769     return style;
770 }
771
772 static bool isElementPresentational(const Node* node)
773 {
774     return node->hasTagName(uTag) || node->hasTagName(sTag) || node->hasTagName(strikeTag)
775         || node->hasTagName(iTag) || node->hasTagName(emTag) || node->hasTagName(bTag) || node->hasTagName(strongTag);
776 }
777
778 static Node* highestAncestorToWrapMarkup(const Position& start, const Position& end, Node& commonAncestor, AnnotateForInterchange annotate)
779 {
780     Node* specialCommonAncestor = nullptr;
781     if (annotate == AnnotateForInterchange::Yes) {
782         // Include ancestors that aren't completely inside the range but are required to retain 
783         // the structure and appearance of the copied markup.
784         specialCommonAncestor = ancestorToRetainStructureAndAppearance(&commonAncestor);
785
786         if (auto* parentListNode = enclosingNodeOfType(start, isListItem)) {
787             if (!editingIgnoresContent(*parentListNode) && VisibleSelection::selectionFromContentsOfNode(parentListNode) == VisibleSelection(start, end)) {
788                 specialCommonAncestor = parentListNode->parentNode();
789                 while (specialCommonAncestor && !isListHTMLElement(specialCommonAncestor))
790                     specialCommonAncestor = specialCommonAncestor->parentNode();
791             }
792         }
793
794         // Retain the Mail quote level by including all ancestor mail block quotes.
795         if (Node* highestMailBlockquote = highestEnclosingNodeOfType(start, isMailBlockquote, CanCrossEditingBoundary))
796             specialCommonAncestor = highestMailBlockquote;
797     }
798
799     auto* checkAncestor = specialCommonAncestor ? specialCommonAncestor : &commonAncestor;
800     if (checkAncestor->renderer() && checkAncestor->renderer()->containingBlock()) {
801         Node* newSpecialCommonAncestor = highestEnclosingNodeOfType(firstPositionInNode(checkAncestor), &isElementPresentational, CanCrossEditingBoundary, checkAncestor->renderer()->containingBlock()->element());
802         if (newSpecialCommonAncestor)
803             specialCommonAncestor = newSpecialCommonAncestor;
804     }
805
806     // If a single tab is selected, commonAncestor will be a text node inside a tab span.
807     // If two or more tabs are selected, commonAncestor will be the tab span.
808     // In either case, if there is a specialCommonAncestor already, it will necessarily be above 
809     // any tab span that needs to be included.
810     if (!specialCommonAncestor && isTabSpanTextNode(&commonAncestor))
811         specialCommonAncestor = commonAncestor.parentNode();
812     if (!specialCommonAncestor && isTabSpanNode(&commonAncestor))
813         specialCommonAncestor = &commonAncestor;
814
815     if (auto* enclosingAnchor = enclosingElementWithTag(firstPositionInNode(specialCommonAncestor ? specialCommonAncestor : &commonAncestor), aTag))
816         specialCommonAncestor = enclosingAnchor;
817
818     return specialCommonAncestor;
819 }
820
821 static RefPtr<Node> commonShadowIncludingAncestor(const Position& a, const Position& b)
822 {
823     TreeScope* commonScope = commonTreeScope(a.containerNode(), b.containerNode());
824     if (!commonScope)
825         return nullptr;
826     auto* nodeA = commonScope->ancestorNodeInThisScope(a.containerNode());
827     ASSERT(nodeA);
828     auto* nodeB = commonScope->ancestorNodeInThisScope(b.containerNode());
829     ASSERT(nodeB);
830     return Range::commonAncestorContainer(nodeA, nodeB);
831 }
832
833 static String serializePreservingVisualAppearanceInternal(const Position& start, const Position& end, Vector<Node*>* nodes, ResolveURLs urlsToResolve, SerializeComposedTree serializeComposedTree,
834     AnnotateForInterchange annotate, ConvertBlocksToInlines convertBlocksToInlines, MSOListMode msoListMode)
835 {
836     static NeverDestroyed<const String> interchangeNewlineString(MAKE_STATIC_STRING_IMPL("<br class=\"" AppleInterchangeNewline "\">"));
837
838     if (!comparePositions(start, end))
839         return emptyString();
840
841     RefPtr<Node> commonAncestor = commonShadowIncludingAncestor(start, end);
842     if (!commonAncestor)
843         return emptyString();
844
845     auto& document = *start.document();
846     document.updateLayoutIgnorePendingStylesheets();
847
848     VisiblePosition visibleStart { start };
849     VisiblePosition visibleEnd { end };
850
851     auto body = makeRefPtr(enclosingElementWithTag(firstPositionInNode(commonAncestor.get()), bodyTag));
852     RefPtr<Element> fullySelectedRoot;
853     // FIXME: Do this for all fully selected blocks, not just the body.
854     if (body && VisiblePosition(firstPositionInNode(body.get())) == visibleStart && VisiblePosition(lastPositionInNode(body.get())) == visibleEnd)
855         fullySelectedRoot = body;
856     bool needsPositionStyleConversion = body && fullySelectedRoot == body && document.settings().shouldConvertPositionStyleOnCopy();
857
858     Node* specialCommonAncestor = highestAncestorToWrapMarkup(start, end, *commonAncestor, annotate);
859
860     StyledMarkupAccumulator accumulator(start, end, nodes, urlsToResolve, serializeComposedTree, annotate, msoListMode, needsPositionStyleConversion, specialCommonAncestor);
861
862     Position startAdjustedForInterchangeNewline = start;
863     if (annotate == AnnotateForInterchange::Yes && needInterchangeNewlineAfter(visibleStart)) {
864         if (visibleStart == visibleEnd.previous())
865             return interchangeNewlineString;
866
867         accumulator.appendString(interchangeNewlineString);
868         startAdjustedForInterchangeNewline = visibleStart.next().deepEquivalent();
869
870         if (comparePositions(startAdjustedForInterchangeNewline, end) >= 0)
871             return interchangeNewlineString;
872     }
873
874     Node* lastClosed = accumulator.serializeNodes(startAdjustedForInterchangeNewline, end);
875
876     if (specialCommonAncestor && lastClosed) {
877         // Also include all of the ancestors of lastClosed up to this special ancestor.
878         for (ContainerNode* ancestor = accumulator.parentNode(*lastClosed); ancestor; ancestor = accumulator.parentNode(*ancestor)) {
879             if (ancestor == fullySelectedRoot && convertBlocksToInlines == ConvertBlocksToInlines::No) {
880                 RefPtr<EditingStyle> fullySelectedRootStyle = styleFromMatchedRulesAndInlineDecl(*fullySelectedRoot);
881
882                 // Bring the background attribute over, but not as an attribute because a background attribute on a div
883                 // appears to have no effect.
884                 if ((!fullySelectedRootStyle || !fullySelectedRootStyle->style() || !fullySelectedRootStyle->style()->getPropertyCSSValue(CSSPropertyBackgroundImage))
885                     && fullySelectedRoot->hasAttributeWithoutSynchronization(backgroundAttr))
886                     fullySelectedRootStyle->style()->setProperty(CSSPropertyBackgroundImage, "url('" + fullySelectedRoot->getAttribute(backgroundAttr) + "')");
887
888                 if (fullySelectedRootStyle->style()) {
889                     // Reset the CSS properties to avoid an assertion error in addStyleMarkup().
890                     // This assertion is caused at least when we select all text of a <body> element whose
891                     // 'text-decoration' property is "inherit", and copy it.
892                     if (!propertyMissingOrEqualToNone(fullySelectedRootStyle->style(), CSSPropertyTextDecoration))
893                         fullySelectedRootStyle->style()->setProperty(CSSPropertyTextDecoration, CSSValueNone);
894                     if (!propertyMissingOrEqualToNone(fullySelectedRootStyle->style(), CSSPropertyWebkitTextDecorationsInEffect))
895                         fullySelectedRootStyle->style()->setProperty(CSSPropertyWebkitTextDecorationsInEffect, CSSValueNone);
896                     accumulator.wrapWithStyleNode(fullySelectedRootStyle->style(), document, true);
897                 }
898             } else {
899                 // Since this node and all the other ancestors are not in the selection we want to set RangeFullySelectsNode to DoesNotFullySelectNode
900                 // so that styles that affect the exterior of the node are not included.
901                 accumulator.wrapWithNode(*ancestor, convertBlocksToInlines == ConvertBlocksToInlines::Yes, StyledMarkupAccumulator::DoesNotFullySelectNode);
902             }
903             if (nodes)
904                 nodes->append(ancestor);
905             
906             if (ancestor == specialCommonAncestor)
907                 break;
908         }
909     }
910     
911     if (accumulator.needRelativeStyleWrapper() && needsPositionStyleConversion) {
912         if (accumulator.needClearingDiv())
913             accumulator.appendString("<div style=\"clear: both;\"></div>");
914         RefPtr<EditingStyle> positionRelativeStyle = styleFromMatchedRulesAndInlineDecl(*body);
915         positionRelativeStyle->style()->setProperty(CSSPropertyPosition, CSSValueRelative);
916         accumulator.wrapWithStyleNode(positionRelativeStyle->style(), document, true);
917     }
918
919     // FIXME: The interchange newline should be placed in the block that it's in, not after all of the content, unconditionally.
920     if (annotate == AnnotateForInterchange::Yes && needInterchangeNewlineAfter(visibleEnd.previous()))
921         accumulator.appendString(interchangeNewlineString);
922
923     return accumulator.takeResults();
924 }
925
926 String serializePreservingVisualAppearance(const Range& range, Vector<Node*>* nodes, AnnotateForInterchange annotate, ConvertBlocksToInlines convertBlocksToInlines, ResolveURLs urlsToReslve)
927 {
928     return serializePreservingVisualAppearanceInternal(range.startPosition(), range.endPosition(), nodes, urlsToReslve, SerializeComposedTree::No,
929         annotate, convertBlocksToInlines, MSOListMode::DoNotPreserve);
930 }
931
932 String serializePreservingVisualAppearance(const VisibleSelection& selection, ResolveURLs resolveURLs, SerializeComposedTree serializeComposedTree, Vector<Node*>* nodes)
933 {
934     return serializePreservingVisualAppearanceInternal(selection.start(), selection.end(), nodes, resolveURLs, serializeComposedTree,
935         AnnotateForInterchange::Yes, ConvertBlocksToInlines::No, MSOListMode::DoNotPreserve);
936 }
937
938
939 static bool shouldPreserveMSOLists(const String& markup)
940 {
941     if (!markup.startsWith("<html xmlns:"))
942         return false;
943     auto tagClose = markup.find('>');
944     if (tagClose == notFound)
945         return false;
946     auto htmlTag = markup.substring(0, tagClose);
947     return htmlTag.contains("xmlns:o=\"urn:schemas-microsoft-com:office:office\"")
948         && htmlTag.contains("xmlns:w=\"urn:schemas-microsoft-com:office:word\"");
949 }
950
951 String sanitizedMarkupForFragmentInDocument(Ref<DocumentFragment>&& fragment, Document& document, MSOListQuirks msoListQuirks, const String& originalMarkup)
952 {
953     MSOListMode msoListMode = msoListQuirks == MSOListQuirks::CheckIfNeeded && shouldPreserveMSOLists(originalMarkup)
954         ? MSOListMode::Preserve : MSOListMode::DoNotPreserve;
955
956     auto bodyElement = makeRefPtr(document.body());
957     ASSERT(bodyElement);
958     bodyElement->appendChild(fragment.get());
959
960     // SerializeComposedTree::No because there can't be a shadow tree in the pasted fragment.
961     auto result = serializePreservingVisualAppearanceInternal(firstPositionInNode(bodyElement.get()), lastPositionInNode(bodyElement.get()), nullptr,
962         ResolveURLs::YesExcludingLocalFileURLsForPrivacy, SerializeComposedTree::No, AnnotateForInterchange::Yes, ConvertBlocksToInlines::No,  msoListMode);
963
964     if (msoListMode == MSOListMode::Preserve) {
965         StringBuilder builder;
966         builder.appendLiteral("<html xmlns:o=\"urn:schemas-microsoft-com:office:office\"\n"
967             "xmlns:w=\"urn:schemas-microsoft-com:office:word\"\n"
968             "xmlns:m=\"http://schemas.microsoft.com/office/2004/12/omml\"\n"
969             "xmlns=\"http://www.w3.org/TR/REC-html40\">");
970         builder.append(result);
971         builder.appendLiteral("</html>");
972         return builder.toString();
973     }
974
975     return result;
976 }
977
978 static void restoreAttachmentElementsInFragment(DocumentFragment& fragment)
979 {
980 #if ENABLE(ATTACHMENT_ELEMENT)
981     if (!RuntimeEnabledFeatures::sharedFeatures().attachmentElementEnabled())
982         return;
983
984     // When creating a fragment we must strip the webkit-attachment-path attribute after restoring the File object.
985     Vector<Ref<HTMLAttachmentElement>> attachments;
986     for (auto& attachment : descendantsOfType<HTMLAttachmentElement>(fragment))
987         attachments.append(attachment);
988
989     for (auto& attachment : attachments) {
990         attachment->setUniqueIdentifier(attachment->attributeWithoutSynchronization(webkitattachmentidAttr));
991
992         auto attachmentPath = attachment->attachmentPath();
993         auto blobURL = attachment->blobURL();
994         if (!attachmentPath.isEmpty())
995             attachment->setFile(File::create(attachmentPath));
996         else if (!blobURL.isEmpty())
997             attachment->setFile(File::deserialize({ }, blobURL, attachment->attachmentType(), attachment->attachmentTitle()));
998
999         // Remove temporary attributes that were previously added in StyledMarkupAccumulator::appendCustomAttributes.
1000         attachment->removeAttribute(webkitattachmentidAttr);
1001         attachment->removeAttribute(webkitattachmentpathAttr);
1002         attachment->removeAttribute(webkitattachmentbloburlAttr);
1003     }
1004
1005     Vector<Ref<HTMLImageElement>> images;
1006     for (auto& image : descendantsOfType<HTMLImageElement>(fragment))
1007         images.append(image);
1008
1009     for (auto& image : images) {
1010         auto attachmentIdentifier = image->attributeWithoutSynchronization(webkitattachmentidAttr);
1011         if (attachmentIdentifier.isEmpty())
1012             continue;
1013
1014         auto attachment = HTMLAttachmentElement::create(HTMLNames::attachmentTag, *fragment.ownerDocument());
1015         attachment->setUniqueIdentifier(attachmentIdentifier);
1016         image->setAttachmentElement(WTFMove(attachment));
1017         image->removeAttribute(webkitattachmentidAttr);
1018     }
1019 #else
1020     UNUSED_PARAM(fragment);
1021 #endif
1022 }
1023
1024 Ref<DocumentFragment> createFragmentFromMarkup(Document& document, const String& markup, const String& baseURL, ParserContentPolicy parserContentPolicy)
1025 {
1026     // We use a fake body element here to trick the HTML parser into using the InBody insertion mode.
1027     auto fakeBody = HTMLBodyElement::create(document);
1028     auto fragment = DocumentFragment::create(document);
1029
1030     fragment->parseHTML(markup, fakeBody.ptr(), parserContentPolicy);
1031     restoreAttachmentElementsInFragment(fragment);
1032     if (!baseURL.isEmpty() && baseURL != WTF::blankURL() && baseURL != document.baseURL())
1033         completeURLs(fragment.ptr(), baseURL);
1034
1035     return fragment;
1036 }
1037
1038 String serializeFragment(const Node& node, SerializedNodes root, Vector<Node*>* nodes, ResolveURLs urlsToResolve, Vector<QualifiedName>* tagNamesToSkip, SerializationSyntax serializationSyntax)
1039 {
1040     MarkupAccumulator accumulator(nodes, urlsToResolve, serializationSyntax);
1041     return accumulator.serializeNodes(const_cast<Node&>(node), root, tagNamesToSkip);
1042 }
1043
1044 static void fillContainerFromString(ContainerNode& paragraph, const String& string)
1045 {
1046     Document& document = paragraph.document();
1047
1048     if (string.isEmpty()) {
1049         paragraph.appendChild(createBlockPlaceholderElement(document));
1050         return;
1051     }
1052
1053     ASSERT(string.find('\n') == notFound);
1054
1055     Vector<String> tabList = string.splitAllowingEmptyEntries('\t');
1056     String tabText = emptyString();
1057     bool first = true;
1058     size_t numEntries = tabList.size();
1059     for (size_t i = 0; i < numEntries; ++i) {
1060         const String& s = tabList[i];
1061
1062         // append the non-tab textual part
1063         if (!s.isEmpty()) {
1064             if (!tabText.isEmpty()) {
1065                 paragraph.appendChild(createTabSpanElement(document, tabText));
1066                 tabText = emptyString();
1067             }
1068             Ref<Node> textNode = document.createTextNode(stringWithRebalancedWhitespace(s, first, i + 1 == numEntries));
1069             paragraph.appendChild(textNode);
1070         }
1071
1072         // there is a tab after every entry, except the last entry
1073         // (if the last character is a tab, the list gets an extra empty entry)
1074         if (i + 1 != numEntries)
1075             tabText.append('\t');
1076         else if (!tabText.isEmpty())
1077             paragraph.appendChild(createTabSpanElement(document, tabText));
1078
1079         first = false;
1080     }
1081 }
1082
1083 bool isPlainTextMarkup(Node* node)
1084 {
1085     ASSERT(node);
1086     if (!is<HTMLDivElement>(*node))
1087         return false;
1088
1089     HTMLDivElement& element = downcast<HTMLDivElement>(*node);
1090     if (element.hasAttributes())
1091         return false;
1092
1093     Node* firstChild = element.firstChild();
1094     if (!firstChild)
1095         return false;
1096
1097     Node* secondChild = firstChild->nextSibling();
1098     if (!secondChild)
1099         return firstChild->isTextNode() || firstChild->firstChild();
1100     
1101     if (secondChild->nextSibling())
1102         return false;
1103     
1104     return isTabSpanTextNode(firstChild->firstChild()) && secondChild->isTextNode();
1105 }
1106
1107 static bool contextPreservesNewline(const Range& context)
1108 {
1109     VisiblePosition position(context.startPosition());
1110     Node* container = position.deepEquivalent().containerNode();
1111     if (!container || !container->renderer())
1112         return false;
1113
1114     return container->renderer()->style().preserveNewline();
1115 }
1116
1117 Ref<DocumentFragment> createFragmentFromText(Range& context, const String& text)
1118 {
1119     Document& document = context.ownerDocument();
1120     Ref<DocumentFragment> fragment = document.createDocumentFragment();
1121     
1122     if (text.isEmpty())
1123         return fragment;
1124
1125     String string = text;
1126     string.replace("\r\n", "\n");
1127     string.replace('\r', '\n');
1128
1129     if (contextPreservesNewline(context)) {
1130         fragment->appendChild(document.createTextNode(string));
1131         if (string.endsWith('\n')) {
1132             auto element = HTMLBRElement::create(document);
1133             element->setAttributeWithoutSynchronization(classAttr, AppleInterchangeNewline);
1134             fragment->appendChild(element);
1135         }
1136         return fragment;
1137     }
1138
1139     // A string with no newlines gets added inline, rather than being put into a paragraph.
1140     if (string.find('\n') == notFound) {
1141         fillContainerFromString(fragment, string);
1142         return fragment;
1143     }
1144
1145     // Break string into paragraphs. Extra line breaks turn into empty paragraphs.
1146     Node* blockNode = enclosingBlock(context.firstNode());
1147     Element* block = downcast<Element>(blockNode);
1148     bool useClonesOfEnclosingBlock = blockNode
1149         && blockNode->isElementNode()
1150         && !block->hasTagName(bodyTag)
1151         && !block->hasTagName(htmlTag)
1152         && block != editableRootForPosition(context.startPosition());
1153     bool useLineBreak = enclosingTextFormControl(context.startPosition());
1154
1155     Vector<String> list = string.splitAllowingEmptyEntries('\n');
1156     size_t numLines = list.size();
1157     for (size_t i = 0; i < numLines; ++i) {
1158         const String& s = list[i];
1159
1160         RefPtr<Element> element;
1161         if (s.isEmpty() && i + 1 == numLines) {
1162             // For last line, use the "magic BR" rather than a P.
1163             element = HTMLBRElement::create(document);
1164             element->setAttributeWithoutSynchronization(classAttr, AppleInterchangeNewline);
1165         } else if (useLineBreak) {
1166             element = HTMLBRElement::create(document);
1167             fillContainerFromString(fragment, s);
1168         } else {
1169             if (useClonesOfEnclosingBlock)
1170                 element = block->cloneElementWithoutChildren(document);
1171             else
1172                 element = createDefaultParagraphElement(document);
1173             fillContainerFromString(*element, s);
1174         }
1175         fragment->appendChild(*element);
1176     }
1177     return fragment;
1178 }
1179
1180 String documentTypeString(const Document& document)
1181 {
1182     DocumentType* documentType = document.doctype();
1183     if (!documentType)
1184         return emptyString();
1185     return serializeFragment(*documentType, SerializedNodes::SubtreeIncludingNode);
1186 }
1187
1188 String urlToMarkup(const URL& url, const String& title)
1189 {
1190     StringBuilder markup;
1191     markup.appendLiteral("<a href=\"");
1192     markup.append(url.string());
1193     markup.appendLiteral("\">");
1194     MarkupAccumulator::appendCharactersReplacingEntities(markup, title, 0, title.length(), EntityMaskInPCDATA);
1195     markup.appendLiteral("</a>");
1196     return markup.toString();
1197 }
1198
1199 ExceptionOr<Ref<DocumentFragment>> createFragmentForInnerOuterHTML(Element& contextElement, const String& markup, ParserContentPolicy parserContentPolicy)
1200 {
1201     auto* document = &contextElement.document();
1202     if (contextElement.hasTagName(templateTag))
1203         document = &document->ensureTemplateDocument();
1204     auto fragment = DocumentFragment::create(*document);
1205
1206     if (document->isHTMLDocument()) {
1207         fragment->parseHTML(markup, &contextElement, parserContentPolicy);
1208         return WTFMove(fragment);
1209     }
1210
1211     bool wasValid = fragment->parseXML(markup, &contextElement, parserContentPolicy);
1212     if (!wasValid)
1213         return Exception { SyntaxError };
1214     return WTFMove(fragment);
1215 }
1216
1217 RefPtr<DocumentFragment> createFragmentForTransformToFragment(Document& outputDoc, const String& sourceString, const String& sourceMIMEType)
1218 {
1219     RefPtr<DocumentFragment> fragment = outputDoc.createDocumentFragment();
1220     
1221     if (sourceMIMEType == "text/html") {
1222         // As far as I can tell, there isn't a spec for how transformToFragment is supposed to work.
1223         // Based on the documentation I can find, it looks like we want to start parsing the fragment in the InBody insertion mode.
1224         // Unfortunately, that's an implementation detail of the parser.
1225         // We achieve that effect here by passing in a fake body element as context for the fragment.
1226         RefPtr<HTMLBodyElement> fakeBody = HTMLBodyElement::create(outputDoc);
1227         fragment->parseHTML(sourceString, fakeBody.get());
1228     } else if (sourceMIMEType == "text/plain")
1229         fragment->parserAppendChild(Text::create(outputDoc, sourceString));
1230     else {
1231         bool successfulParse = fragment->parseXML(sourceString, 0);
1232         if (!successfulParse)
1233             return nullptr;
1234     }
1235     
1236     // FIXME: Do we need to mess with URLs here?
1237     
1238     return fragment;
1239 }
1240
1241 Ref<DocumentFragment> createFragmentForImageAndURL(Document& document, const String& url)
1242 {
1243     auto imageElement = HTMLImageElement::create(document);
1244     imageElement->setAttributeWithoutSynchronization(HTMLNames::srcAttr, url);
1245
1246     auto fragment = document.createDocumentFragment();
1247     fragment->appendChild(imageElement);
1248
1249     return fragment;
1250 }
1251
1252 static Vector<Ref<HTMLElement>> collectElementsToRemoveFromFragment(ContainerNode& container)
1253 {
1254     Vector<Ref<HTMLElement>> toRemove;
1255     for (auto& element : childrenOfType<HTMLElement>(container)) {
1256         if (is<HTMLHtmlElement>(element)) {
1257             toRemove.append(element);
1258             collectElementsToRemoveFromFragment(element);
1259             continue;
1260         }
1261         if (is<HTMLHeadElement>(element) || is<HTMLBodyElement>(element))
1262             toRemove.append(element);
1263     }
1264     return toRemove;
1265 }
1266
1267 static void removeElementFromFragmentPreservingChildren(DocumentFragment& fragment, HTMLElement& element)
1268 {
1269     RefPtr<Node> nextChild;
1270     for (RefPtr<Node> child = element.firstChild(); child; child = nextChild) {
1271         nextChild = child->nextSibling();
1272         element.removeChild(*child);
1273         fragment.insertBefore(*child, &element);
1274     }
1275     fragment.removeChild(element);
1276 }
1277
1278 ExceptionOr<Ref<DocumentFragment>> createContextualFragment(Element& element, const String& markup, ParserContentPolicy parserContentPolicy)
1279 {
1280     auto result = createFragmentForInnerOuterHTML(element, markup, parserContentPolicy);
1281     if (result.hasException())
1282         return result.releaseException();
1283
1284     auto fragment = result.releaseReturnValue();
1285
1286     // We need to pop <html> and <body> elements and remove <head> to
1287     // accommodate folks passing complete HTML documents to make the
1288     // child of an element.
1289     auto toRemove = collectElementsToRemoveFromFragment(fragment);
1290     for (auto& element : toRemove)
1291         removeElementFromFragmentPreservingChildren(fragment, element);
1292
1293     return WTFMove(fragment);
1294 }
1295
1296 static inline bool hasOneChild(ContainerNode& node)
1297 {
1298     Node* firstChild = node.firstChild();
1299     return firstChild && !firstChild->nextSibling();
1300 }
1301
1302 static inline bool hasOneTextChild(ContainerNode& node)
1303 {
1304     return hasOneChild(node) && node.firstChild()->isTextNode();
1305 }
1306
1307 static inline bool hasMutationEventListeners(const Document& document)
1308 {
1309     return document.hasListenerType(Document::DOMSUBTREEMODIFIED_LISTENER)
1310         || document.hasListenerType(Document::DOMNODEINSERTED_LISTENER)
1311         || document.hasListenerType(Document::DOMNODEREMOVED_LISTENER)
1312         || document.hasListenerType(Document::DOMNODEREMOVEDFROMDOCUMENT_LISTENER)
1313         || document.hasListenerType(Document::DOMCHARACTERDATAMODIFIED_LISTENER);
1314 }
1315
1316 // We can use setData instead of replacing Text node as long as script can't observe the difference.
1317 static inline bool canUseSetDataOptimization(const Text& containerChild, const ChildListMutationScope& mutationScope)
1318 {
1319     bool authorScriptMayHaveReference = containerChild.refCount();
1320     return !authorScriptMayHaveReference && !mutationScope.canObserve() && !hasMutationEventListeners(containerChild.document());
1321 }
1322
1323 ExceptionOr<void> replaceChildrenWithFragment(ContainerNode& container, Ref<DocumentFragment>&& fragment)
1324 {
1325     Ref<ContainerNode> containerNode(container);
1326     ChildListMutationScope mutation(containerNode);
1327
1328     if (!fragment->firstChild()) {
1329         containerNode->removeChildren();
1330         return { };
1331     }
1332
1333     auto* containerChild = containerNode->firstChild();
1334     if (containerChild && !containerChild->nextSibling()) {
1335         if (is<Text>(*containerChild) && hasOneTextChild(fragment) && canUseSetDataOptimization(downcast<Text>(*containerChild), mutation)) {
1336             ASSERT(!fragment->firstChild()->refCount());
1337             downcast<Text>(*containerChild).setData(downcast<Text>(*fragment->firstChild()).data());
1338             return { };
1339         }
1340
1341         return containerNode->replaceChild(fragment, *containerChild);
1342     }
1343
1344     containerNode->removeChildren();
1345     return containerNode->appendChild(fragment);
1346 }
1347
1348 }