Copying content with shadow DOM doesn't copy any contents
[WebKit-https.git] / Source / WebCore / editing / markup.cpp
1 /*
2  * Copyright (C) 2004-2017 Apple Inc. All rights reserved.
3  * Copyright (C) 2008, 2009, 2010, 2011 Google Inc. All rights reserved.
4  * Copyright (C) 2011 Igalia S.L.
5  * Copyright (C) 2011 Motorola Mobility. All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
17  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
20  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
21  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
22  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
23  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
24  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
27  */
28
29 #include "config.h"
30 #include "markup.h"
31
32 #include "ArchiveResource.h"
33 #include "CSSPrimitiveValue.h"
34 #include "CSSPropertyNames.h"
35 #include "CSSValue.h"
36 #include "CSSValueKeywords.h"
37 #include "CacheStorageProvider.h"
38 #include "ChildListMutationScope.h"
39 #include "Comment.h"
40 #include "ComposedTreeIterator.h"
41 #include "DocumentFragment.h"
42 #include "DocumentLoader.h"
43 #include "DocumentType.h"
44 #include "Editing.h"
45 #include "Editor.h"
46 #include "EditorClient.h"
47 #include "ElementIterator.h"
48 #include "EmptyClients.h"
49 #include "File.h"
50 #include "Frame.h"
51 #include "FrameLoader.h"
52 #include "HTMLAttachmentElement.h"
53 #include "HTMLBRElement.h"
54 #include "HTMLBodyElement.h"
55 #include "HTMLDivElement.h"
56 #include "HTMLHeadElement.h"
57 #include "HTMLHtmlElement.h"
58 #include "HTMLImageElement.h"
59 #include "HTMLNames.h"
60 #include "HTMLStyleElement.h"
61 #include "HTMLTableElement.h"
62 #include "HTMLTextAreaElement.h"
63 #include "HTMLTextFormControlElement.h"
64 #include "LibWebRTCProvider.h"
65 #include "MarkupAccumulator.h"
66 #include "NodeList.h"
67 #include "Page.h"
68 #include "PageConfiguration.h"
69 #include "Range.h"
70 #include "RenderBlock.h"
71 #include "RuntimeEnabledFeatures.h"
72 #include "Settings.h"
73 #include "SocketProvider.h"
74 #include "StyleProperties.h"
75 #include "TextIterator.h"
76 #include "TypedElementDescendantIterator.h"
77 #include "URL.h"
78 #include "URLParser.h"
79 #include "VisibleSelection.h"
80 #include "VisibleUnits.h"
81 #include <wtf/StdLibExtras.h>
82 #include <wtf/text/StringBuilder.h>
83
84 namespace WebCore {
85
86 using namespace HTMLNames;
87
// Forward declaration. The function is defined further down in this file, but it is
// already needed by the ASSERT in StyledMarkupAccumulator::appendStyleNodeOpenTag().
88 static bool propertyMissingOrEqualToNone(StyleProperties*, CSSPropertyID);
89
90 class AttributeChange {
91 public:
92     AttributeChange()
93         : m_name(nullAtom(), nullAtom(), nullAtom())
94     {
95     }
96
97     AttributeChange(Element* element, const QualifiedName& name, const String& value)
98         : m_element(element), m_name(name), m_value(value)
99     {
100     }
101
102     void apply()
103     {
104         m_element->setAttribute(m_name, m_value);
105     }
106
107 private:
108     RefPtr<Element> m_element;
109     QualifiedName m_name;
110     String m_value;
111 };
112
113 static void completeURLs(DocumentFragment* fragment, const String& baseURL)
114 {
115     Vector<AttributeChange> changes;
116
117     URL parsedBaseURL({ }, baseURL);
118
119     for (auto& element : descendantsOfType<Element>(*fragment)) {
120         if (!element.hasAttributes())
121             continue;
122         for (const Attribute& attribute : element.attributesIterator()) {
123             if (element.attributeContainsURL(attribute) && !attribute.value().isEmpty())
124                 changes.append(AttributeChange(&element, attribute.name(), element.completeURLsInAttributeValue(parsedBaseURL, attribute)));
125         }
126     }
127
128     for (auto& change : changes)
129         change.apply();
130 }
131
132 void replaceSubresourceURLs(Ref<DocumentFragment>&& fragment, HashMap<AtomicString, AtomicString>&& replacementMap)
133 {
134     Vector<AttributeChange> changes;
135     for (auto& element : descendantsOfType<Element>(fragment)) {
136         if (!element.hasAttributes())
137             continue;
138         for (const Attribute& attribute : element.attributesIterator()) {
139             // FIXME: This won't work for srcset.
140             if (element.attributeContainsURL(attribute) && !attribute.value().isEmpty()) {
141                 auto replacement = replacementMap.get(attribute.value());
142                 if (!replacement.isNull())
143                     changes.append({ &element, attribute.name(), replacement });
144             }
145         }
146     }
147     for (auto& change : changes)
148         change.apply();
149 }
150
// Pairs an element with the name of one of its attributes. removeSubresourceURLAttributes()
// collects these while walking a fragment and removes the named attributes afterwards.
151 struct ElementAttribute {
    // The element that owns the attribute (held by Ref).
152     Ref<Element> element;
    // Name of the attribute on |element|.
153     QualifiedName attributeName;
154 };
155
156 void removeSubresourceURLAttributes(Ref<DocumentFragment>&& fragment, WTF::Function<bool(const URL&)> shouldRemoveURL)
157 {
158     Vector<ElementAttribute> attributesToRemove;
159     for (auto& element : descendantsOfType<Element>(fragment)) {
160         if (!element.hasAttributes())
161             continue;
162         for (const Attribute& attribute : element.attributesIterator()) {
163             // FIXME: This won't work for srcset.
164             if (element.attributeContainsURL(attribute) && !attribute.value().isEmpty()) {
165                 URL url = URLParser { attribute.value() }.result();
166                 if (shouldRemoveURL(url))
167                     attributesToRemove.append({ element, attribute.name() });
168             }
169         }
170     }
171     for (auto& item : attributesToRemove)
172         item.element->removeAttribute(item.attributeName);
173 }
174
175 std::unique_ptr<Page> createPageForSanitizingWebContent()
176 {
177     PageConfiguration pageConfiguration(createEmptyEditorClient(), SocketProvider::create(), LibWebRTCProvider::create(), CacheStorageProvider::create());
178
179     fillWithEmptyClients(pageConfiguration);
180     
181     auto page = std::make_unique<Page>(WTFMove(pageConfiguration));
182     page->settings().setMediaEnabled(false);
183     page->settings().setScriptEnabled(false);
184     page->settings().setPluginsEnabled(false);
185     page->settings().setAcceleratedCompositingEnabled(false);
186
187     Frame& frame = page->mainFrame();
188     frame.setView(FrameView::create(frame));
189     frame.init();
190
191     FrameLoader& loader = frame.loader();
192     static char markup[] = "<!DOCTYPE html><html><body></body></html>";
193     ASSERT(loader.activeDocumentLoader());
194     auto& writer = loader.activeDocumentLoader()->writer();
195     writer.setMIMEType("text/html");
196     writer.begin();
197     writer.insertDataSynchronously(String(markup));
198     writer.end();
199     RELEASE_ASSERT(page->mainFrame().document()->body());
200
201     return page;
202 }
203
204 String sanitizeMarkup(const String& rawHTML, MSOListQuirks msoListQuirks, std::optional<WTF::Function<void(DocumentFragment&)>> fragmentSanitizer)
205 {
206     auto page = createPageForSanitizingWebContent();
207     Document* stagingDocument = page->mainFrame().document();
208     ASSERT(stagingDocument);
209
210     auto fragment = createFragmentFromMarkup(*stagingDocument, rawHTML, emptyString(), DisallowScriptingAndPluginContent);
211
212     if (fragmentSanitizer)
213         (*fragmentSanitizer)(fragment);
214
215     return sanitizedMarkupForFragmentInDocument(WTFMove(fragment), *stagingDocument, msoListQuirks, rawHTML);
216 }
217
// Whether Microsoft Office list markup should be kept during serialization. The
// StyledMarkupAccumulator constructor turns Preserve into m_shouldPreserveMSOList.
218 enum class MSOListMode { Preserve, DoNotPreserve };
// MarkupAccumulator subclass that serializes the nodes between two positions while rewriting
// style attributes (wrapping style, annotation for interchange, forced inline display).
// It can walk either the regular DOM tree or the composed tree (m_useComposedTree).
219 class StyledMarkupAccumulator final : public MarkupAccumulator {
220 public:
    // Passed to appendElement(); with DoesNotFullySelectNode the float property is dropped
    // from the serialized inline style of an annotated / forced-inline element.
221     enum RangeFullySelectsNode { DoesFullySelectNode, DoesNotFullySelectNode };
222
223     StyledMarkupAccumulator(const Position& start, const Position& end, Vector<Node*>* nodes, ResolveURLs, SerializeComposedTree,
224         AnnotateForInterchange, MSOListMode, bool needsPositionStyleConversion, Node* highestNodeToBeSerialized = nullptr);
225
    // Serializes the nodes between |start| and |end|; returns the last node that the
    // traversal closed (see traverseNodesForSerialization()).
226     Node* serializeNodes(const Position& start, const Position& end);
    // Wraps the markup accumulated so far with |node|: its opening markup is pushed onto
    // m_reversedPrecedingMarkup and its end tag is appended.
227     void wrapWithNode(Node&, bool convertBlocksToInlines = false, RangeFullySelectsNode = DoesFullySelectNode);
    // Same idea with a synthetic <div> (isBlock) or <span> carrying the given style.
228     void wrapWithStyleNode(StyleProperties*, Document&, bool isBlock = false);
    // Returns the preceding markup (in reverse push order) followed by the accumulated
    // markup, with '\0' characters removed.
229     String takeResults();
230     
    // Flags that appendElement() sets while converting position styles; only updated when
    // m_needsPositionStyleConversion is true.
231     bool needRelativeStyleWrapper() const { return m_needRelativeStyleWrapper; }
232     bool needClearingDiv() const { return m_needClearingDiv; }
233
234     using MarkupAccumulator::appendString;
235
    // Parent used by the traversal: the composed-tree parent when serializing the composed
    // tree, otherwise the parent or shadow host.
236     ContainerNode* parentNode(Node& node)
237     {
238         if (UNLIKELY(m_useComposedTree))
239             return node.parentInComposedTree();
240         return node.parentOrShadowHostNode();
241     }
242
243 private:
244     void appendStyleNodeOpenTag(StringBuilder&, StyleProperties*, Document&, bool isBlock = false);
245     const String& styleNodeCloseTag(bool isBlock = false);
246
    // Text of a Text node restricted to the part between m_start and m_end; the first
    // variant goes through plainText(), the second through the raw character data.
247     String renderedTextRespectingRange(const Text&);
248     String textContentRespectingRange(const Text&);
249
250     bool shouldPreserveMSOListStyleForElement(const Element&);
251
252     void appendElement(StringBuilder& out, const Element&, bool addDisplayInline, RangeFullySelectsNode);
253     void appendEndElement(StringBuilder& out, const Element&) override;
254     void appendCustomAttributes(StringBuilder&, const Element&, Namespaces*) override;
255
256     void appendText(StringBuilder& out, const Text&) override;
    // MarkupAccumulator hook; forwards to the styled overload without forcing inline
    // display and treating the element as fully selected.
257     void appendElement(StringBuilder& out, const Element& element, Namespaces*) override
258     {
259         appendElement(out, element, false, DoesFullySelectNode);
260     }
261
    // The helpers below pick the composed-tree flavor of a traversal primitive when
    // m_useComposedTree is set and the plain DOM flavor otherwise.
262     Node* firstChild(Node& node)
263     {
264         if (UNLIKELY(m_useComposedTree))
265             return firstChildInComposedTreeIgnoringUserAgentShadow(node);
266         return node.firstChild();
267     }
268
269     Node* nextSibling(Node& node)
270     {
271         if (UNLIKELY(m_useComposedTree))
272             return nextSiblingInComposedTreeIgnoringUserAgentShadow(node);
273         return node.nextSibling();
274     }
275     
    // Next node after |node|'s subtree: its next sibling, or else the next sibling of the
    // closest ancestor that has one; nullptr when there is none.
276     Node* nextSkippingChildren(Node& node)
277     {
278         if (UNLIKELY(m_useComposedTree)) {
279             if (auto* sibling = nextSiblingInComposedTreeIgnoringUserAgentShadow(node))
280                 return sibling;
281             for (auto* ancestor = node.parentInComposedTree(); ancestor; ancestor = ancestor->parentInComposedTree()) {
282                 if (auto* sibling = nextSiblingInComposedTreeIgnoringUserAgentShadow(*ancestor))
283                     return sibling;
284             }
285             return nullptr;
286         }
287         return NodeTraversal::nextSkippingChildren(node);
288     }
289
290     bool hasChildNodes(Node& node)
291     {
292         if (UNLIKELY(m_useComposedTree))
293             return firstChildInComposedTreeIgnoringUserAgentShadow(node);
294         return node.hasChildNodes();
295     }
296
297     bool isDescendantOf(Node& node, Node& possibleAncestor)
298     {
299         if (UNLIKELY(m_useComposedTree))
300             return node.isDescendantOrShadowDescendantOf(&possibleAncestor);
301         return node.isDescendantOf(&possibleAncestor);
302     }
303
    // EmitString appends markup during traversal; DoNotEmitString only walks the nodes
    // (serializeNodes() uses it to find the highest node to be serialized).
304     enum class NodeTraversalMode { EmitString, DoNotEmitString };
305     Node* traverseNodesForSerialization(Node* startNode, Node* pastEnd, NodeTraversalMode);
306
307     bool appendNodeToPreserveMSOList(Node&);
308
309     bool shouldAnnotate()
310     {
311         return m_annotate == AnnotateForInterchange::Yes;
312     }
313
    // True when |node| has the same parentNode() as the highest node to be serialized and a
    // wrapping style with actual style properties is available.
314     bool shouldApplyWrappingStyle(const Node& node) const
315     {
316         return m_highestNodeToBeSerialized && m_highestNodeToBeSerialized->parentNode() == node.parentNode() && m_wrappingStyle && m_wrappingStyle->style();
317     }
318
    // Range being serialized; consulted when clipping text nodes.
319     Position m_start;
320     Position m_end;
    // Opening markup of wrappers, stored in push order and emitted in reverse by takeResults().
321     Vector<String> m_reversedPrecedingMarkup;
322     const AnnotateForInterchange m_annotate;
323     RefPtr<Node> m_highestNodeToBeSerialized;
    // Computed in serializeNodes() from the parent of m_highestNodeToBeSerialized.
324     RefPtr<EditingStyle> m_wrappingStyle;
325     bool m_useComposedTree;
326     bool m_needsPositionStyleConversion;
327     bool m_needRelativeStyleWrapper { false };
328     bool m_needClearingDiv { false };
329     bool m_shouldPreserveMSOList;
    // Toggled by the "[if !supportLists]" / "[endif]" comments in appendNodeToPreserveMSOList().
330     bool m_inMSOList { false };
331 };
332
333 inline StyledMarkupAccumulator::StyledMarkupAccumulator(const Position& start, const Position& end, Vector<Node*>* nodes, ResolveURLs urlsToResolve, SerializeComposedTree serializeComposedTree,
334     AnnotateForInterchange annotate, MSOListMode msoListMode, bool needsPositionStyleConversion, Node* highestNodeToBeSerialized)
335     : MarkupAccumulator(nodes, urlsToResolve)
336     , m_start(start)
337     , m_end(end)
338     , m_annotate(annotate)
339     , m_highestNodeToBeSerialized(highestNodeToBeSerialized)
340     , m_useComposedTree(serializeComposedTree == SerializeComposedTree::Yes)
341     , m_needsPositionStyleConversion(needsPositionStyleConversion)
342     , m_shouldPreserveMSOList(msoListMode == MSOListMode::Preserve)
343 {
344 }
345
346 void StyledMarkupAccumulator::wrapWithNode(Node& node, bool convertBlocksToInlines, RangeFullySelectsNode rangeFullySelectsNode)
347 {
348     StringBuilder markup;
349     if (is<Element>(node))
350         appendElement(markup, downcast<Element>(node), convertBlocksToInlines && isBlock(&node), rangeFullySelectsNode);
351     else
352         appendStartMarkup(markup, node, nullptr);
353     m_reversedPrecedingMarkup.append(markup.toString());
354     appendEndTag(node);
355     if (m_nodes)
356         m_nodes->append(&node);
357 }
358
359 void StyledMarkupAccumulator::wrapWithStyleNode(StyleProperties* style, Document& document, bool isBlock)
360 {
361     StringBuilder openTag;
362     appendStyleNodeOpenTag(openTag, style, document, isBlock);
363     m_reversedPrecedingMarkup.append(openTag.toString());
364     appendString(styleNodeCloseTag(isBlock));
365 }
366
367 void StyledMarkupAccumulator::appendStyleNodeOpenTag(StringBuilder& out, StyleProperties* style, Document& document, bool isBlock)
368 {
369     // wrappingStyleForSerialization should have removed -webkit-text-decorations-in-effect
370     ASSERT(propertyMissingOrEqualToNone(style, CSSPropertyWebkitTextDecorationsInEffect));
371     if (isBlock)
372         out.appendLiteral("<div style=\"");
373     else
374         out.appendLiteral("<span style=\"");
375     appendAttributeValue(out, style->asText(), document.isHTMLDocument());
376     out.appendLiteral("\">");
377 }
378
379 const String& StyledMarkupAccumulator::styleNodeCloseTag(bool isBlock)
380 {
381     static NeverDestroyed<const String> divClose(MAKE_STATIC_STRING_IMPL("</div>"));
382     static NeverDestroyed<const String> styleSpanClose(MAKE_STATIC_STRING_IMPL("</span>"));
383     return isBlock ? divClose : styleSpanClose;
384 }
385
386 String StyledMarkupAccumulator::takeResults()
387 {
388     StringBuilder result;
389     result.reserveCapacity(totalLength(m_reversedPrecedingMarkup) + length());
390
391     for (size_t i = m_reversedPrecedingMarkup.size(); i > 0; --i)
392         result.append(m_reversedPrecedingMarkup[i - 1]);
393
394     concatenateMarkup(result);
395
396     // We remove '\0' characters because they are not visibly rendered to the user.
397     return result.toString().replaceWithLiteral('\0', "");
398 }
399
400 void StyledMarkupAccumulator::appendText(StringBuilder& out, const Text& text)
401 {    
402     const bool parentIsTextarea = is<HTMLTextAreaElement>(text.parentElement());
403     const bool wrappingSpan = shouldApplyWrappingStyle(text) && !parentIsTextarea;
404     if (wrappingSpan) {
405         RefPtr<EditingStyle> wrappingStyle = m_wrappingStyle->copy();
406         // FIXME: <rdar://problem/5371536> Style rules that match pasted content can change it's appearance
407         // Make sure spans are inline style in paste side e.g. span { display: block }.
408         wrappingStyle->forceInline();
409         // FIXME: Should this be included in forceInline?
410         wrappingStyle->style()->setProperty(CSSPropertyFloat, CSSValueNone);
411
412         appendStyleNodeOpenTag(out, wrappingStyle->style(), text.document());
413     }
414
415     if (!shouldAnnotate() || parentIsTextarea) {
416         auto content = textContentRespectingRange(text);
417         appendCharactersReplacingEntities(out, content, 0, content.length(), entityMaskForText(text));
418     } else {
419         const bool useRenderedText = !enclosingElementWithTag(firstPositionInNode(const_cast<Text*>(&text)), selectTag);
420         String content = useRenderedText ? renderedTextRespectingRange(text) : textContentRespectingRange(text);
421         StringBuilder buffer;
422         appendCharactersReplacingEntities(buffer, content, 0, content.length(), EntityMaskInPCDATA);
423         out.append(convertHTMLTextToInterchangeFormat(buffer.toString(), &text));
424     }
425
426     if (wrappingSpan)
427         out.append(styleNodeCloseTag());
428 }
429     
430 String StyledMarkupAccumulator::renderedTextRespectingRange(const Text& text)
431 {
432     TextIteratorBehavior behavior = TextIteratorDefaultBehavior;
433     Position start = &text == m_start.containerNode() ? m_start : firstPositionInNode(const_cast<Text*>(&text));
434     Position end;
435     if (&text == m_end.containerNode())
436         end = m_end;
437     else {
438         end = lastPositionInNode(const_cast<Text*>(&text));
439         if (!m_end.isNull())
440             behavior = TextIteratorBehavesAsIfNodesFollowing;
441     }
442
443     return plainText(Range::create(text.document(), start, end).ptr(), behavior);
444 }
445
446 String StyledMarkupAccumulator::textContentRespectingRange(const Text& text)
447 {
448     if (m_start.isNull() && m_end.isNull())
449         return text.data();
450
451     unsigned start = 0;
452     unsigned end = std::numeric_limits<unsigned>::max();
453     if (&text == m_start.containerNode())
454         start = m_start.offsetInContainerNode();
455     if (&text == m_end.containerNode())
456         end = m_end.offsetInContainerNode();
457     ASSERT(start < end);
458     return text.data().substring(start, end - start);
459 }
460
461 void StyledMarkupAccumulator::appendCustomAttributes(StringBuilder& out, const Element& element, Namespaces* namespaces)
462 {
463 #if ENABLE(ATTACHMENT_ELEMENT)
464     if (!RuntimeEnabledFeatures::sharedFeatures().attachmentElementEnabled())
465         return;
466     
467     if (is<HTMLAttachmentElement>(element)) {
468         auto& attachment = downcast<HTMLAttachmentElement>(element);
469         appendAttribute(out, element, { webkitattachmentidAttr, attachment.uniqueIdentifier() }, namespaces);
470         if (auto* file = attachment.file()) {
471             // These attributes are only intended for File deserialization, and are removed from the generated attachment
472             // element after we've deserialized and set its backing File, in restoreAttachmentElementsInFragment.
473             appendAttribute(out, element, { webkitattachmentpathAttr, file->path() }, namespaces);
474             appendAttribute(out, element, { webkitattachmentbloburlAttr, file->url().string() }, namespaces);
475         }
476     } else if (is<HTMLImageElement>(element)) {
477         if (auto attachment = downcast<HTMLImageElement>(element).attachmentElement())
478             appendAttribute(out, element, { webkitattachmentidAttr, attachment->uniqueIdentifier() }, namespaces);
479     }
480 #else
481     UNUSED_PARAM(out);
482     UNUSED_PARAM(element);
483     UNUSED_PARAM(namespaces);
484 #endif
485 }
486
487 bool StyledMarkupAccumulator::shouldPreserveMSOListStyleForElement(const Element& element)
488 {
489     if (m_inMSOList)
490         return true;
491     if (m_shouldPreserveMSOList) {
492         auto style = element.getAttribute(styleAttr);
493         return style.startsWith("mso-list:") || style.contains(";mso-list:") || style.contains("\nmso-list:");
494     }
495     return false;
496 }
497
// Writes the start tag of |element| to |out|, including its attributes and, when required,
// a recomputed style attribute.
//   addDisplayInline      - force the serialized inline style to be inline (HTML elements only).
//   rangeFullySelectsNode - with DoesNotFullySelectNode the float property is removed from the
//                           style of annotated / forced-inline elements.
// May set m_needRelativeStyleWrapper / m_needClearingDiv when position style conversion is on.
498 void StyledMarkupAccumulator::appendElement(StringBuilder& out, const Element& element, bool addDisplayInline, RangeFullySelectsNode rangeFullySelectsNode)
499 {
500     const bool documentIsHTML = element.document().isHTMLDocument();
501     const bool isSlotElement = is<HTMLSlotElement>(element);
    // A slot element is written out as a <span> instead of under its own tag name.
502     if (UNLIKELY(isSlotElement))
503         out.append("<span");
504     else
505         appendOpenTag(out, element, nullptr);
506
507     appendCustomAttributes(out, element, nullptr);
508
    // The style attribute is regenerated when annotating or forcing inline (HTML elements),
    // when the wrapping style applies, or for slots -- unless the original style must be
    // preserved for a Microsoft Office list.
509     const bool shouldAnnotateOrForceInline = element.isHTMLElement() && (shouldAnnotate() || addDisplayInline);
510     bool shouldOverrideStyleAttr = (shouldAnnotateOrForceInline || shouldApplyWrappingStyle(element) || isSlotElement) && !shouldPreserveMSOListStyleForElement(element);
511     if (element.hasAttributes()) {
512         for (const Attribute& attribute : element.attributesIterator()) {
513             // We'll handle the style attribute separately, below.
514             if (attribute.name() == styleAttr && shouldOverrideStyleAttr)
515                 continue;
            // Event handler attributes and javascript: URL attributes are never serialized.
516             if (element.isEventHandlerAttribute(attribute) || element.isJavaScriptURLAttribute(attribute))
517                 continue;
518             appendAttribute(out, element, attribute, 0);
519         }
520     }
521
522     if (shouldOverrideStyleAttr) {
523         RefPtr<EditingStyle> newInlineStyle;
524
        // Start from a copy of the wrapping style (minus what the element's default style
        // already provides and what conflicts with the element's own style), or from empty.
525         if (shouldApplyWrappingStyle(element)) {
526             newInlineStyle = m_wrappingStyle->copy();
527             newInlineStyle->removePropertiesInElementDefaultStyle(*const_cast<Element*>(&element));
528             newInlineStyle->removeStyleConflictingWithStyleOfNode(*const_cast<Element*>(&element));
529         } else
530             newInlineStyle = EditingStyle::create();
531
532         if (isSlotElement)
533             newInlineStyle->addDisplayContents();
534
        // The element's own inline style is layered on top of what was gathered so far.
535         if (is<StyledElement>(element) && downcast<StyledElement>(element).inlineStyle())
536             newInlineStyle->overrideWithStyle(*downcast<StyledElement>(element).inlineStyle());
537
538         if (shouldAnnotateOrForceInline) {
539             if (shouldAnnotate())
540                 newInlineStyle->mergeStyleFromRulesForSerialization(downcast<HTMLElement>(*const_cast<Element*>(&element)));
541
542             if (addDisplayInline)
543                 newInlineStyle->forceInline();
544             
            // Remember, across all elements, whether a position style was converted or a
            // floating element was seen; exposed via needRelativeStyleWrapper()/needClearingDiv().
545             if (m_needsPositionStyleConversion) {
546                 m_needRelativeStyleWrapper |= newInlineStyle->convertPositionStyle();
547                 m_needClearingDiv |= newInlineStyle->isFloating();
548             }
549
550             // If the node is not fully selected by the range, then we don't want to keep styles that affect its relationship to the nodes around it
551             // only the ones that affect it and the nodes within it.
552             if (rangeFullySelectsNode == DoesNotFullySelectNode && newInlineStyle->style())
553                 newInlineStyle->style()->removeProperty(CSSPropertyFloat);
554         }
555
        // Only emit a style attribute when there is something to put in it.
556         if (!newInlineStyle->isEmpty()) {
557             out.appendLiteral(" style=\"");
558             appendAttributeValue(out, newInlineStyle->style()->asText(), documentIsHTML);
559             out.append('\"');
560         }
561     }
562
563     appendCloseTag(out, element);
564 }
565
566 void StyledMarkupAccumulator::appendEndElement(StringBuilder& out, const Element& element)
567 {
568     if (UNLIKELY(is<HTMLSlotElement>(element)))
569         out.append("</span>");
570     else
571         MarkupAccumulator::appendEndElement(out, element);
572 }
573
574 Node* StyledMarkupAccumulator::serializeNodes(const Position& start, const Position& end)
575 {
576     ASSERT(comparePositions(start, end) <= 0);
577     auto startNode = start.firstNode();
578     Node* pastEnd = end.computeNodeAfterPosition();
579     if (!pastEnd && end.containerNode())
580         pastEnd = nextSkippingChildren(*end.containerNode());
581
582     if (!m_highestNodeToBeSerialized) {
583         Node* lastClosed = traverseNodesForSerialization(startNode.get(), pastEnd, NodeTraversalMode::DoNotEmitString);
584         m_highestNodeToBeSerialized = lastClosed;
585     }
586
587     if (m_highestNodeToBeSerialized && m_highestNodeToBeSerialized->parentNode())
588         m_wrappingStyle = EditingStyle::wrappingStyleForSerialization(*m_highestNodeToBeSerialized->parentNode(), shouldAnnotate());
589
590     return traverseNodesForSerialization(startNode.get(), pastEnd, NodeTraversalMode::EmitString);
591 }
592
593 Node* StyledMarkupAccumulator::traverseNodesForSerialization(Node* startNode, Node* pastEnd, NodeTraversalMode traversalMode)
594 {
595     const bool shouldEmit = traversalMode == NodeTraversalMode::EmitString;
596
597     m_inMSOList = false;
598
599     unsigned depth = 0;
600     auto enterNode = [&] (Node& node) {
601         if (UNLIKELY(m_shouldPreserveMSOList) && shouldEmit) {
602             if (appendNodeToPreserveMSOList(node))
603                 return false;
604         }
605
606         bool isDisplayContents = is<Element>(node) && downcast<Element>(node).hasDisplayContents();
607         if (!node.renderer() && !isDisplayContents && !enclosingElementWithTag(firstPositionInOrBeforeNode(&node), selectTag))
608             return false;
609
610         ++depth;
611         if (shouldEmit)
612             appendStartTag(node);
613
614         return true;
615     };
616
617     Node* lastClosed = nullptr;
618     auto exitNode = [&] (Node& node) {
619         bool closing = depth;
620         if (depth)
621             --depth;
622         if (shouldEmit) {
623             if (closing)
624                 appendEndTag(node);
625             else
626                 wrapWithNode(node);
627         }
628         lastClosed = &node;
629     };
630
631     Node* lastNode = nullptr;
632     Node* next = nullptr;
633     for (auto* n = startNode; n != pastEnd; lastNode = n, n = next) {
634
635         Vector<Node*, 8> exitedAncestors;
636         next = nullptr;
637         if (auto* child = firstChild(*n))
638             next = child;
639         else if (auto* sibling = nextSibling(*n))
640             next = sibling;
641         else {
642             for (auto* ancestor = parentNode(*n); ancestor; ancestor = parentNode(*ancestor)) {
643                 exitedAncestors.append(ancestor);
644                 if (auto* sibling = nextSibling(*ancestor)) {
645                     next = sibling;
646                     break;
647                 }
648             }
649         }
650
651         if (isBlock(n) && canHaveChildrenForEditing(*n) && next == pastEnd) {
652             // Don't write out empty block containers that aren't fully selected.
653             continue;
654         }
655
656         if (!enterNode(*n)) {
657             next = NodeTraversal::nextSkippingChildren(*n);
658             // Don't skip over pastEnd.
659             if (pastEnd && isDescendantOf(*pastEnd, *n))
660                 next = pastEnd;
661         } else {
662             if (!hasChildNodes(*n))
663                 exitNode(*n);
664         }
665
666         for (auto* ancestor : exitedAncestors) {
667             if (!depth && next == pastEnd)
668                 break;
669             exitNode(*ancestor);
670         }
671     }
672     
673     ASSERT(lastNode || !depth);
674     if (depth) {
675         for (auto* ancestor = parentNode(pastEnd ? *pastEnd : *lastNode); ancestor && depth; ancestor = parentNode(*ancestor))
676             exitNode(*ancestor);
677     }
678
679     return lastClosed;
680 }
681
682 bool StyledMarkupAccumulator::appendNodeToPreserveMSOList(Node& node)
683 {
684     if (is<Comment>(node)) {
685         auto& commentNode = downcast<Comment>(node);
686         if (!m_inMSOList && commentNode.data() == "[if !supportLists]")
687             m_inMSOList = true;
688         else if (m_inMSOList && commentNode.data() == "[endif]")
689             m_inMSOList = false;
690         else
691             return false;
692         appendStartTag(commentNode);
693         return true;
694     }
695     if (is<HTMLStyleElement>(node)) {
696         auto* firstChild = node.firstChild();
697         if (!is<Text>(firstChild))
698             return false;
699
700         auto& textChild = downcast<Text>(*firstChild);
701         auto& styleContent = textChild.data();
702
703         const auto msoStyleDefinitionsStart = styleContent.find("/* Style Definitions */");
704         const auto msoListDefinitionsStart = styleContent.find("/* List Definitions */");
705         const auto lastListItem = styleContent.reverseFind("\n@list");
706         if (msoListDefinitionsStart == notFound || lastListItem == notFound)
707             return false;
708         const auto start = msoStyleDefinitionsStart != notFound && msoStyleDefinitionsStart < msoListDefinitionsStart ? msoStyleDefinitionsStart : msoListDefinitionsStart;
709
710         const auto msoListDefinitionsEnd = styleContent.find(";}\n", lastListItem);
711         if (msoListDefinitionsEnd == notFound || start >= msoListDefinitionsEnd)
712             return false;
713
714         appendString("<head><style class=\"" WebKitMSOListQuirksStyle "\">\n<!--\n");
715         appendTextSubstring(textChild, start, msoListDefinitionsEnd - start + 3);
716         appendString("\n-->\n</style></head>");
717
718         return true;
719     }
720     return false;
721 }
722
723 static Node* ancestorToRetainStructureAndAppearanceForBlock(Node* commonAncestorBlock)
724 {
725     if (!commonAncestorBlock)
726         return nullptr;
727
728     if (commonAncestorBlock->hasTagName(tbodyTag) || commonAncestorBlock->hasTagName(trTag)) {
729         ContainerNode* table = commonAncestorBlock->parentNode();
730         while (table && !is<HTMLTableElement>(*table))
731             table = table->parentNode();
732
733         return table;
734     }
735
736     if (isNonTableCellHTMLBlockElement(commonAncestorBlock))
737         return commonAncestorBlock;
738
739     return nullptr;
740 }
741
742 static inline Node* ancestorToRetainStructureAndAppearance(Node* commonAncestor)
743 {
744     return ancestorToRetainStructureAndAppearanceForBlock(enclosingBlock(commonAncestor));
745 }
746
747 static bool propertyMissingOrEqualToNone(StyleProperties* style, CSSPropertyID propertyID)
748 {
749     if (!style)
750         return false;
751     RefPtr<CSSValue> value = style->getPropertyCSSValue(propertyID);
752     if (!value)
753         return true;
754     if (!is<CSSPrimitiveValue>(*value))
755         return false;
756     return downcast<CSSPrimitiveValue>(*value).valueID() == CSSValueNone;
757 }
758
759 static bool needInterchangeNewlineAfter(const VisiblePosition& v)
760 {
761     VisiblePosition next = v.next();
762     Node* upstreamNode = next.deepEquivalent().upstream().deprecatedNode();
763     Node* downstreamNode = v.deepEquivalent().downstream().deprecatedNode();
764     // Add an interchange newline if a paragraph break is selected and a br won't already be added to the markup to represent it.
765     return isEndOfParagraph(v) && isStartOfParagraph(next) && !(upstreamNode->hasTagName(brTag) && upstreamNode == downstreamNode);
766 }
767
768 static RefPtr<EditingStyle> styleFromMatchedRulesAndInlineDecl(Node& node)
769 {
770     if (!is<HTMLElement>(node))
771         return nullptr;
772
773     auto& element = downcast<HTMLElement>(node);
774     RefPtr<EditingStyle> style = EditingStyle::create(element.inlineStyle());
775     style->mergeStyleFromRules(element);
776     return style;
777 }
778
779 static bool isElementPresentational(const Node* node)
780 {
781     return node->hasTagName(uTag) || node->hasTagName(sTag) || node->hasTagName(strikeTag)
782         || node->hasTagName(iTag) || node->hasTagName(emTag) || node->hasTagName(bTag) || node->hasTagName(strongTag);
783 }
784
785 static Node* highestAncestorToWrapMarkup(const Position& start, const Position& end, Node& commonAncestor, AnnotateForInterchange annotate)
786 {
787     Node* specialCommonAncestor = nullptr;
788     if (annotate == AnnotateForInterchange::Yes) {
789         // Include ancestors that aren't completely inside the range but are required to retain 
790         // the structure and appearance of the copied markup.
791         specialCommonAncestor = ancestorToRetainStructureAndAppearance(&commonAncestor);
792
793         if (auto* parentListNode = enclosingNodeOfType(start, isListItem)) {
794             if (!editingIgnoresContent(*parentListNode) && VisibleSelection::selectionFromContentsOfNode(parentListNode) == VisibleSelection(start, end)) {
795                 specialCommonAncestor = parentListNode->parentNode();
796                 while (specialCommonAncestor && !isListHTMLElement(specialCommonAncestor))
797                     specialCommonAncestor = specialCommonAncestor->parentNode();
798             }
799         }
800
801         // Retain the Mail quote level by including all ancestor mail block quotes.
802         if (Node* highestMailBlockquote = highestEnclosingNodeOfType(start, isMailBlockquote, CanCrossEditingBoundary))
803             specialCommonAncestor = highestMailBlockquote;
804     }
805
806     auto* checkAncestor = specialCommonAncestor ? specialCommonAncestor : &commonAncestor;
807     if (checkAncestor->renderer() && checkAncestor->renderer()->containingBlock()) {
808         Node* newSpecialCommonAncestor = highestEnclosingNodeOfType(firstPositionInNode(checkAncestor), &isElementPresentational, CanCrossEditingBoundary, checkAncestor->renderer()->containingBlock()->element());
809         if (newSpecialCommonAncestor)
810             specialCommonAncestor = newSpecialCommonAncestor;
811     }
812
813     // If a single tab is selected, commonAncestor will be a text node inside a tab span.
814     // If two or more tabs are selected, commonAncestor will be the tab span.
815     // In either case, if there is a specialCommonAncestor already, it will necessarily be above 
816     // any tab span that needs to be included.
817     if (!specialCommonAncestor && isTabSpanTextNode(&commonAncestor))
818         specialCommonAncestor = commonAncestor.parentNode();
819     if (!specialCommonAncestor && isTabSpanNode(&commonAncestor))
820         specialCommonAncestor = &commonAncestor;
821
822     if (auto* enclosingAnchor = enclosingElementWithTag(firstPositionInNode(specialCommonAncestor ? specialCommonAncestor : &commonAncestor), aTag))
823         specialCommonAncestor = enclosingAnchor;
824
825     return specialCommonAncestor;
826 }
827
828 static RefPtr<Node> commonShadowIncludingAncestor(const Position& a, const Position& b)
829 {
830     TreeScope* commonScope = commonTreeScope(a.containerNode(), b.containerNode());
831     if (!commonScope)
832         return nullptr;
833     auto* nodeA = commonScope->ancestorNodeInThisScope(a.containerNode());
834     ASSERT(nodeA);
835     auto* nodeB = commonScope->ancestorNodeInThisScope(b.containerNode());
836     ASSERT(nodeB);
837     return Range::commonAncestorContainer(nodeA, nodeB);
838 }
839
840 static String serializePreservingVisualAppearanceInternal(const Position& start, const Position& end, Vector<Node*>* nodes, ResolveURLs urlsToResolve, SerializeComposedTree serializeComposedTree,
841     AnnotateForInterchange annotate, ConvertBlocksToInlines convertBlocksToInlines, MSOListMode msoListMode)
842 {
843     static NeverDestroyed<const String> interchangeNewlineString(MAKE_STATIC_STRING_IMPL("<br class=\"" AppleInterchangeNewline "\">"));
844
845     if (!comparePositions(start, end))
846         return emptyString();
847
848     RefPtr<Node> commonAncestor = commonShadowIncludingAncestor(start, end);
849     if (!commonAncestor)
850         return emptyString();
851
852     auto& document = *start.document();
853     document.updateLayoutIgnorePendingStylesheets();
854
855     VisiblePosition visibleStart { start };
856     VisiblePosition visibleEnd { end };
857
858     auto body = makeRefPtr(enclosingElementWithTag(firstPositionInNode(commonAncestor.get()), bodyTag));
859     RefPtr<Element> fullySelectedRoot;
860     // FIXME: Do this for all fully selected blocks, not just the body.
861     if (body && VisiblePosition(firstPositionInNode(body.get())) == visibleStart && VisiblePosition(lastPositionInNode(body.get())) == visibleEnd)
862         fullySelectedRoot = body;
863     bool needsPositionStyleConversion = body && fullySelectedRoot == body && document.settings().shouldConvertPositionStyleOnCopy();
864
865     Node* specialCommonAncestor = highestAncestorToWrapMarkup(start, end, *commonAncestor, annotate);
866
867     StyledMarkupAccumulator accumulator(start, end, nodes, urlsToResolve, serializeComposedTree, annotate, msoListMode, needsPositionStyleConversion, specialCommonAncestor);
868
869     Position startAdjustedForInterchangeNewline = start;
870     if (annotate == AnnotateForInterchange::Yes && needInterchangeNewlineAfter(visibleStart)) {
871         if (visibleStart == visibleEnd.previous())
872             return interchangeNewlineString;
873
874         accumulator.appendString(interchangeNewlineString);
875         startAdjustedForInterchangeNewline = visibleStart.next().deepEquivalent();
876
877         if (comparePositions(startAdjustedForInterchangeNewline, end) >= 0)
878             return interchangeNewlineString;
879     }
880
881     Node* lastClosed = accumulator.serializeNodes(startAdjustedForInterchangeNewline, end);
882
883     if (specialCommonAncestor && lastClosed) {
884         // Also include all of the ancestors of lastClosed up to this special ancestor.
885         for (ContainerNode* ancestor = accumulator.parentNode(*lastClosed); ancestor; ancestor = accumulator.parentNode(*ancestor)) {
886             if (ancestor == fullySelectedRoot && convertBlocksToInlines == ConvertBlocksToInlines::No) {
887                 RefPtr<EditingStyle> fullySelectedRootStyle = styleFromMatchedRulesAndInlineDecl(*fullySelectedRoot);
888
889                 // Bring the background attribute over, but not as an attribute because a background attribute on a div
890                 // appears to have no effect.
891                 if ((!fullySelectedRootStyle || !fullySelectedRootStyle->style() || !fullySelectedRootStyle->style()->getPropertyCSSValue(CSSPropertyBackgroundImage))
892                     && fullySelectedRoot->hasAttributeWithoutSynchronization(backgroundAttr))
893                     fullySelectedRootStyle->style()->setProperty(CSSPropertyBackgroundImage, "url('" + fullySelectedRoot->getAttribute(backgroundAttr) + "')");
894
895                 if (fullySelectedRootStyle->style()) {
896                     // Reset the CSS properties to avoid an assertion error in addStyleMarkup().
897                     // This assertion is caused at least when we select all text of a <body> element whose
898                     // 'text-decoration' property is "inherit", and copy it.
899                     if (!propertyMissingOrEqualToNone(fullySelectedRootStyle->style(), CSSPropertyTextDecoration))
900                         fullySelectedRootStyle->style()->setProperty(CSSPropertyTextDecoration, CSSValueNone);
901                     if (!propertyMissingOrEqualToNone(fullySelectedRootStyle->style(), CSSPropertyWebkitTextDecorationsInEffect))
902                         fullySelectedRootStyle->style()->setProperty(CSSPropertyWebkitTextDecorationsInEffect, CSSValueNone);
903                     accumulator.wrapWithStyleNode(fullySelectedRootStyle->style(), document, true);
904                 }
905             } else {
906                 // Since this node and all the other ancestors are not in the selection we want to set RangeFullySelectsNode to DoesNotFullySelectNode
907                 // so that styles that affect the exterior of the node are not included.
908                 accumulator.wrapWithNode(*ancestor, convertBlocksToInlines == ConvertBlocksToInlines::Yes, StyledMarkupAccumulator::DoesNotFullySelectNode);
909             }
910             if (nodes)
911                 nodes->append(ancestor);
912             
913             if (ancestor == specialCommonAncestor)
914                 break;
915         }
916     }
917     
918     if (accumulator.needRelativeStyleWrapper() && needsPositionStyleConversion) {
919         if (accumulator.needClearingDiv())
920             accumulator.appendString("<div style=\"clear: both;\"></div>");
921         RefPtr<EditingStyle> positionRelativeStyle = styleFromMatchedRulesAndInlineDecl(*body);
922         positionRelativeStyle->style()->setProperty(CSSPropertyPosition, CSSValueRelative);
923         accumulator.wrapWithStyleNode(positionRelativeStyle->style(), document, true);
924     }
925
926     // FIXME: The interchange newline should be placed in the block that it's in, not after all of the content, unconditionally.
927     if (annotate == AnnotateForInterchange::Yes && needInterchangeNewlineAfter(visibleEnd.previous()))
928         accumulator.appendString(interchangeNewlineString);
929
930     return accumulator.takeResults();
931 }
932
933 String serializePreservingVisualAppearance(const Range& range, Vector<Node*>* nodes, AnnotateForInterchange annotate, ConvertBlocksToInlines convertBlocksToInlines, ResolveURLs urlsToReslve)
934 {
935     return serializePreservingVisualAppearanceInternal(range.startPosition(), range.endPosition(), nodes, urlsToReslve, SerializeComposedTree::No,
936         annotate, convertBlocksToInlines, MSOListMode::DoNotPreserve);
937 }
938
939 String serializePreservingVisualAppearance(const VisibleSelection& selection, ResolveURLs resolveURLs, SerializeComposedTree serializeComposedTree, Vector<Node*>* nodes)
940 {
941     return serializePreservingVisualAppearanceInternal(selection.start(), selection.end(), nodes, resolveURLs, serializeComposedTree,
942         AnnotateForInterchange::Yes, ConvertBlocksToInlines::No, MSOListMode::DoNotPreserve);
943 }
944
945
946 static bool shouldPreserveMSOLists(const String& markup)
947 {
948     if (!markup.startsWith("<html xmlns:"))
949         return false;
950     auto tagClose = markup.find('>');
951     if (tagClose == notFound)
952         return false;
953     auto htmlTag = markup.substring(0, tagClose);
954     return htmlTag.contains("xmlns:o=\"urn:schemas-microsoft-com:office:office\"")
955         && htmlTag.contains("xmlns:w=\"urn:schemas-microsoft-com:office:word\"");
956 }
957
958 String sanitizedMarkupForFragmentInDocument(Ref<DocumentFragment>&& fragment, Document& document, MSOListQuirks msoListQuirks, const String& originalMarkup)
959 {
960     MSOListMode msoListMode = msoListQuirks == MSOListQuirks::CheckIfNeeded && shouldPreserveMSOLists(originalMarkup)
961         ? MSOListMode::Preserve : MSOListMode::DoNotPreserve;
962
963     auto bodyElement = makeRefPtr(document.body());
964     ASSERT(bodyElement);
965     bodyElement->appendChild(fragment.get());
966
967     // SerializeComposedTree::No because there can't be a shadow tree in the pasted fragment.
968     auto result = serializePreservingVisualAppearanceInternal(firstPositionInNode(bodyElement.get()), lastPositionInNode(bodyElement.get()), nullptr,
969         ResolveURLs::YesExcludingLocalFileURLsForPrivacy, SerializeComposedTree::No, AnnotateForInterchange::Yes, ConvertBlocksToInlines::No,  msoListMode);
970
971     if (msoListMode == MSOListMode::Preserve) {
972         StringBuilder builder;
973         builder.appendLiteral("<html xmlns:o=\"urn:schemas-microsoft-com:office:office\"\n"
974             "xmlns:w=\"urn:schemas-microsoft-com:office:word\"\n"
975             "xmlns:m=\"http://schemas.microsoft.com/office/2004/12/omml\"\n"
976             "xmlns=\"http://www.w3.org/TR/REC-html40\">");
977         builder.append(result);
978         builder.appendLiteral("</html>");
979         return builder.toString();
980     }
981
982     return result;
983 }
984
985 static void restoreAttachmentElementsInFragment(DocumentFragment& fragment)
986 {
987 #if ENABLE(ATTACHMENT_ELEMENT)
988     if (!RuntimeEnabledFeatures::sharedFeatures().attachmentElementEnabled())
989         return;
990
991     // When creating a fragment we must strip the webkit-attachment-path attribute after restoring the File object.
992     Vector<Ref<HTMLAttachmentElement>> attachments;
993     for (auto& attachment : descendantsOfType<HTMLAttachmentElement>(fragment))
994         attachments.append(attachment);
995
996     for (auto& attachment : attachments) {
997         attachment->setUniqueIdentifier(attachment->attributeWithoutSynchronization(webkitattachmentidAttr));
998
999         auto attachmentPath = attachment->attachmentPath();
1000         auto blobURL = attachment->blobURL();
1001         if (!attachmentPath.isEmpty())
1002             attachment->setFile(File::create(attachmentPath));
1003         else if (!blobURL.isEmpty())
1004             attachment->setFile(File::deserialize({ }, blobURL, attachment->attachmentType(), attachment->attachmentTitle()));
1005
1006         // Remove temporary attributes that were previously added in StyledMarkupAccumulator::appendCustomAttributes.
1007         attachment->removeAttribute(webkitattachmentidAttr);
1008         attachment->removeAttribute(webkitattachmentpathAttr);
1009         attachment->removeAttribute(webkitattachmentbloburlAttr);
1010     }
1011
1012     Vector<Ref<HTMLImageElement>> images;
1013     for (auto& image : descendantsOfType<HTMLImageElement>(fragment))
1014         images.append(image);
1015
1016     for (auto& image : images) {
1017         auto attachmentIdentifier = image->attributeWithoutSynchronization(webkitattachmentidAttr);
1018         if (attachmentIdentifier.isEmpty())
1019             continue;
1020
1021         auto attachment = HTMLAttachmentElement::create(HTMLNames::attachmentTag, *fragment.ownerDocument());
1022         attachment->setUniqueIdentifier(attachmentIdentifier);
1023         image->setAttachmentElement(WTFMove(attachment));
1024         image->removeAttribute(webkitattachmentidAttr);
1025     }
1026 #else
1027     UNUSED_PARAM(fragment);
1028 #endif
1029 }
1030
1031 Ref<DocumentFragment> createFragmentFromMarkup(Document& document, const String& markup, const String& baseURL, ParserContentPolicy parserContentPolicy)
1032 {
1033     // We use a fake body element here to trick the HTML parser into using the InBody insertion mode.
1034     auto fakeBody = HTMLBodyElement::create(document);
1035     auto fragment = DocumentFragment::create(document);
1036
1037     fragment->parseHTML(markup, fakeBody.ptr(), parserContentPolicy);
1038     restoreAttachmentElementsInFragment(fragment);
1039     if (!baseURL.isEmpty() && baseURL != blankURL() && baseURL != document.baseURL())
1040         completeURLs(fragment.ptr(), baseURL);
1041
1042     return fragment;
1043 }
1044
1045 String serializeFragment(const Node& node, SerializedNodes root, Vector<Node*>* nodes, ResolveURLs urlsToResolve, Vector<QualifiedName>* tagNamesToSkip, SerializationSyntax serializationSyntax)
1046 {
1047     MarkupAccumulator accumulator(nodes, urlsToResolve, serializationSyntax);
1048     return accumulator.serializeNodes(const_cast<Node&>(node), root, tagNamesToSkip);
1049 }
1050
1051 static void fillContainerFromString(ContainerNode& paragraph, const String& string)
1052 {
1053     Document& document = paragraph.document();
1054
1055     if (string.isEmpty()) {
1056         paragraph.appendChild(createBlockPlaceholderElement(document));
1057         return;
1058     }
1059
1060     ASSERT(string.find('\n') == notFound);
1061
1062     Vector<String> tabList = string.splitAllowingEmptyEntries('\t');
1063     String tabText = emptyString();
1064     bool first = true;
1065     size_t numEntries = tabList.size();
1066     for (size_t i = 0; i < numEntries; ++i) {
1067         const String& s = tabList[i];
1068
1069         // append the non-tab textual part
1070         if (!s.isEmpty()) {
1071             if (!tabText.isEmpty()) {
1072                 paragraph.appendChild(createTabSpanElement(document, tabText));
1073                 tabText = emptyString();
1074             }
1075             Ref<Node> textNode = document.createTextNode(stringWithRebalancedWhitespace(s, first, i + 1 == numEntries));
1076             paragraph.appendChild(textNode);
1077         }
1078
1079         // there is a tab after every entry, except the last entry
1080         // (if the last character is a tab, the list gets an extra empty entry)
1081         if (i + 1 != numEntries)
1082             tabText.append('\t');
1083         else if (!tabText.isEmpty())
1084             paragraph.appendChild(createTabSpanElement(document, tabText));
1085
1086         first = false;
1087     }
1088 }
1089
1090 bool isPlainTextMarkup(Node* node)
1091 {
1092     ASSERT(node);
1093     if (!is<HTMLDivElement>(*node))
1094         return false;
1095
1096     HTMLDivElement& element = downcast<HTMLDivElement>(*node);
1097     if (element.hasAttributes())
1098         return false;
1099
1100     Node* firstChild = element.firstChild();
1101     if (!firstChild)
1102         return false;
1103
1104     Node* secondChild = firstChild->nextSibling();
1105     if (!secondChild)
1106         return firstChild->isTextNode() || firstChild->firstChild();
1107     
1108     if (secondChild->nextSibling())
1109         return false;
1110     
1111     return isTabSpanTextNode(firstChild->firstChild()) && secondChild->isTextNode();
1112 }
1113
1114 static bool contextPreservesNewline(const Range& context)
1115 {
1116     VisiblePosition position(context.startPosition());
1117     Node* container = position.deepEquivalent().containerNode();
1118     if (!container || !container->renderer())
1119         return false;
1120
1121     return container->renderer()->style().preserveNewline();
1122 }
1123
1124 Ref<DocumentFragment> createFragmentFromText(Range& context, const String& text)
1125 {
1126     Document& document = context.ownerDocument();
1127     Ref<DocumentFragment> fragment = document.createDocumentFragment();
1128     
1129     if (text.isEmpty())
1130         return fragment;
1131
1132     String string = text;
1133     string.replace("\r\n", "\n");
1134     string.replace('\r', '\n');
1135
1136     if (contextPreservesNewline(context)) {
1137         fragment->appendChild(document.createTextNode(string));
1138         if (string.endsWith('\n')) {
1139             auto element = HTMLBRElement::create(document);
1140             element->setAttributeWithoutSynchronization(classAttr, AppleInterchangeNewline);
1141             fragment->appendChild(element);
1142         }
1143         return fragment;
1144     }
1145
1146     // A string with no newlines gets added inline, rather than being put into a paragraph.
1147     if (string.find('\n') == notFound) {
1148         fillContainerFromString(fragment, string);
1149         return fragment;
1150     }
1151
1152     // Break string into paragraphs. Extra line breaks turn into empty paragraphs.
1153     Node* blockNode = enclosingBlock(context.firstNode());
1154     Element* block = downcast<Element>(blockNode);
1155     bool useClonesOfEnclosingBlock = blockNode
1156         && blockNode->isElementNode()
1157         && !block->hasTagName(bodyTag)
1158         && !block->hasTagName(htmlTag)
1159         && block != editableRootForPosition(context.startPosition());
1160     bool useLineBreak = enclosingTextFormControl(context.startPosition());
1161
1162     Vector<String> list = string.splitAllowingEmptyEntries('\n');
1163     size_t numLines = list.size();
1164     for (size_t i = 0; i < numLines; ++i) {
1165         const String& s = list[i];
1166
1167         RefPtr<Element> element;
1168         if (s.isEmpty() && i + 1 == numLines) {
1169             // For last line, use the "magic BR" rather than a P.
1170             element = HTMLBRElement::create(document);
1171             element->setAttributeWithoutSynchronization(classAttr, AppleInterchangeNewline);
1172         } else if (useLineBreak) {
1173             element = HTMLBRElement::create(document);
1174             fillContainerFromString(fragment, s);
1175         } else {
1176             if (useClonesOfEnclosingBlock)
1177                 element = block->cloneElementWithoutChildren(document);
1178             else
1179                 element = createDefaultParagraphElement(document);
1180             fillContainerFromString(*element, s);
1181         }
1182         fragment->appendChild(*element);
1183     }
1184     return fragment;
1185 }
1186
1187 String documentTypeString(const Document& document)
1188 {
1189     DocumentType* documentType = document.doctype();
1190     if (!documentType)
1191         return emptyString();
1192     return serializeFragment(*documentType, SerializedNodes::SubtreeIncludingNode);
1193 }
1194
1195 String urlToMarkup(const URL& url, const String& title)
1196 {
1197     StringBuilder markup;
1198     markup.appendLiteral("<a href=\"");
1199     markup.append(url.string());
1200     markup.appendLiteral("\">");
1201     MarkupAccumulator::appendCharactersReplacingEntities(markup, title, 0, title.length(), EntityMaskInPCDATA);
1202     markup.appendLiteral("</a>");
1203     return markup.toString();
1204 }
1205
1206 ExceptionOr<Ref<DocumentFragment>> createFragmentForInnerOuterHTML(Element& contextElement, const String& markup, ParserContentPolicy parserContentPolicy)
1207 {
1208     auto* document = &contextElement.document();
1209     if (contextElement.hasTagName(templateTag))
1210         document = &document->ensureTemplateDocument();
1211     auto fragment = DocumentFragment::create(*document);
1212
1213     if (document->isHTMLDocument()) {
1214         fragment->parseHTML(markup, &contextElement, parserContentPolicy);
1215         return WTFMove(fragment);
1216     }
1217
1218     bool wasValid = fragment->parseXML(markup, &contextElement, parserContentPolicy);
1219     if (!wasValid)
1220         return Exception { SyntaxError };
1221     return WTFMove(fragment);
1222 }
1223
1224 RefPtr<DocumentFragment> createFragmentForTransformToFragment(Document& outputDoc, const String& sourceString, const String& sourceMIMEType)
1225 {
1226     RefPtr<DocumentFragment> fragment = outputDoc.createDocumentFragment();
1227     
1228     if (sourceMIMEType == "text/html") {
1229         // As far as I can tell, there isn't a spec for how transformToFragment is supposed to work.
1230         // Based on the documentation I can find, it looks like we want to start parsing the fragment in the InBody insertion mode.
1231         // Unfortunately, that's an implementation detail of the parser.
1232         // We achieve that effect here by passing in a fake body element as context for the fragment.
1233         RefPtr<HTMLBodyElement> fakeBody = HTMLBodyElement::create(outputDoc);
1234         fragment->parseHTML(sourceString, fakeBody.get());
1235     } else if (sourceMIMEType == "text/plain")
1236         fragment->parserAppendChild(Text::create(outputDoc, sourceString));
1237     else {
1238         bool successfulParse = fragment->parseXML(sourceString, 0);
1239         if (!successfulParse)
1240             return nullptr;
1241     }
1242     
1243     // FIXME: Do we need to mess with URLs here?
1244     
1245     return fragment;
1246 }
1247
1248 Ref<DocumentFragment> createFragmentForImageAndURL(Document& document, const String& url)
1249 {
1250     auto imageElement = HTMLImageElement::create(document);
1251     imageElement->setAttributeWithoutSynchronization(HTMLNames::srcAttr, url);
1252
1253     auto fragment = document.createDocumentFragment();
1254     fragment->appendChild(imageElement);
1255
1256     return fragment;
1257 }
1258
1259 static Vector<Ref<HTMLElement>> collectElementsToRemoveFromFragment(ContainerNode& container)
1260 {
1261     Vector<Ref<HTMLElement>> toRemove;
1262     for (auto& element : childrenOfType<HTMLElement>(container)) {
1263         if (is<HTMLHtmlElement>(element)) {
1264             toRemove.append(element);
1265             collectElementsToRemoveFromFragment(element);
1266             continue;
1267         }
1268         if (is<HTMLHeadElement>(element) || is<HTMLBodyElement>(element))
1269             toRemove.append(element);
1270     }
1271     return toRemove;
1272 }
1273
1274 static void removeElementFromFragmentPreservingChildren(DocumentFragment& fragment, HTMLElement& element)
1275 {
1276     RefPtr<Node> nextChild;
1277     for (RefPtr<Node> child = element.firstChild(); child; child = nextChild) {
1278         nextChild = child->nextSibling();
1279         element.removeChild(*child);
1280         fragment.insertBefore(*child, &element);
1281     }
1282     fragment.removeChild(element);
1283 }
1284
1285 ExceptionOr<Ref<DocumentFragment>> createContextualFragment(Element& element, const String& markup, ParserContentPolicy parserContentPolicy)
1286 {
1287     auto result = createFragmentForInnerOuterHTML(element, markup, parserContentPolicy);
1288     if (result.hasException())
1289         return result.releaseException();
1290
1291     auto fragment = result.releaseReturnValue();
1292
1293     // We need to pop <html> and <body> elements and remove <head> to
1294     // accommodate folks passing complete HTML documents to make the
1295     // child of an element.
1296     auto toRemove = collectElementsToRemoveFromFragment(fragment);
1297     for (auto& element : toRemove)
1298         removeElementFromFragmentPreservingChildren(fragment, element);
1299
1300     return WTFMove(fragment);
1301 }
1302
1303 static inline bool hasOneChild(ContainerNode& node)
1304 {
1305     Node* firstChild = node.firstChild();
1306     return firstChild && !firstChild->nextSibling();
1307 }
1308
1309 static inline bool hasOneTextChild(ContainerNode& node)
1310 {
1311     return hasOneChild(node) && node.firstChild()->isTextNode();
1312 }
1313
1314 static inline bool hasMutationEventListeners(const Document& document)
1315 {
1316     return document.hasListenerType(Document::DOMSUBTREEMODIFIED_LISTENER)
1317         || document.hasListenerType(Document::DOMNODEINSERTED_LISTENER)
1318         || document.hasListenerType(Document::DOMNODEREMOVED_LISTENER)
1319         || document.hasListenerType(Document::DOMNODEREMOVEDFROMDOCUMENT_LISTENER)
1320         || document.hasListenerType(Document::DOMCHARACTERDATAMODIFIED_LISTENER);
1321 }
1322
1323 // We can use setData instead of replacing Text node as long as script can't observe the difference.
1324 static inline bool canUseSetDataOptimization(const Text& containerChild, const ChildListMutationScope& mutationScope)
1325 {
1326     bool authorScriptMayHaveReference = containerChild.refCount();
1327     return !authorScriptMayHaveReference && !mutationScope.canObserve() && !hasMutationEventListeners(containerChild.document());
1328 }
1329
1330 ExceptionOr<void> replaceChildrenWithFragment(ContainerNode& container, Ref<DocumentFragment>&& fragment)
1331 {
1332     Ref<ContainerNode> containerNode(container);
1333     ChildListMutationScope mutation(containerNode);
1334
1335     if (!fragment->firstChild()) {
1336         containerNode->removeChildren();
1337         return { };
1338     }
1339
1340     auto* containerChild = containerNode->firstChild();
1341     if (containerChild && !containerChild->nextSibling()) {
1342         if (is<Text>(*containerChild) && hasOneTextChild(fragment) && canUseSetDataOptimization(downcast<Text>(*containerChild), mutation)) {
1343             ASSERT(!fragment->firstChild()->refCount());
1344             downcast<Text>(*containerChild).setData(downcast<Text>(*fragment->firstChild()).data());
1345             return { };
1346         }
1347
1348         return containerNode->replaceChild(fragment, *containerChild);
1349     }
1350
1351     containerNode->removeChildren();
1352     return containerNode->appendChild(fragment);
1353 }
1354
1355 }