e4b90bdf27052b032f9a3ce433543d1f87784f67
[WebKit-https.git] / Source / WebCore / editing / cocoa / WebContentReaderCocoa.mm
1 /*
2  * Copyright (C) 2006-2017 Apple Inc. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  * 1. Redistributions of source code must retain the above copyright
8  *    notice, this list of conditions and the following disclaimer.
9  * 2. Redistributions in binary form must reproduce the above copyright
10  *    notice, this list of conditions and the following disclaimer in the
11  *    documentation and/or other materials provided with the distribution.
12  *
13  * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
14  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
17  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
20  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
21  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24  */
25
26 #import "config.h"
27 #import "WebContentReader.h"
28
29 #import "ArchiveResource.h"
30 #import "Blob.h"
31 #import "BlobURL.h"
32 #import "CachedResourceLoader.h"
33 #import "DOMURL.h"
34 #import "Document.h"
35 #import "DocumentFragment.h"
36 #import "DocumentLoader.h"
37 #import "File.h"
38 #import "FileSystem.h"
39 #import "Frame.h"
40 #import "FrameLoader.h"
41 #import "FrameLoaderClient.h"
42 #import "HTMLAnchorElement.h"
43 #import "HTMLAttachmentElement.h"
44 #import "HTMLBRElement.h"
45 #import "HTMLBodyElement.h"
46 #import "HTMLIFrameElement.h"
47 #import "HTMLImageElement.h"
48 #import "HTMLObjectElement.h"
49 #import "LegacyWebArchive.h"
50 #import "MIMETypeRegistry.h"
51 #import "Page.h"
52 #import "PublicURLManager.h"
53 #import "RenderView.h"
54 #import "RuntimeEnabledFeatures.h"
55 #import "SerializedAttachmentData.h"
56 #import "Settings.h"
57 #import "SocketProvider.h"
58 #import "TypedElementDescendantIterator.h"
59 #import "URLParser.h"
60 #import "UTIUtilities.h"
61 #import "WebArchiveResourceFromNSAttributedString.h"
62 #import "WebArchiveResourceWebResourceHandler.h"
63 #import "WebNSAttributedStringExtras.h"
64 #import "markup.h"
65 #import <pal/spi/cocoa/NSAttributedStringSPI.h>
66 #import <wtf/SoftLinking.h>
67
68 #if PLATFORM(MAC)
69 #include "LocalDefaultSystemAppearance.h"
70 #endif
71
72 #if (PLATFORM(IOS_FAMILY) && __IPHONE_OS_VERSION_MIN_REQUIRED >= 110000) || (PLATFORM(MAC) && __MAC_OS_X_VERSION_MIN_REQUIRED >= 101300)
// Declares the NSAttributedString method that createFragment() in this file calls to convert an
// attributed string into HTML fragment markup, returning the associated subresources by reference.
// NOTE(review): the leading underscore suggests this is SPI not exposed by public headers - confirm.
@interface NSAttributedString ()
- (NSString *)_htmlDocumentFragmentString:(NSRange)range documentAttributes:(NSDictionary *)dict subresources:(NSArray **)subresources;
@end
76 #elif PLATFORM(IOS_FAMILY)
77 SOFT_LINK_PRIVATE_FRAMEWORK(WebKitLegacy)
78 #elif PLATFORM(MAC)
79 SOFT_LINK_FRAMEWORK_IN_UMBRELLA(WebKit, WebKitLegacy)
80 #endif
81
82 #if (PLATFORM(IOS_FAMILY) && __IPHONE_OS_VERSION_MIN_REQUIRED < 110000) || (PLATFORM(MAC) && __MAC_OS_X_VERSION_MIN_REQUIRED < 101300)
83 SOFT_LINK(WebKitLegacy, _WebCreateFragment, void, (WebCore::Document& document, NSAttributedString *string, WebCore::FragmentAndResources& result), (document, string, result))
84 #endif
85
86 namespace WebCore {
87
88 #if (PLATFORM(IOS_FAMILY) && __IPHONE_OS_VERSION_MIN_REQUIRED >= 110000) || (PLATFORM(MAC) && __MAC_OS_X_VERSION_MIN_REQUIRED >= 101300)
89
// Builds the document attributes dictionary passed to the attributed-string-to-HTML conversion:
// the list of excluded elements, newline/tab handling flags, the output base URL and the handler
// used for web resources.
static NSDictionary *attributesForAttributedStringConversion()
{
    // Keep this function in sync with the identically named one in WebKitLegacy, which older OS versions use.
    RetainPtr<NSMutableArray> elementsToExclude = adoptNS([[NSMutableArray alloc] initWithObjects:
        // Style is left out so that styling stays inline and the fragment is easy to insert.
        @"style",
        // Leaving out xml keeps the result from being XHTML.
        @"xml",
        // These tags get stripped when converting to a fragment anyway.
        @"doctype", @"html", @"head", @"body",
        // Deprecated tags.
        @"applet", @"basefont", @"center", @"dir", @"font", @"menu", @"s", @"strike", @"u",
        nil]);

    // Object is excluded (so no file attachments become part of the fragment) unless the
    // attachment element is both compiled in and enabled at runtime.
#if ENABLE(ATTACHMENT_ELEMENT)
    bool shouldExcludeObjectElements = !RuntimeEnabledFeatures::sharedFeatures().attachmentElementEnabled();
#else
    bool shouldExcludeObjectElements = true;
#endif
    if (shouldExcludeObjectElements)
        [elementsToExclude addObject:@"object"];

#if PLATFORM(IOS_FAMILY)
    static NSString * const NSExcludedElementsDocumentAttribute = @"ExcludedElements";
#endif

    // NSHTMLReader over-releases the output base URL, so give it an extra retain up front.
    NSURL *outputBaseURL = URL::fakeURLWithRelativePart(emptyString());
    CFRetain((__bridge CFTypeRef)outputBaseURL);

    WebArchiveResourceWebResourceHandler *resourceHandler = [[WebArchiveResourceWebResourceHandler new] autorelease];

    return @{
        NSExcludedElementsDocumentAttribute: elementsToExclude.get(),
        @"InterchangeNewline": @YES,
        @"CoalesceTabSpans": @YES,
        @"OutputBaseURL": outputBaseURL,
        @"WebResourceHandler": resourceHandler,
    };
}
130
// Converts an attributed string into a document fragment owned by the frame's document, together
// with the archive resources produced by the conversion. The frame must have a document.
static FragmentAndResources createFragment(Frame& frame, NSAttributedString *string)
{
    Document& document = *frame.document();

#if PLATFORM(MAC)
    // Stays alive for the whole conversion; built from the view's dark-appearance flag, or false without a view.
    auto* frameView = frame.view();
    LocalDefaultSystemAppearance localAppearance(frameView ? frameView->useDarkAppearance() : false);
#endif

    NSArray *convertedSubresources = nil;
    NSRange entireRange = NSMakeRange(0, [string length]);
    NSString *fragmentMarkup = [string _htmlDocumentFragmentString:entireRange documentAttributes:attributesForAttributedStringConversion() subresources:&convertedSubresources];

    // Parse with scripting and plug-in content disallowed, using the document's body as context.
    auto parsedFragment = DocumentFragment::create(document);
    parsedFragment->parseHTML(fragmentMarkup, document.body(), DisallowScriptingAndPluginContent);

    FragmentAndResources fragmentAndResources;
    fragmentAndResources.fragment = WTFMove(parsedFragment);
    for (WebArchiveResourceFromNSAttributedString *convertedResource in convertedSubresources)
        fragmentAndResources.resources.append(*convertedResource->resource);

    return fragmentAndResources;
}
152
153 #else
154
// Older OS versions: hand the conversion to the soft-linked _WebCreateFragment entry point, which
// fills in the fragment and its resources. The frame must have a document.
static FragmentAndResources createFragment(Frame& frame, NSAttributedString *string)
{
    auto& document = *frame.document();
    FragmentAndResources fragmentAndResources;
    _WebCreateFragment(document, string, fragmentAndResources);
    return fragmentAndResources;
}
161
162 #endif
163
// Scoped helper: while an instance is alive, loading is deferred on the frame's page and image
// loading is disabled on the document's CachedResourceLoader. On destruction only the settings
// this instance actually changed are restored. The frame must have a document at construction.
class DeferredLoadingScope {
public:
    explicit DeferredLoadingScope(Frame& frame)
        : m_frame(frame)
        , m_cachedResourceLoader(frame.document()->cachedResourceLoader())
    {
        // A frame is not guaranteed to be attached to a page; without one there is nothing to defer.
        if (auto* page = frame.page()) {
            if (!page->defersLoading()) {
                page->setDefersLoading(true);
                m_didEnableDeferredLoading = true;
            }
        }

        if (m_cachedResourceLoader->imagesEnabled()) {
            m_cachedResourceLoader->setImagesEnabled(false);
            m_didDisableImage = true;
        }
    }

    // Copying or moving a scope would restore the settings twice or leave a moved-from Ref behind.
    DeferredLoadingScope(const DeferredLoadingScope&) = delete;
    DeferredLoadingScope& operator=(const DeferredLoadingScope&) = delete;

    ~DeferredLoadingScope()
    {
        if (m_didDisableImage)
            m_cachedResourceLoader->setImagesEnabled(true);

        if (!m_didEnableDeferredLoading)
            return;

        // The frame may have been detached from its page while the scope was alive.
        if (auto* page = m_frame->page())
            page->setDefersLoading(false);
    }

private:
    Ref<Frame> m_frame;
    Ref<CachedResourceLoader> m_cachedResourceLoader;
    bool m_didEnableDeferredLoading { false };
    bool m_didDisableImage { false };
};
195
196
// Returns false for URLs in the HTTP family and for data URLs; every other URL is a candidate
// for replacement.
static bool shouldReplaceSubresourceURL(const URL& url)
{
    if (url.protocolIsInHTTPFamily())
        return false;
    return !url.protocolIsData();
}
201
// True only when attachment element support is compiled in and enabled at runtime.
static bool shouldReplaceRichContentWithAttachments()
{
#if !ENABLE(ATTACHMENT_ELEMENT)
    return false;
#else
    auto& runtimeFeatures = RuntimeEnabledFeatures::sharedFeatures();
    return runtimeFeatures.attachmentElementEnabled();
#endif
}
210
211 #if ENABLE(ATTACHMENT_ELEMENT)
212
// Reports whether the content type is a supported image MIME type. A declared UTI is first
// mapped to its MIME type; anything else is checked as given.
static bool contentTypeIsSuitableForInlineImageRepresentation(const String& contentType)
{
    if (isDeclaredUTI(contentType))
        return MIMETypeRegistry::isSupportedImageMIMEType(MIMETypeFromUTI(contentType));
    return MIMETypeRegistry::isSupportedImageMIMEType(contentType);
}
217
// Asks the frame's editor client whether it supports client-side attachment data; false when
// the editor has no client.
static bool supportsClientSideAttachmentData(const Frame& frame)
{
    auto* editorClient = frame.editor().client();
    return editorClient && editorClient->supportsClientSideAttachmentData();
}
225
226 #endif
227
// Creates a fragment that represents the image data in |buffer| as an attachment.
//
// With client-side attachment data support, the data is registered with the editor under the
// attachment's unique identifier; the fragment then holds either an image element backed by a blob
// URL (for content types suitable for inline display) or the attachment element itself. Without
// that support the attachment element is backed by a File created from the data.
//
// When attachment elements are compiled out, an empty fragment is returned.
static Ref<DocumentFragment> createFragmentForImageAttachment(Frame& frame, Document& document, Ref<SharedBuffer>&& buffer, const String& contentType)
{
#if ENABLE(ATTACHMENT_ELEMENT)
    auto attachment = HTMLAttachmentElement::create(HTMLNames::attachmentTag, document);
    // FIXME: This fallback image name needs to be a localized string.
    String defaultImageAttachmentName { "image"_s };

    auto fragment = document.createDocumentFragment();
    if (supportsClientSideAttachmentData(frame)) {
        // Give the editor its own reference rather than moving |buffer|: the buffer is still read
        // below when creating the blob or computing the size.
        frame.editor().registerAttachmentIdentifier(attachment->ensureUniqueIdentifier(), contentType, defaultImageAttachmentName, buffer.copyRef());
        if (contentTypeIsSuitableForInlineImageRepresentation(contentType)) {
            auto image = HTMLImageElement::create(document);
            image->setAttributeWithoutSynchronization(HTMLNames::srcAttr, DOMURL::createObjectURL(document, Blob::create(buffer.get(), contentType)));
            image->setAttachmentElement(WTFMove(attachment));
            fragment->appendChild(WTFMove(image));
        } else {
            attachment->updateAttributes(buffer->size(), contentType, defaultImageAttachmentName);
            fragment->appendChild(WTFMove(attachment));
        }
    } else {
        attachment->setFile(File::create(Blob::create(buffer.get(), contentType), defaultImageAttachmentName), HTMLAttachmentElement::UpdateDisplayAttributes::Yes);
        fragment->appendChild(WTFMove(attachment));
    }
    return fragment;
#else
    UNUSED_PARAM(frame);
    UNUSED_PARAM(buffer);
    UNUSED_PARAM(contentType);
    return document.createDocumentFragment();
#endif
}
257
// Rewrites rich content in |fragment| in terms of attachment elements, using |subresources| as the
// source of the underlying data. Does nothing when |subresources| is empty or attachment elements
// are compiled out. Steps, in order:
//   1. Index the subresources whose URL should be replaced by URL string.
//   2. For attachment elements already in the fragment whose archive resource URL is in the index,
//      register the serialized data with the editor.
//   3. Collect image elements (by src) and object elements (by data) that reference an indexed
//      resource; object elements with an empty data attribute are queued for removal.
//   4. For each collected element, create an attachment element and either associate it with the
//      image or replace the original element with it.
//   5. Remove the queued object elements.
static void replaceRichContentWithAttachments(Frame& frame, DocumentFragment& fragment, const Vector<Ref<ArchiveResource>>& subresources)
{
#if ENABLE(ATTACHMENT_ELEMENT)
    // Everything needed to build an attachment for one original element.
    struct AttachmentInsertionInfo {
        String fileName;
        String contentType;
        Ref<SharedBuffer> data;
        Ref<Element> originalElement;
    };

    ASSERT(RuntimeEnabledFeatures::sharedFeatures().attachmentElementEnabled());
    if (subresources.isEmpty())
        return;

    // FIXME: Handle resources in subframe archives.
    HashMap<AtomicString, Ref<ArchiveResource>> urlToResourceMap;
    for (auto& subresource : subresources) {
        auto& url = subresource->url();
        if (shouldReplaceSubresourceURL(url))
            urlToResourceMap.set(url.string(), subresource.copyRef());
    }

    // Existing attachment elements: look up their data by archive resource URL and register it in one batch.
    Vector<SerializedAttachmentData> serializedAttachmentData;
    for (auto& attachment : descendantsOfType<HTMLAttachmentElement>(fragment)) {
        auto resourceURL = HTMLAttachmentElement::archiveResourceURL(attachment.uniqueIdentifier());
        auto resourceEntry = urlToResourceMap.find(resourceURL.string());
        if (resourceEntry == urlToResourceMap.end())
            continue;

        auto& resource = resourceEntry->value;
        serializedAttachmentData.append({ attachment.uniqueIdentifier(), resource->mimeType(), resource->data() });
    }

    if (!serializedAttachmentData.isEmpty())
        frame.editor().registerAttachments(WTFMove(serializedAttachmentData));

    // The loops below only record what to change; the fragment itself is modified afterwards.
    // NOTE(review): presumably so the tree is not mutated while descendantsOfType is iterating - confirm.
    Vector<Ref<Element>> elementsToRemove;
    Vector<AttachmentInsertionInfo> attachmentInsertionInfo;
    for (auto& image : descendantsOfType<HTMLImageElement>(fragment)) {
        auto resourceURLString = image.attributeWithoutSynchronization(HTMLNames::srcAttr);
        if (resourceURLString.isEmpty())
            continue;

        auto resource = urlToResourceMap.find(resourceURLString);
        if (resource == urlToResourceMap.end())
            continue;

        // File name preference: alt text, then the last path component of the src URL, then "media".
        auto name = image.attributeWithoutSynchronization(HTMLNames::altAttr);
        if (name.isEmpty())
            name = URLParser { resourceURLString }.result().lastPathComponent();
        if (name.isEmpty())
            name = AtomicString("media");

        attachmentInsertionInfo.append({ name, resource->value->mimeType(), resource->value->data(), image });
    }

    for (auto& object : descendantsOfType<HTMLObjectElement>(fragment)) {
        auto resourceURLString = object.attributeWithoutSynchronization(HTMLNames::dataAttr);
        if (resourceURLString.isEmpty()) {
            // An object element without a data attribute is dropped from the fragment.
            elementsToRemove.append(object);
            continue;
        }

        auto resource = urlToResourceMap.find(resourceURLString);
        if (resource == urlToResourceMap.end())
            continue;

        // File name preference: the last path component of the data URL, then "file".
        auto name = URLParser { resourceURLString }.result().lastPathComponent();
        if (name.isEmpty())
            name = AtomicString("file");

        attachmentInsertionInfo.append({ name, resource->value->mimeType(), resource->value->data(), object });
    }

    for (auto& info : attachmentInsertionInfo) {
        auto originalElement = WTFMove(info.originalElement);
        auto parent = makeRefPtr(originalElement->parentNode());
        if (!parent)
            continue;

        auto attachment = HTMLAttachmentElement::create(HTMLNames::attachmentTag, fragment.document());
        if (supportsClientSideAttachmentData(frame)) {
            if (is<HTMLImageElement>(originalElement.get()) && contentTypeIsSuitableForInlineImageRepresentation(info.contentType)) {
                // The image stays in the fragment: it is pointed at a blob URL and linked to the attachment.
                auto& image = downcast<HTMLImageElement>(originalElement.get());
                image.setAttributeWithoutSynchronization(HTMLNames::srcAttr, DOMURL::createObjectURL(*frame.document(), Blob::create(info.data, info.contentType)));
                image.setAttachmentElement(attachment.copyRef());
            } else {
                attachment->updateAttributes(info.data->size(), info.contentType, info.fileName);
                parent->replaceChild(attachment, WTFMove(originalElement));
            }
            // Registration comes last because it moves the content type, file name and data out of |info|.
            frame.editor().registerAttachmentIdentifier(attachment->ensureUniqueIdentifier(), WTFMove(info.contentType), WTFMove(info.fileName), WTFMove(info.data));
        } else {
            attachment->setFile(File::create(Blob::create(WTFMove(info.data), WTFMove(info.contentType)), WTFMove(info.fileName)), HTMLAttachmentElement::UpdateDisplayAttributes::Yes);
            parent->replaceChild(WTFMove(attachment), WTFMove(originalElement));
        }
    }

    for (auto& elementToRemove : elementsToRemove)
        elementToRemove->remove();
#else
    UNUSED_PARAM(fragment);
    UNUSED_PARAM(subresources);
#endif
}
362
// Asks the editor client for a replacement URL for every subresource whose URL should be
// replaced, and rewrites the fragment to use those URLs. Subresources that are skipped, or for
// which the client returns an empty URL, are appended to |outUnreplacedResources|.
// The fragment's document must have a frame.
static void replaceSubresourceURLsWithURLsFromClient(DocumentFragment& fragment, const Vector<Ref<ArchiveResource>>& subresources, Vector<Ref<ArchiveResource>>& outUnreplacedResources)
{
    ASSERT(fragment.document().frame());
    auto& frame = *fragment.document().frame();

    HashMap<AtomicString, AtomicString> clientURLsByOriginalURL;
    for (auto& resource : subresources) {
        auto& resourceURL = resource->url();
        if (shouldReplaceSubresourceURL(resourceURL)) {
            auto clientURL = frame.editor().clientReplacementURLForResource(resource->data(), resource->mimeType());
            if (!clientURL.isEmpty()) {
                clientURLsByOriginalURL.set(resourceURL.string(), clientURL);
                continue;
            }
        }

        // Either the URL is not replaceable or the client declined to provide a replacement.
        outUnreplacedResources.append(resource.copyRef());
    }

    if (clientURLsByOriginalURL.isEmpty())
        return;

    replaceSubresourceURLs(fragment, WTFMove(clientURLsByOriginalURL));
}
387
// Converts |string| into a document fragment for the frame's document and makes its subresources
// available. Returns null when the frame has no page or document, the document is not an HTML
// document, |string| is nil, or the conversion yields no fragment.
//
// Without custom pasteboard data, the resources are added to the document loader as archive
// resources. Otherwise subresource URLs are replaced with client-provided URLs, and whatever
// remains is either turned into attachments or exposed through blob URLs.
RefPtr<DocumentFragment> createFragmentAndAddResources(Frame& frame, NSAttributedString *string)
{
    if (!frame.page() || !frame.document())
        return nullptr;

    auto& document = *frame.document();
    if (!document.isHTMLDocument() || !string)
        return nullptr;

    DeferredLoadingScope scope(frame);
    auto converted = createFragment(frame, string);
    if (!converted.fragment)
        return nullptr;

    if (!RuntimeEnabledFeatures::sharedFeatures().customPasteboardDataEnabled()) {
        auto* documentLoader = frame.loader().documentLoader();
        if (documentLoader) {
            for (auto& archiveResource : converted.resources)
                documentLoader->addArchiveResource(archiveResource.copyRef());
        }
        return WTFMove(converted.fragment);
    }

    Vector<Ref<ArchiveResource>> unreplacedResources;
    replaceSubresourceURLsWithURLsFromClient(*converted.fragment, converted.resources, unreplacedResources);

    if (shouldReplaceRichContentWithAttachments())
        replaceRichContentWithAttachments(frame, *converted.fragment, unreplacedResources);
    else {
        // Expose every resource the client did not replace through a blob URL in this document.
        HashMap<AtomicString, AtomicString> blobURLsByOriginalURL;
        for (const Ref<ArchiveResource>& unreplacedResource : unreplacedResources) {
            auto resourceBlob = Blob::create(unreplacedResource->data(), unreplacedResource->mimeType());
            String resourceBlobURL = DOMURL::createObjectURL(document, resourceBlob);
            blobURLsByOriginalURL.set(unreplacedResource->url().string(), resourceBlobURL);
        }
        replaceSubresourceURLs(*converted.fragment, WTFMove(blobURLsByOriginalURL));
    }

    return WTFMove(converted.fragment);
}
428
// Result of extractMarkupAndArchive(): the pieces of a web archive needed for pasting.
struct MarkupAndArchive {
    // The main resource's data decoded as UTF-8.
    String markup;
    // The archive's main resource.
    Ref<ArchiveResource> mainResource;
    // The archive the markup and main resource came from.
    Ref<Archive> archive;
};
434
// Parses |buffer| as a legacy web archive and returns its main resource decoded as UTF-8 markup,
// along with the main resource and the archive. Returns nullopt when the archive cannot be
// created, has no main resource, or |canShowMIMETypeAsHTML| rejects the main resource's MIME type.
static std::optional<MarkupAndArchive> extractMarkupAndArchive(SharedBuffer& buffer, const std::function<bool(const String)>& canShowMIMETypeAsHTML)
{
    auto webArchive = LegacyWebArchive::create(URL(), buffer);
    if (!webArchive)
        return std::nullopt;

    RefPtr<ArchiveResource> archiveMainResource = webArchive->mainResource();
    if (!archiveMainResource)
        return std::nullopt;

    auto mainResourceType = archiveMainResource->mimeType();
    if (!canShowMIMETypeAsHTML(mainResourceType))
        return std::nullopt;

    // Decode the markup before the resource reference is released into the result.
    auto decodedMarkup = String::fromUTF8(archiveMainResource->data().data(), archiveMainResource->data().size());
    return MarkupAndArchive { WTFMove(decodedMarkup), archiveMainResource.releaseNonNull(), webArchive.releaseNonNull() };
}
451
// Produces sanitized markup for the contents of a web archive.
//
// The archive's markup is parsed into the document of a separate page created for sanitizing, with
// scripting and plug-in content disallowed. Subresource URLs are first replaced with URLs supplied
// by the editor client. What remains is then either converted to attachments (when rich content is
// replaced with attachments) or mapped to blob URLs created in |destinationDocument|. Subframe
// archives are sanitized recursively, with MSO list quirks disabled, and likewise exposed as blobs.
//
// |canShowMIMETypeAsHTML| decides which subframe main resources are eligible for this treatment.
static String sanitizeMarkupWithArchive(Frame& frame, Document& destinationDocument, MarkupAndArchive& markupAndArchive, MSOListQuirks msoListQuirks, const std::function<bool(const String)>& canShowMIMETypeAsHTML)
{
    auto page = createPageForSanitizingWebContent();
    Document* stagingDocument = page->mainFrame().document();
    ASSERT(stagingDocument);
    auto fragment = createFragmentFromMarkup(*stagingDocument, markupAndArchive.markup, markupAndArchive.mainResource->url(), DisallowScriptingAndPluginContent);

    Vector<Ref<ArchiveResource>> unreplacedResources;
    replaceSubresourceURLsWithURLsFromClient(fragment, markupAndArchive.archive->subresources(), unreplacedResources);

    // With attachments enabled, the blob URL and subframe handling below is skipped entirely.
    if (shouldReplaceRichContentWithAttachments()) {
        replaceRichContentWithAttachments(frame, fragment, unreplacedResources);
        return sanitizedMarkupForFragmentInDocument(WTFMove(fragment), *stagingDocument, msoListQuirks, markupAndArchive.markup);
    }

    // Map each remaining replaceable subresource URL to a blob URL in the destination document.
    HashMap<AtomicString, AtomicString> blobURLMap;
    for (const Ref<ArchiveResource>& subresource : unreplacedResources) {
        auto& subresourceURL = subresource->url();
        if (!shouldReplaceSubresourceURL(subresourceURL))
            continue;
        auto blob = Blob::create(subresource->data(), subresource->mimeType());
        String blobURL = DOMURL::createObjectURL(destinationDocument, blob);
        blobURLMap.set(subresourceURL.string(), blobURL);
    }

    // NOTE(review): contentOrigin is not referenced again within this function - confirm whether it is still needed.
    auto contentOrigin = SecurityOrigin::create(markupAndArchive.mainResource->url());
    for (const Ref<Archive>& subframeArchive : markupAndArchive.archive->subframeArchives()) {
        RefPtr<ArchiveResource> subframeMainResource = subframeArchive->mainResource();
        if (!subframeMainResource)
            continue;

        auto type = subframeMainResource->mimeType();
        if (!canShowMIMETypeAsHTML(type))
            continue;

        auto subframeURL = subframeMainResource->url();
        if (!shouldReplaceSubresourceURL(subframeURL))
            continue;

        // Sanitize the subframe's own markup recursively; the result becomes the blob's contents.
        MarkupAndArchive subframeContent = { String::fromUTF8(subframeMainResource->data().data(), subframeMainResource->data().size()),
            subframeMainResource.releaseNonNull(), subframeArchive.copyRef() };
        auto subframeMarkup = sanitizeMarkupWithArchive(frame, destinationDocument, subframeContent, MSOListQuirks::Disabled, canShowMIMETypeAsHTML);

        // Blob contents are the UTF-8 bytes of the sanitized markup, typed with the original MIME type.
        CString utf8 = subframeMarkup.utf8();
        Vector<uint8_t> blobBuffer;
        blobBuffer.reserveCapacity(utf8.length());
        blobBuffer.append(reinterpret_cast<const uint8_t*>(utf8.data()), utf8.length());
        auto blob = Blob::create(WTFMove(blobBuffer), type);

        String subframeBlobURL = DOMURL::createObjectURL(destinationDocument, blob);
        blobURLMap.set(subframeURL.string(), subframeBlobURL);
    }

    replaceSubresourceURLs(fragment.get(), WTFMove(blobURLMap));

    return sanitizedMarkupForFragmentInDocument(WTFMove(fragment), *stagingDocument, msoListQuirks, markupAndArchive.markup);
}
509
// Reads a web archive from |buffer| into |fragment|. Returns false when images are preferred by
// MIME type, the frame has no document, the archive cannot be used, or no fragment results.
// Depending on the custom pasteboard data feature and shouldSanitize(), the markup is either
// parsed directly or sanitized first.
bool WebContentReader::readWebArchive(SharedBuffer& buffer)
{
    if (frame.settings().preferMIMETypeForImages() || !frame.document())
        return false;

    DeferredLoadingScope scope(frame);

    auto canShowAsHTML = [&] (const String& type) {
        return frame.loader().client().canShowMIMETypeAsHTML(type);
    };

    auto markupAndArchive = extractMarkupAndArchive(buffer, canShowAsHTML);
    if (!markupAndArchive)
        return false;

    bool customPasteboardDataEnabled = RuntimeEnabledFeatures::sharedFeatures().customPasteboardDataEnabled();
    if (!customPasteboardDataEnabled || !shouldSanitize()) {
        fragment = createFragmentFromMarkup(*frame.document(), markupAndArchive->markup, markupAndArchive->mainResource->url(), DisallowScriptingAndPluginContent);

        // Archive resources are only handed to the document loader when custom pasteboard data is off.
        if (!customPasteboardDataEnabled) {
            if (auto* documentLoader = frame.loader().documentLoader())
                documentLoader->addAllArchiveResources(markupAndArchive->archive.get());
        }
        return true;
    }

    String sanitizedMarkup = sanitizeMarkupWithArchive(frame, *frame.document(), *markupAndArchive, msoListQuirksForMarkup(), canShowAsHTML);
    fragment = createFragmentFromMarkup(*frame.document(), sanitizedMarkup, blankURL(), DisallowScriptingAndPluginContent);

    return !!fragment;
}
544
// Reads a web archive from |buffer| into |markup|, sanitizing it when shouldSanitize() says so.
// Returns false when the frame has no document or the archive cannot be used.
bool WebContentMarkupReader::readWebArchive(SharedBuffer& buffer)
{
    if (!frame.document())
        return false;

    auto canShowAsHTML = [&] (const String& type) {
        return frame.loader().client().canShowMIMETypeAsHTML(type);
    };

    auto markupAndArchive = extractMarkupAndArchive(buffer, canShowAsHTML);
    if (!markupAndArchive)
        return false;

    if (shouldSanitize())
        markup = sanitizeMarkupWithArchive(frame, *frame.document(), *markupAndArchive, msoListQuirksForMarkup(), canShowAsHTML);
    else
        markup = markupAndArchive->markup;

    return true;
}
567
// On Mac, drops everything before the first "<html" (matched ignoring ASCII case) when the string
// starts with "Version:". In all other cases the string is returned unchanged.
static String stripMicrosoftPrefix(const String& string)
{
#if PLATFORM(MAC)
    // Added in 2004 to make HTML pasted from Microsoft Word on Mac work. It is a simple-minded
    // way of ignoring the CF_HTML clipboard format: skip the description part and parse the
    // entire context plus fragment.
    if (!string.startsWith("Version:"))
        return string;

    size_t htmlTagStart = string.findIgnoringASCIICase("<html");
    if (htmlTagStart == notFound)
        return string;

    return string.substring(htmlTagStart);
#else
    return string;
#endif
}
582
// Adds a fragment parsed from the given HTML string. Returns false when images are preferred by
// MIME type, the frame has no document, or nothing is left after stripping the Microsoft prefix.
// The markup is sanitized only when custom pasteboard data is enabled and shouldSanitize() is true.
bool WebContentReader::readHTML(const String& string)
{
    if (frame.settings().preferMIMETypeForImages() || !frame.document())
        return false;
    Document& document = *frame.document();

    String markup = stripMicrosoftPrefix(string);
    if (markup.isEmpty())
        return false;

    bool shouldSanitizeMarkup = RuntimeEnabledFeatures::sharedFeatures().customPasteboardDataEnabled() && shouldSanitize();
    if (shouldSanitizeMarkup) {
        // While sanitizing, strip URL attributes that point at subresources needing replacement.
        markup = sanitizeMarkup(markup, msoListQuirksForMarkup(), WTF::Function<void (DocumentFragment&)> { [] (DocumentFragment& fragment) {
            removeSubresourceURLAttributes(fragment, [] (const URL& url) {
                return shouldReplaceSubresourceURL(url);
            });
        } });
    }

    addFragment(createFragmentFromMarkup(document, markup, emptyString(), DisallowScriptingAndPluginContent));
    return true;
}
606
// Stores the given HTML in |markup| after stripping the Microsoft prefix, sanitizing it when
// shouldSanitize() is true. Returns false when the frame has no document or the result is empty.
bool WebContentMarkupReader::readHTML(const String& string)
{
    if (!frame.document())
        return false;

    String htmlWithoutPrefix = stripMicrosoftPrefix(string);
    if (!shouldSanitize()) {
        markup = htmlWithoutPrefix;
        return !markup.isEmpty();
    }

    // While sanitizing, strip URL attributes that point at subresources needing replacement.
    markup = sanitizeMarkup(htmlWithoutPrefix, msoListQuirksForMarkup(), WTF::Function<void (DocumentFragment&)> { [] (DocumentFragment& fragment) {
        removeSubresourceURLAttributes(fragment, [] (const URL& url) {
            return shouldReplaceSubresourceURL(url);
        });
    } });
    return !markup.isEmpty();
}
624
// Converts RTFD data into a fragment and adds it. Returns false when images are preferred by
// MIME type, the frame has no document, or no fragment could be created.
bool WebContentReader::readRTFD(SharedBuffer& buffer)
{
    if (frame.settings().preferMIMETypeForImages() || !frame.document())
        return false;

    auto attributedString = adoptNS([[NSAttributedString alloc] initWithRTFD:buffer.createNSData().get() documentAttributes:nullptr]);
    if (auto convertedFragment = createFragmentAndAddResources(frame, attributedString.get())) {
        addFragment(convertedFragment.releaseNonNull());
        return true;
    }

    return false;
}
638
// Converts RTFD data into a fragment and stores its serialization in |markup|. Returns false
// when the frame has no document or no fragment could be created.
bool WebContentMarkupReader::readRTFD(SharedBuffer& buffer)
{
    if (!frame.document())
        return false;

    auto attributedString = adoptNS([[NSAttributedString alloc] initWithRTFD:buffer.createNSData().get() documentAttributes:nullptr]);
    auto convertedFragment = createFragmentAndAddResources(frame, attributedString.get());
    if (convertedFragment) {
        markup = serializeFragment(*convertedFragment, SerializedNodes::SubtreeIncludingNode);
        return true;
    }

    return false;
}
651
// Converts RTF data into a fragment and adds it. Returns false when images are preferred by
// MIME type, the frame has no document, or no fragment could be created.
bool WebContentReader::readRTF(SharedBuffer& buffer)
{
    // Check for a document up front, as the other RTF/RTFD readers do, so the RTF data is not
    // parsed into an attributed string when the conversion cannot succeed anyway.
    if (frame.settings().preferMIMETypeForImages() || !frame.document())
        return false;

    auto string = adoptNS([[NSAttributedString alloc] initWithRTF:buffer.createNSData().get() documentAttributes:nullptr]);
    auto fragment = createFragmentAndAddResources(frame, string.get());
    if (!fragment)
        return false;
    addFragment(fragment.releaseNonNull());

    return true;
}
665
// Converts RTF data into a fragment and stores its serialization in |markup|. Returns false
// when the frame has no document or no fragment could be created.
bool WebContentMarkupReader::readRTF(SharedBuffer& buffer)
{
    if (!frame.document())
        return false;

    auto attributedString = adoptNS([[NSAttributedString alloc] initWithRTF:buffer.createNSData().get() documentAttributes:nullptr]);
    auto convertedFragment = createFragmentAndAddResources(frame, attributedString.get());
    if (convertedFragment) {
        markup = serializeFragment(*convertedFragment, SerializedNodes::SubtreeIncludingNode);
        return true;
    }

    return false;
}
677
// Adds a fragment created from the canonically precomposed form of |text| and records that the
// fragment came from plain text. Returns false when plain text is not allowed.
bool WebContentReader::readPlainText(const String& text)
{
    if (!allowPlainText)
        return false;

    NSString *precomposedText = [text precomposedStringWithCanonicalMapping];
    addFragment(createFragmentFromText(context, precomposedText));
    madeFragmentFromPlainText = true;

    return true;
}
688
// Adds a fragment representing the image data in |buffer|. The editor client is asked for a
// replacement URL first (with a UTI type mapped to its MIME type); otherwise the image becomes
// either an attachment or an image element pointing at a blob URL. The frame must have a document.
bool WebContentReader::readImage(Ref<SharedBuffer>&& buffer, const String& type)
{
    ASSERT(frame.document());
    auto& document = *frame.document();

    String clientContentType = isDeclaredUTI(type) ? MIMETypeFromUTI(type) : type;
    auto clientURL = frame.editor().clientReplacementURLForResource(buffer.copyRef(), clientContentType);
    if (!clientURL.isEmpty()) {
        addFragment(createFragmentForImageAndURL(document, clientURL));
        return true;
    }

    if (!shouldReplaceRichContentWithAttachments()) {
        auto imageBlobURL = DOMURL::createObjectURL(document, Blob::create(buffer.get(), type));
        addFragment(createFragmentForImageAndURL(document, imageBlobURL));
    } else
        addFragment(createFragmentForImageAttachment(frame, document, WTFMove(buffer), type));

    // Succeeds only if a fragment exists once the content has been added.
    return fragment;
}
707
// Appends one attachment (or, for inline-displayable image types with client-side attachment
// data, one image element linked to an attachment) per file path to |fragment|, creating the
// fragment if needed. Returns false when |paths| is empty or the frame has no document.
//
// When attachment elements are compiled out or disabled at runtime, nothing is appended but the
// (possibly newly created, empty) fragment is kept and true is returned.
bool WebContentReader::readFilePaths(const Vector<String>& paths)
{
    if (paths.isEmpty() || !frame.document())
        return false;

    auto& document = *frame.document();
    if (!fragment)
        fragment = document.createDocumentFragment();

#if ENABLE(ATTACHMENT_ELEMENT)
    if (RuntimeEnabledFeatures::sharedFeatures().attachmentElementEnabled()) {
        for (auto& path : paths) {
            auto attachment = HTMLAttachmentElement::create(HTMLNames::attachmentTag, document);
            if (supportsClientSideAttachmentData(frame)) {
                String contentType;
                std::optional<uint64_t> fileSizeForDisplay;
                if (FileSystem::fileIsDirectory(path, FileSystem::ShouldFollowSymbolicLinks::Yes))
                    contentType = kUTTypeDirectory;
                else {
                    // Only report a size that was actually determined; on failure the display
                    // size stays unset instead of being read from an uninitialized variable.
                    long long fileSize = 0;
                    if (FileSystem::getFileSize(path, fileSize))
                        fileSizeForDisplay = fileSize;
                    contentType = File::contentTypeForFile(path);
                    if (contentType.isEmpty())
                        contentType = kUTTypeData;
                }
                frame.editor().registerAttachmentIdentifier(attachment->ensureUniqueIdentifier(), contentType, path);
                if (contentTypeIsSuitableForInlineImageRepresentation(contentType)) {
                    auto image = HTMLImageElement::create(document);
                    image->setAttributeWithoutSynchronization(HTMLNames::srcAttr, DOMURL::createObjectURL(document, File::create(path)));
                    image->setAttachmentElement(WTFMove(attachment));
                    fragment->appendChild(image);
                } else {
                    attachment->updateAttributes(WTFMove(fileSizeForDisplay), contentType, FileSystem::pathGetFileName(path));
                    fragment->appendChild(attachment);
                }
            } else {
                attachment->setFile(File::create(path), HTMLAttachmentElement::UpdateDisplayAttributes::Yes);
                fragment->appendChild(attachment);
            }
        }
    }
#endif

    return true;
}
754
// Adds a fragment containing a link to |url|, labelled with |title| or, when the title is empty,
// with the URL's absolute string. A line break precedes the link when a fragment already exists.
// Returns false for an empty URL. On iOS-family platforms the URL may instead be read as plain
// text when the selection is not richly editable, and file URLs are rejected.
bool WebContentReader::readURL(const URL& url, const String& title)
{
    if (url.isEmpty())
        return false;

#if PLATFORM(IOS_FAMILY)
    // FIXME: This code shouldn't be accessing selection and changing the behavior.
    if (!frame.editor().client()->hasRichlyEditableSelection() && readPlainText([(NSURL *)url absoluteString]))
        return true;

    if ([(NSURL *)url isFileURL])
        return false;
#endif // PLATFORM(IOS_FAMILY)

    auto document = makeRef(*frame.document());

    auto link = HTMLAnchorElement::create(document.get());
    link->setAttributeWithoutSynchronization(HTMLNames::hrefAttr, url.string());

    NSString *linkText = title.isEmpty() ? [(NSURL *)url absoluteString] : (NSString *)title;
    link->appendChild(document->createTextNode([linkText precomposedStringWithCanonicalMapping]));

    auto linkFragment = document->createDocumentFragment();
    // Separate the link from content that was already read.
    if (fragment)
        linkFragment->appendChild(HTMLBRElement::create(document.get()));
    linkFragment->appendChild(link);

    addFragment(WTFMove(linkFragment));
    return true;
}
785
786 }