Move URL from WebCore to WTF
[WebKit-https.git] / Source / WebCore / editing / cocoa / WebContentReaderCocoa.mm
1 /*
2  * Copyright (C) 2006-2017 Apple Inc. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  * 1. Redistributions of source code must retain the above copyright
8  *    notice, this list of conditions and the following disclaimer.
9  * 2. Redistributions in binary form must reproduce the above copyright
10  *    notice, this list of conditions and the following disclaimer in the
11  *    documentation and/or other materials provided with the distribution.
12  *
13  * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
14  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
17  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
20  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
21  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24  */
25
26 #import "config.h"
27 #import "WebContentReader.h"
28
29 #import "ArchiveResource.h"
30 #import "Blob.h"
31 #import "BlobURL.h"
32 #import "CachedResourceLoader.h"
33 #import "DOMURL.h"
34 #import "Document.h"
35 #import "DocumentFragment.h"
36 #import "DocumentLoader.h"
37 #import "Editor.h"
38 #import "EditorClient.h"
39 #import "File.h"
40 #import "FileSystem.h"
41 #import "Frame.h"
42 #import "FrameLoader.h"
43 #import "FrameLoaderClient.h"
44 #import "HTMLAnchorElement.h"
45 #import "HTMLAttachmentElement.h"
46 #import "HTMLBRElement.h"
47 #import "HTMLBodyElement.h"
48 #import "HTMLIFrameElement.h"
49 #import "HTMLImageElement.h"
50 #import "HTMLObjectElement.h"
51 #import "LegacyWebArchive.h"
52 #import "MIMETypeRegistry.h"
53 #import "Page.h"
54 #import "PublicURLManager.h"
55 #import "RenderView.h"
56 #import "RuntimeEnabledFeatures.h"
57 #import "SerializedAttachmentData.h"
58 #import "Settings.h"
59 #import "SocketProvider.h"
60 #import "TypedElementDescendantIterator.h"
61 #import "UTIUtilities.h"
62 #import "WebArchiveResourceFromNSAttributedString.h"
63 #import "WebArchiveResourceWebResourceHandler.h"
64 #import "WebNSAttributedStringExtras.h"
65 #import "markup.h"
66 #import <pal/spi/cocoa/NSAttributedStringSPI.h>
67 #import <wtf/SoftLinking.h>
68 #import <wtf/URLParser.h>
69
70 #if PLATFORM(MAC)
71 #include "LocalDefaultSystemAppearance.h"
72 #endif
73
74 #if (PLATFORM(IOS_FAMILY) && __IPHONE_OS_VERSION_MIN_REQUIRED >= 110000) || (PLATFORM(MAC) && __MAC_OS_X_VERSION_MIN_REQUIRED >= 101300)
75 @interface NSAttributedString ()
76 - (NSString *)_htmlDocumentFragmentString:(NSRange)range documentAttributes:(NSDictionary *)dict subresources:(NSArray **)subresources;
77 @end
78 #elif PLATFORM(IOS_FAMILY)
79 SOFT_LINK_PRIVATE_FRAMEWORK(WebKitLegacy)
80 #elif PLATFORM(MAC)
81 SOFT_LINK_FRAMEWORK_IN_UMBRELLA(WebKit, WebKitLegacy)
82 #endif
83
84 #if (PLATFORM(IOS_FAMILY) && __IPHONE_OS_VERSION_MIN_REQUIRED < 110000) || (PLATFORM(MAC) && __MAC_OS_X_VERSION_MIN_REQUIRED < 101300)
85 SOFT_LINK(WebKitLegacy, _WebCreateFragment, void, (WebCore::Document& document, NSAttributedString *string, WebCore::FragmentAndResources& result), (document, string, result))
86 #endif
87
88 namespace WebCore {
89
90 #if PLATFORM(IOSMAC)
91
// Variant used when PLATFORM(IOSMAC) is set: performs no conversion and always hands back a
// value-initialized result, whose null fragment callers treat as failure.
static FragmentAndResources createFragment(Frame&, NSAttributedString *)
{
    return FragmentAndResources { };
}
96
97 #elif (PLATFORM(IOS_FAMILY) && __IPHONE_OS_VERSION_MIN_REQUIRED >= 110000) || (PLATFORM(MAC) && __MAC_OS_X_VERSION_MIN_REQUIRED >= 101300)
98
// Builds the document attributes passed to -_htmlDocumentFragmentString:documentAttributes:subresources:.
// This function needs to be kept in sync with identically named one in WebKitLegacy, which is used on older OS versions.
static NSDictionary *attributesForAttributedStringConversion()
{
#if PLATFORM(IOS_FAMILY)
    static NSString * const NSExcludedElementsDocumentAttribute = @"ExcludedElements";
#endif

    RetainPtr<NSMutableArray> elementsToExclude = adoptNS([[NSMutableArray alloc] initWithArray:@[
        // Omit style since we want style to be inline so the fragment can be easily inserted.
        @"style",
        // Omit xml so the result is not XHTML.
        @"xml",
        // Omit tags that will get stripped when converted to a fragment anyway.
        @"doctype", @"html", @"head", @"body",
        // Omit deprecated tags.
        @"applet", @"basefont", @"center", @"dir", @"font", @"menu", @"s", @"strike", @"u",
    ]]);

    // Omit object so no file attachments are part of the fragment, unless attachment elements
    // are both compiled in and enabled at runtime.
#if ENABLE(ATTACHMENT_ELEMENT)
    bool shouldExcludeObjectElement = !RuntimeEnabledFeatures::sharedFeatures().attachmentElementEnabled();
#else
    bool shouldExcludeObjectElement = true;
#endif
    if (shouldExcludeObjectElement)
        [elementsToExclude addObject:@"object"];

    NSURL *outputBaseURL = URL::fakeURLWithRelativePart(emptyString());

    // The output base URL needs +1 refcount to work around the fact that NSHTMLReader over-releases it.
    CFRetain((__bridge CFTypeRef)outputBaseURL);

    WebArchiveResourceWebResourceHandler *resourceHandler = [[[WebArchiveResourceWebResourceHandler alloc] init] autorelease];

    return @{
        NSExcludedElementsDocumentAttribute: elementsToExclude.get(),
        @"InterchangeNewline": @YES,
        @"CoalesceTabSpans": @YES,
        @"OutputBaseURL": outputBaseURL,
        @"WebResourceHandler": resourceHandler,
    };
}
139
// Converts |string| to an HTML fragment string via the NSAttributedString SPI, parses that string
// into a new DocumentFragment of the frame's document (scripting and plugin content disallowed),
// and collects the archive resources reported by the conversion.
// The caller must ensure frame.document() is non-null.
static FragmentAndResources createFragment(Frame& frame, NSAttributedString *string)
{
    auto& document = *frame.document();

#if PLATFORM(MAC)
    // Keep the frame view's dark/light appearance in effect for the duration of the conversion.
    auto* frameView = frame.view();
    LocalDefaultSystemAppearance localAppearance(frameView && frameView->useDarkAppearance());
#endif

    NSArray *convertedSubresources = nil;
    NSRange entireString = NSMakeRange(0, [string length]);
    NSString *html = [string _htmlDocumentFragmentString:entireString documentAttributes:attributesForAttributedStringConversion() subresources:&convertedSubresources];

    auto parsedFragment = DocumentFragment::create(document);
    parsedFragment->parseHTML(html, document.body(), DisallowScriptingAndPluginContent);

    FragmentAndResources result;
    result.fragment = WTFMove(parsedFragment);
    for (WebArchiveResourceFromNSAttributedString *converted in convertedSubresources)
        result.resources.append(*converted->resource);

    return result;
}
161
162 #else
163
// Fallback variant: delegates the whole conversion to the soft-linked _WebCreateFragment(),
// which fills in the fragment and resources. The caller must ensure frame.document() is non-null.
static FragmentAndResources createFragment(Frame& frame, NSAttributedString *string)
{
    auto& document = *frame.document();
    FragmentAndResources fragmentAndResources;
    _WebCreateFragment(document, string, fragmentAndResources);
    return fragmentAndResources;
}
170
171 #endif
172
// Scope guard that, for its lifetime, makes the frame's page defer loading and disables image
// loading on the document's CachedResourceLoader. Each setting is restored on destruction only if
// this scope was the one that changed it, so an enclosing scope's state is left untouched.
// The caller must ensure frame.document() is non-null; a null frame.page() is tolerated.
class DeferredLoadingScope {
public:
    explicit DeferredLoadingScope(Frame& frame)
        : m_frame(frame)
        , m_cachedResourceLoader(frame.document()->cachedResourceLoader())
    {
        // Not every caller checks frame.page() first, so do not assume the frame has a page.
        auto* page = frame.page();
        if (page && !page->defersLoading()) {
            page->setDefersLoading(true);
            m_didEnabledDeferredLoading = true;
        }

        if (m_cachedResourceLoader->imagesEnabled()) {
            m_cachedResourceLoader->setImagesEnabled(false);
            m_didDisableImage = true;
        }
    }

    // Copying or moving the guard would restore the settings more than once (or through a
    // moved-from Ref), so forbid both.
    DeferredLoadingScope(const DeferredLoadingScope&) = delete;
    DeferredLoadingScope& operator=(const DeferredLoadingScope&) = delete;

    ~DeferredLoadingScope()
    {
        if (m_didDisableImage)
            m_cachedResourceLoader->setImagesEnabled(true);

        if (!m_didEnabledDeferredLoading)
            return;

        // m_frame keeps the frame alive, but not its page; re-check before restoring.
        if (auto* page = m_frame->page())
            page->setDefersLoading(false);
    }

private:
    Ref<Frame> m_frame;
    Ref<CachedResourceLoader> m_cachedResourceLoader;
    bool m_didEnabledDeferredLoading { false };
    bool m_didDisableImage { false };
};
204
205
// Returns true for every URL that is neither in the HTTP family nor a data: URL.
static bool shouldReplaceSubresourceURL(const URL& url)
{
    if (url.protocolIsInHTTPFamily())
        return false;
    return !url.protocolIsData();
}
210
// True only when attachment elements are compiled in and enabled at runtime.
static bool shouldReplaceRichContentWithAttachments()
{
#if !ENABLE(ATTACHMENT_ELEMENT)
    return false;
#else
    return RuntimeEnabledFeatures::sharedFeatures().attachmentElementEnabled();
#endif
}
219
220 #if ENABLE(ATTACHMENT_ELEMENT)
221
// Returns whether |contentType| names a supported image MIME type. |contentType| may be either
// a declared UTI (mapped to its MIME type first) or a MIME type.
static bool contentTypeIsSuitableForInlineImageRepresentation(const String& contentType)
{
    if (isDeclaredUTI(contentType))
        return MIMETypeRegistry::isSupportedImageMIMEType(MIMETypeFromUTI(contentType));
    return MIMETypeRegistry::isSupportedImageMIMEType(contentType);
}
226
// Asks the frame's editor client whether it supports client-side attachment data.
// Returns false when the editor has no client.
static bool supportsClientSideAttachmentData(const Frame& frame)
{
    auto* editorClient = frame.editor().client();
    return editorClient && editorClient->supportsClientSideAttachmentData();
}
234
235 #endif
236
// Creates a fragment that represents the image data in |buffer| as an attachment.
//
// With client-side attachment data support, the attachment is registered with the editor; the
// fragment then contains either an <img> backed by a blob URL with the attachment element attached
// to it (when |contentType| is a supported image type) or the attachment element itself.
// Without that support, the attachment element is given a File wrapping the data.
// When attachment elements are compiled out, an empty fragment is returned.
static Ref<DocumentFragment> createFragmentForImageAttachment(Frame& frame, Document& document, Ref<SharedBuffer>&& buffer, const String& contentType)
{
#if ENABLE(ATTACHMENT_ELEMENT)
    auto attachment = HTMLAttachmentElement::create(HTMLNames::attachmentTag, document);
    // FIXME: This fallback image name needs to be a localized string.
    String defaultImageAttachmentName { "image"_s };

    auto fragment = document.createDocumentFragment();
    if (supportsClientSideAttachmentData(frame)) {
        // Give the editor its own reference instead of moving |buffer| away: the buffer is still
        // read below to create the blob and to report its size.
        frame.editor().registerAttachmentIdentifier(attachment->ensureUniqueIdentifier(), contentType, defaultImageAttachmentName, buffer.copyRef());
        if (contentTypeIsSuitableForInlineImageRepresentation(contentType)) {
            auto image = HTMLImageElement::create(document);
            image->setAttributeWithoutSynchronization(HTMLNames::srcAttr, DOMURL::createObjectURL(document, Blob::create(buffer.get(), contentType)));
            image->setAttachmentElement(WTFMove(attachment));
            fragment->appendChild(WTFMove(image));
        } else {
            attachment->updateAttributes(buffer->size(), contentType, defaultImageAttachmentName);
            fragment->appendChild(WTFMove(attachment));
        }
    } else {
        attachment->setFile(File::create(Blob::create(buffer.get(), contentType), defaultImageAttachmentName), HTMLAttachmentElement::UpdateDisplayAttributes::Yes);
        fragment->appendChild(WTFMove(attachment));
    }
    return fragment;
#else
    UNUSED_PARAM(frame);
    UNUSED_PARAM(buffer);
    UNUSED_PARAM(contentType);
    return document.createDocumentFragment();
#endif
}
266
// Rewrites |fragment| so that content backed by |subresources| is represented by attachment elements:
//  1. Existing <attachment> elements whose archive resource URL matches a subresource have their
//     data registered with the editor.
//  2. <img> and <object> elements whose src/data attribute matches a subresource are turned into
//     attachments (images of a supported type stay inline, with an attachment element attached).
//  3. <object> elements without a data attribute are removed.
// Only subresources for which shouldReplaceSubresourceURL() is true are considered.
// Does nothing when attachment elements are compiled out.
static void replaceRichContentWithAttachments(Frame& frame, DocumentFragment& fragment, const Vector<Ref<ArchiveResource>>& subresources)
{
#if ENABLE(ATTACHMENT_ELEMENT)
    struct AttachmentInsertionInfo {
        String fileName;
        String contentType;
        Ref<SharedBuffer> data;
        Ref<Element> originalElement;
    };

    ASSERT(RuntimeEnabledFeatures::sharedFeatures().attachmentElementEnabled());
    if (subresources.isEmpty())
        return;

    // FIXME: Handle resources in subframe archives.
    HashMap<AtomicString, Ref<ArchiveResource>> urlToResourceMap;
    for (auto& subresource : subresources) {
        auto& url = subresource->url();
        if (shouldReplaceSubresourceURL(url))
            urlToResourceMap.set(url.string(), subresource.copyRef());
    }

    // Step 1: register the data of attachment elements that are already in the fragment.
    Vector<SerializedAttachmentData> serializedAttachmentData;
    for (auto& attachment : descendantsOfType<HTMLAttachmentElement>(fragment)) {
        auto resourceURL = HTMLAttachmentElement::archiveResourceURL(attachment.uniqueIdentifier());
        auto resourceEntry = urlToResourceMap.find(resourceURL.string());
        if (resourceEntry == urlToResourceMap.end())
            continue;

        auto& resource = resourceEntry->value;
        serializedAttachmentData.append({ attachment.uniqueIdentifier(), resource->mimeType(), resource->data() });
    }

    if (!serializedAttachmentData.isEmpty())
        frame.editor().registerAttachments(WTFMove(serializedAttachmentData));

    // Step 2: collect the elements to convert first; the tree is only mutated after both
    // descendant traversals have finished.
    Vector<Ref<Element>> elementsToRemove;
    Vector<AttachmentInsertionInfo> attachmentInsertionInfo;
    for (auto& image : descendantsOfType<HTMLImageElement>(fragment)) {
        auto resourceURLString = image.attributeWithoutSynchronization(HTMLNames::srcAttr);
        if (resourceURLString.isEmpty())
            continue;

        auto resource = urlToResourceMap.find(resourceURLString);
        if (resource == urlToResourceMap.end())
            continue;

        // File name preference: alt text, then the last path component of the URL, then "media".
        auto name = image.attributeWithoutSynchronization(HTMLNames::altAttr);
        if (name.isEmpty())
            name = URL({ }, resourceURLString).lastPathComponent();
        if (name.isEmpty())
            name = AtomicString("media");

        attachmentInsertionInfo.append({ name, resource->value->mimeType(), resource->value->data(), image });
    }

    for (auto& object : descendantsOfType<HTMLObjectElement>(fragment)) {
        auto resourceURLString = object.attributeWithoutSynchronization(HTMLNames::dataAttr);
        if (resourceURLString.isEmpty()) {
            elementsToRemove.append(object);
            continue;
        }

        auto resource = urlToResourceMap.find(resourceURLString);
        if (resource == urlToResourceMap.end())
            continue;

        // File name preference: the last path component of the URL, then "file".
        auto name = URL({ }, resourceURLString).lastPathComponent();
        if (name.isEmpty())
            name = AtomicString("file");

        attachmentInsertionInfo.append({ name, resource->value->mimeType(), resource->value->data(), object });
    }

    // Step 3: perform the replacements.
    for (auto& info : attachmentInsertionInfo) {
        auto originalElement = WTFMove(info.originalElement);
        auto parent = makeRefPtr(originalElement->parentNode());
        if (!parent)
            continue;

        auto attachment = HTMLAttachmentElement::create(HTMLNames::attachmentTag, fragment.document());
        if (supportsClientSideAttachmentData(frame)) {
            if (is<HTMLImageElement>(originalElement.get()) && contentTypeIsSuitableForInlineImageRepresentation(info.contentType)) {
                // Keep the image in place, point it at a blob URL and attach the attachment element to it.
                auto& image = downcast<HTMLImageElement>(originalElement.get());
                image.setAttributeWithoutSynchronization(HTMLNames::srcAttr, DOMURL::createObjectURL(*frame.document(), Blob::create(info.data, info.contentType)));
                image.setAttachmentElement(attachment.copyRef());
            } else {
                attachment->updateAttributes(info.data->size(), info.contentType, info.fileName);
                parent->replaceChild(attachment, WTFMove(originalElement));
            }
            // Registration comes last because it consumes the remaining fields of |info|.
            frame.editor().registerAttachmentIdentifier(attachment->ensureUniqueIdentifier(), WTFMove(info.contentType), WTFMove(info.fileName), WTFMove(info.data));
        } else {
            attachment->setFile(File::create(Blob::create(WTFMove(info.data), WTFMove(info.contentType)), WTFMove(info.fileName)), HTMLAttachmentElement::UpdateDisplayAttributes::Yes);
            parent->replaceChild(WTFMove(attachment), WTFMove(originalElement));
        }
    }

    for (auto& elementToRemove : elementsToRemove)
        elementToRemove->remove();
#else
    UNUSED_PARAM(frame);
    UNUSED_PARAM(fragment);
    UNUSED_PARAM(subresources);
#endif
}
371
// Asks the editor client for a replacement URL for each replaceable subresource and rewrites the
// matching URLs in |fragment|. Every subresource that is not replaceable, or for which the client
// supplies no URL, is appended to |outUnreplacedResources|.
// The fragment's document must have a frame.
static void replaceSubresourceURLsWithURLsFromClient(DocumentFragment& fragment, const Vector<Ref<ArchiveResource>>& subresources, Vector<Ref<ArchiveResource>>& outUnreplacedResources)
{
    ASSERT(fragment.document().frame());
    auto& frame = *fragment.document().frame();

    HashMap<AtomicString, AtomicString> clientURLsByOriginalURL;
    for (auto& subresource : subresources) {
        auto& originalURL = subresource->url();
        if (shouldReplaceSubresourceURL(originalURL)) {
            auto clientURL = frame.editor().clientReplacementURLForResource(subresource->data(), subresource->mimeType());
            if (!clientURL.isEmpty()) {
                clientURLsByOriginalURL.set(originalURL.string(), clientURL);
                continue;
            }
        }

        outUnreplacedResources.append(subresource.copyRef());
    }

    if (clientURLsByOriginalURL.isEmpty())
        return;

    replaceSubresourceURLs(fragment, WTFMove(clientURLsByOriginalURL));
}
396
// Converts |string| into a DocumentFragment for |frame|'s document and makes its subresources
// reachable. Returns null when the frame has no page or document, the document is not an HTML
// document, |string| is nil, or the conversion yields no fragment.
//
// Without custom pasteboard data, the resources are added to the frame's document loader as
// archive resources. Otherwise subresource URLs are first replaced with client-provided URLs, and
// the remaining resources become either attachments or blob URLs.
RefPtr<DocumentFragment> createFragmentAndAddResources(Frame& frame, NSAttributedString *string)
{
    if (!frame.page())
        return nullptr;

    auto* destinationDocument = frame.document();
    if (!destinationDocument || !destinationDocument->isHTMLDocument() || !string)
        return nullptr;

    DeferredLoadingScope deferredLoading(frame);
    auto converted = createFragment(frame, string);
    if (!converted.fragment)
        return nullptr;

    if (!RuntimeEnabledFeatures::sharedFeatures().customPasteboardDataEnabled()) {
        auto* loader = frame.loader().documentLoader();
        if (loader) {
            for (auto& resource : converted.resources)
                loader->addArchiveResource(resource.copyRef());
        }
        return WTFMove(converted.fragment);
    }

    Vector<Ref<ArchiveResource>> resourcesWithoutClientURL;
    replaceSubresourceURLsWithURLsFromClient(*converted.fragment, converted.resources, resourcesWithoutClientURL);

    if (!shouldReplaceRichContentWithAttachments()) {
        // Back each remaining resource with a blob and point the fragment at the blob URLs.
        HashMap<AtomicString, AtomicString> blobURLs;
        for (auto& resource : resourcesWithoutClientURL) {
            String blobURL = DOMURL::createObjectURL(*destinationDocument, Blob::create(resource->data(), resource->mimeType()));
            blobURLs.set(resource->url().string(), blobURL);
        }
        replaceSubresourceURLs(*converted.fragment, WTFMove(blobURLs));
    } else
        replaceRichContentWithAttachments(frame, *converted.fragment, resourcesWithoutClientURL);

    return WTFMove(converted.fragment);
}
437
// Result of unpacking a web archive. Members are aggregate-initialized in declaration order.
struct MarkupAndArchive {
    // The main resource's data decoded as UTF-8.
    String markup;
    // The archive's main resource.
    Ref<ArchiveResource> mainResource;
    // The archive the main resource was taken from.
    Ref<Archive> archive;
};
443
// Parses |buffer| as a legacy web archive and returns its main resource's markup together with the
// resource and the archive. Returns std::nullopt when the archive cannot be created, has no main
// resource, or |canShowMIMETypeAsHTML| rejects the main resource's MIME type.
static std::optional<MarkupAndArchive> extractMarkupAndArchive(SharedBuffer& buffer, const std::function<bool(const String)>& canShowMIMETypeAsHTML)
{
    auto archive = LegacyWebArchive::create(URL(), buffer);
    if (!archive)
        return std::nullopt;

    RefPtr<ArchiveResource> mainResource = archive->mainResource();
    if (!mainResource || !canShowMIMETypeAsHTML(mainResource->mimeType()))
        return std::nullopt;

    // Decode the markup before the references below are released into the result.
    auto& mainResourceData = mainResource->data();
    auto markup = String::fromUTF8(mainResourceData.data(), mainResourceData.size());
    return MarkupAndArchive { WTFMove(markup), mainResource.releaseNonNull(), archive.releaseNonNull() };
}
460
// Sanitizes the markup of a web archive and returns the resulting markup string.
//
// The markup is parsed into a fragment of a separate staging document created by
// createPageForSanitizingWebContent(). Subresource URLs are replaced with client-provided URLs
// where available; the remaining resources become attachments (when enabled) or blob URLs created
// against |destinationDocument|. Subframe archives whose main resource passes
// |canShowMIMETypeAsHTML| are sanitized recursively (with MSO list quirks disabled) and referenced
// through blob URLs as well.
static String sanitizeMarkupWithArchive(Frame& frame, Document& destinationDocument, MarkupAndArchive& markupAndArchive, MSOListQuirks msoListQuirks, const std::function<bool(const String)>& canShowMIMETypeAsHTML)
{
    auto page = createPageForSanitizingWebContent();
    Document* stagingDocument = page->mainFrame().document();
    ASSERT(stagingDocument);
    auto fragment = createFragmentFromMarkup(*stagingDocument, markupAndArchive.markup, markupAndArchive.mainResource->url(), DisallowScriptingAndPluginContent);

    Vector<Ref<ArchiveResource>> unreplacedResources;
    replaceSubresourceURLsWithURLsFromClient(fragment, markupAndArchive.archive->subresources(), unreplacedResources);

    if (shouldReplaceRichContentWithAttachments()) {
        replaceRichContentWithAttachments(frame, fragment, unreplacedResources);
        return sanitizedMarkupForFragmentInDocument(WTFMove(fragment), *stagingDocument, msoListQuirks, markupAndArchive.markup);
    }

    // Back every remaining replaceable subresource with a blob URL.
    HashMap<AtomicString, AtomicString> blobURLMap;
    for (const Ref<ArchiveResource>& subresource : unreplacedResources) {
        auto& subresourceURL = subresource->url();
        if (!shouldReplaceSubresourceURL(subresourceURL))
            continue;
        auto blob = Blob::create(subresource->data(), subresource->mimeType());
        String blobURL = DOMURL::createObjectURL(destinationDocument, blob);
        blobURLMap.set(subresourceURL.string(), blobURL);
    }

    for (const Ref<Archive>& subframeArchive : markupAndArchive.archive->subframeArchives()) {
        RefPtr<ArchiveResource> subframeMainResource = subframeArchive->mainResource();
        if (!subframeMainResource)
            continue;

        auto type = subframeMainResource->mimeType();
        if (!canShowMIMETypeAsHTML(type))
            continue;

        auto subframeURL = subframeMainResource->url();
        if (!shouldReplaceSubresourceURL(subframeURL))
            continue;

        // |type| and |subframeURL| are copied above because subframeMainResource is released here.
        MarkupAndArchive subframeContent = { String::fromUTF8(subframeMainResource->data().data(), subframeMainResource->data().size()),
            subframeMainResource.releaseNonNull(), subframeArchive.copyRef() };
        auto subframeMarkup = sanitizeMarkupWithArchive(frame, destinationDocument, subframeContent, MSOListQuirks::Disabled, canShowMIMETypeAsHTML);

        // Wrap the sanitized subframe markup in a blob so the subframe URL can be replaced by a blob URL.
        CString utf8 = subframeMarkup.utf8();
        Vector<uint8_t> blobBuffer;
        blobBuffer.reserveCapacity(utf8.length());
        blobBuffer.append(reinterpret_cast<const uint8_t*>(utf8.data()), utf8.length());
        auto blob = Blob::create(WTFMove(blobBuffer), type);

        String subframeBlobURL = DOMURL::createObjectURL(destinationDocument, blob);
        blobURLMap.set(subframeURL.string(), subframeBlobURL);
    }

    replaceSubresourceURLs(fragment.get(), WTFMove(blobURLMap));

    return sanitizedMarkupForFragmentInDocument(WTFMove(fragment), *stagingDocument, msoListQuirks, markupAndArchive.markup);
}
518
// Reads a web archive from |buffer| into this reader's fragment.
// Returns false when images are preferred by MIME type, the frame has no document, or the archive
// cannot be unpacked. The markup is used unsanitized when custom pasteboard data is disabled (in
// which case the archive's resources are also added to the document loader) or when
// shouldSanitize() is false; otherwise it is sanitized first.
bool WebContentReader::readWebArchive(SharedBuffer& buffer)
{
    if (frame.settings().preferMIMETypeForImages() || !frame.document())
        return false;

    DeferredLoadingScope scope(frame);

    auto canShowAsHTML = [&] (const String& type) {
        return frame.loader().client().canShowMIMETypeAsHTML(type);
    };

    auto markupAndArchive = extractMarkupAndArchive(buffer, canShowAsHTML);
    if (!markupAndArchive)
        return false;

    bool customPasteboardDataEnabled = RuntimeEnabledFeatures::sharedFeatures().customPasteboardDataEnabled();
    if (!customPasteboardDataEnabled || !shouldSanitize()) {
        fragment = createFragmentFromMarkup(*frame.document(), markupAndArchive->markup, markupAndArchive->mainResource->url(), DisallowScriptingAndPluginContent);
        if (!customPasteboardDataEnabled) {
            if (auto* loader = frame.loader().documentLoader())
                loader->addAllArchiveResources(markupAndArchive->archive.get());
        }
        return true;
    }

    String sanitizedMarkup = sanitizeMarkupWithArchive(frame, *frame.document(), *markupAndArchive, msoListQuirksForMarkup(), canShowAsHTML);
    fragment = createFragmentFromMarkup(*frame.document(), sanitizedMarkup, WTF::blankURL(), DisallowScriptingAndPluginContent);

    return !!fragment;
}
553
// Reads a web archive from |buffer| into this reader's markup string, sanitizing it when
// shouldSanitize() is true. Returns false when the frame has no document or the archive cannot
// be unpacked.
bool WebContentMarkupReader::readWebArchive(SharedBuffer& buffer)
{
    if (!frame.document())
        return false;

    auto canShowAsHTML = [&] (const String& type) {
        return frame.loader().client().canShowMIMETypeAsHTML(type);
    };

    auto markupAndArchive = extractMarkupAndArchive(buffer, canShowAsHTML);
    if (!markupAndArchive)
        return false;

    if (shouldSanitize())
        markup = sanitizeMarkupWithArchive(frame, *frame.document(), *markupAndArchive, msoListQuirksForMarkup(), canShowAsHTML);
    else
        markup = markupAndArchive->markup;

    return true;
}
576
// On Mac, drops everything before the first "<html" (matched ignoring ASCII case) when |string|
// starts with "Version:"; in every other case |string| is returned unchanged.
static String stripMicrosoftPrefix(const String& string)
{
#if PLATFORM(MAC)
    // This code was added to make HTML paste from Microsoft Word on Mac work, back in 2004.
    // It's a simple-minded way to ignore the CF_HTML clipboard format, just skipping over the
    // description part and parsing the entire context plus fragment.
    if (!string.startsWith("Version:"))
        return string;

    size_t htmlStart = string.findIgnoringASCIICase("<html");
    if (htmlStart != notFound)
        return string.substring(htmlStart);
#endif
    return string;
}
591
// Parses |string| as HTML and adds the resulting fragment to this reader.
// Returns false when images are preferred by MIME type, the frame has no document, or nothing is
// left after stripping the Microsoft prefix. The markup is sanitized (dropping subresource URL
// attributes that shouldReplaceSubresourceURL() accepts) only when custom pasteboard data is
// enabled and shouldSanitize() is true.
bool WebContentReader::readHTML(const String& string)
{
    if (frame.settings().preferMIMETypeForImages() || !frame.document())
        return false;
    auto& document = *frame.document();

    auto html = stripMicrosoftPrefix(string);
    if (html.isEmpty())
        return false;

    bool shouldSanitizeMarkup = RuntimeEnabledFeatures::sharedFeatures().customPasteboardDataEnabled() && shouldSanitize();

    String markup;
    if (!shouldSanitizeMarkup)
        markup = html;
    else {
        WTF::Function<void (DocumentFragment&)> removeReplaceableSubresourceURLs { [] (DocumentFragment& parsedFragment) {
            removeSubresourceURLAttributes(parsedFragment, [] (const URL& url) {
                return shouldReplaceSubresourceURL(url);
            });
        } };
        markup = sanitizeMarkup(html, msoListQuirksForMarkup(), WTFMove(removeReplaceableSubresourceURLs));
    }

    addFragment(createFragmentFromMarkup(document, markup, emptyString(), DisallowScriptingAndPluginContent));
    return true;
}
615
// Stores |string| (minus any Microsoft prefix) in this reader's markup, sanitizing it when
// shouldSanitize() is true. Returns false when the frame has no document or the resulting markup
// is empty.
bool WebContentMarkupReader::readHTML(const String& string)
{
    if (!frame.document())
        return false;

    auto html = stripMicrosoftPrefix(string);
    if (!shouldSanitize())
        markup = html;
    else {
        WTF::Function<void (DocumentFragment&)> removeReplaceableSubresourceURLs { [] (DocumentFragment& parsedFragment) {
            removeSubresourceURLAttributes(parsedFragment, [] (const URL& url) {
                return shouldReplaceSubresourceURL(url);
            });
        } };
        markup = sanitizeMarkup(html, msoListQuirksForMarkup(), WTFMove(removeReplaceableSubresourceURLs));
    }

    return !markup.isEmpty();
}
633
// Converts the RTFD data in |buffer| into a fragment and adds it to this reader.
// Returns false when images are preferred by MIME type, the frame has no document, or no fragment
// could be created.
bool WebContentReader::readRTFD(SharedBuffer& buffer)
{
    if (frame.settings().preferMIMETypeForImages() || !frame.document())
        return false;

    auto attributedString = adoptNS([[NSAttributedString alloc] initWithRTFD:buffer.createNSData().get() documentAttributes:nullptr]);
    if (auto rtfdFragment = createFragmentAndAddResources(frame, attributedString.get())) {
        addFragment(rtfdFragment.releaseNonNull());
        return true;
    }

    return false;
}
647
// Converts the RTFD data in |buffer| into a fragment and stores its serialization in this
// reader's markup. Returns false when the frame has no document or no fragment could be created.
bool WebContentMarkupReader::readRTFD(SharedBuffer& buffer)
{
    if (!frame.document())
        return false;

    auto attributedString = adoptNS([[NSAttributedString alloc] initWithRTFD:buffer.createNSData().get() documentAttributes:nullptr]);
    if (auto rtfdFragment = createFragmentAndAddResources(frame, attributedString.get())) {
        markup = serializeFragment(*rtfdFragment, SerializedNodes::SubtreeIncludingNode);
        return true;
    }

    return false;
}
660
// Converts the RTF data in |buffer| into a fragment and adds it to this reader.
// Returns false when images are preferred by MIME type, the frame has no document, or no fragment
// could be created.
bool WebContentReader::readRTF(SharedBuffer& buffer)
{
    // Check for the document up front, as readRTFD() does, so the RTF data is not parsed into an
    // attributed string only for createFragmentAndAddResources() to reject the missing document.
    if (frame.settings().preferMIMETypeForImages() || !frame.document())
        return false;

    auto string = adoptNS([[NSAttributedString alloc] initWithRTF:buffer.createNSData().get() documentAttributes:nullptr]);
    auto fragment = createFragmentAndAddResources(frame, string.get());
    if (!fragment)
        return false;
    addFragment(fragment.releaseNonNull());

    return true;
}
674
// Converts the RTF data in |buffer| into a fragment and stores its serialization in this
// reader's markup. Returns false when the frame has no document or no fragment could be created.
bool WebContentMarkupReader::readRTF(SharedBuffer& buffer)
{
    if (!frame.document())
        return false;

    auto attributedString = adoptNS([[NSAttributedString alloc] initWithRTF:buffer.createNSData().get() documentAttributes:nullptr]);
    if (auto rtfFragment = createFragmentAndAddResources(frame, attributedString.get())) {
        markup = serializeFragment(*rtfFragment, SerializedNodes::SubtreeIncludingNode);
        return true;
    }

    return false;
}
686
// Adds |text|, normalized with -precomposedStringWithCanonicalMapping, as a text fragment and
// records that the fragment came from plain text. Returns false when plain text is not allowed.
bool WebContentReader::readPlainText(const String& text)
{
    if (allowPlainText) {
        addFragment(createFragmentFromText(context, [text precomposedStringWithCanonicalMapping]));
        madeFragmentFromPlainText = true;
        return true;
    }

    return false;
}
697
// Adds a fragment representing the image data in |buffer|. |type| may be a declared UTI or a
// MIME type. A client-provided replacement URL is preferred; otherwise the image becomes an
// attachment (when enabled) or an image referencing a newly created blob URL.
// The frame must have a document.
bool WebContentReader::readImage(Ref<SharedBuffer>&& buffer, const String& type)
{
    ASSERT(frame.document());
    auto& document = *frame.document();

    auto mimeType = isDeclaredUTI(type) ? MIMETypeFromUTI(type) : type;
    auto replacementURL = frame.editor().clientReplacementURLForResource(buffer.copyRef(), mimeType);
    if (!replacementURL.isEmpty()) {
        addFragment(createFragmentForImageAndURL(document, replacementURL));
        return true;
    }

    if (!shouldReplaceRichContentWithAttachments()) {
        auto blobURL = DOMURL::createObjectURL(document, Blob::create(buffer.get(), type));
        addFragment(createFragmentForImageAndURL(document, blobURL));
    } else
        addFragment(createFragmentForImageAttachment(frame, document, WTFMove(buffer), type));

    return !!fragment;
}
716
// Appends one attachment per path in |paths| to this reader's fragment, creating the fragment if
// needed. Returns false when |paths| is empty or the frame has no document.
//
// With client-side attachment data support, each attachment is registered with the editor by
// path; files of a supported image type are inserted as an <img> referencing a blob URL with the
// attachment element attached, all others as the attachment element itself. Without that support
// the attachment element is given a File for the path.
// When attachment elements are compiled out or disabled at runtime, nothing is appended.
bool WebContentReader::readFilePaths(const Vector<String>& paths)
{
    if (paths.isEmpty() || !frame.document())
        return false;

    auto& document = *frame.document();
    if (!fragment)
        fragment = document.createDocumentFragment();

#if ENABLE(ATTACHMENT_ELEMENT)
    if (RuntimeEnabledFeatures::sharedFeatures().attachmentElementEnabled()) {
        for (auto& path : paths) {
            auto attachment = HTMLAttachmentElement::create(HTMLNames::attachmentTag, document);
            if (supportsClientSideAttachmentData(frame)) {
                String contentType;
                std::optional<uint64_t> fileSizeForDisplay;
                if (FileSystem::fileIsDirectory(path, FileSystem::ShouldFollowSymbolicLinks::Yes))
                    contentType = kUTTypeDirectory;
                else {
                    // Only report a size that was actually obtained; if the lookup fails, leave
                    // the display size unset instead of using an indeterminate value.
                    long long fileSize = 0;
                    if (FileSystem::getFileSize(path, fileSize))
                        fileSizeForDisplay = fileSize;
                    contentType = File::contentTypeForFile(path);
                    if (contentType.isEmpty())
                        contentType = kUTTypeData;
                }
                frame.editor().registerAttachmentIdentifier(attachment->ensureUniqueIdentifier(), contentType, path);
                if (contentTypeIsSuitableForInlineImageRepresentation(contentType)) {
                    auto image = HTMLImageElement::create(document);
                    image->setAttributeWithoutSynchronization(HTMLNames::srcAttr, DOMURL::createObjectURL(document, File::create(path)));
                    image->setAttachmentElement(WTFMove(attachment));
                    fragment->appendChild(image);
                } else {
                    attachment->updateAttributes(WTFMove(fileSizeForDisplay), contentType, FileSystem::pathGetFileName(path));
                    fragment->appendChild(attachment);
                }
            } else {
                attachment->setFile(File::create(path), HTMLAttachmentElement::UpdateDisplayAttributes::Yes);
                fragment->appendChild(attachment);
            }
        }
    }
#endif

    return true;
}
763
// Adds a link to |url| to this reader. The link text is |title|, or the URL's absolute string
// when |title| is empty. A <br> precedes the link when the reader already holds a fragment.
// Returns false for an empty URL. On the iOS family, the URL is read as plain text when the
// selection is not richly editable and plain text is accepted, and file URLs are rejected.
bool WebContentReader::readURL(const URL& url, const String& title)
{
    if (url.isEmpty())
        return false;

#if PLATFORM(IOS_FAMILY)
    // FIXME: This code shouldn't be accessing selection and changing the behavior.
    bool selectionIsRichlyEditable = frame.editor().client()->hasRichlyEditableSelection();
    if (!selectionIsRichlyEditable && readPlainText([(NSURL *)url absoluteString]))
        return true;

    if ([(NSURL *)url isFileURL])
        return false;
#endif // PLATFORM(IOS_FAMILY)

    auto document = makeRef(*frame.document());

    NSString *linkText;
    if (title.isEmpty())
        linkText = [(NSURL *)url absoluteString];
    else
        linkText = (NSString *)title;

    auto anchor = HTMLAnchorElement::create(document.get());
    anchor->setAttributeWithoutSynchronization(HTMLNames::hrefAttr, url.string());
    anchor->appendChild(document->createTextNode([linkText precomposedStringWithCanonicalMapping]));

    auto linkFragment = document->createDocumentFragment();
    if (fragment)
        linkFragment->appendChild(HTMLBRElement::create(document.get()));
    linkFragment->appendChild(anchor);

    addFragment(WTFMove(linkFragment));
    return true;
}
794
795 }