0cf2fc345a48840e939f47615ccadd26d51c8c4c
[WebKit-https.git] / Source / WebCore / editing / cocoa / WebContentReaderCocoa.mm
1 /*
2  * Copyright (C) 2006-2017 Apple Inc. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  * 1. Redistributions of source code must retain the above copyright
8  *    notice, this list of conditions and the following disclaimer.
9  * 2. Redistributions in binary form must reproduce the above copyright
10  *    notice, this list of conditions and the following disclaimer in the
11  *    documentation and/or other materials provided with the distribution.
12  *
13  * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
14  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
17  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
20  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
21  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24  */
25
26 #import "config.h"
27 #import "WebContentReader.h"
28
29 #import "ArchiveResource.h"
30 #import "Blob.h"
31 #import "BlobURL.h"
32 #import "CachedResourceLoader.h"
33 #import "CustomHeaderFields.h"
34 #import "DOMURL.h"
35 #import "Document.h"
36 #import "DocumentFragment.h"
37 #import "DocumentLoader.h"
38 #import "Editor.h"
39 #import "EditorClient.h"
40 #import "File.h"
41 #import "Frame.h"
42 #import "FrameLoader.h"
43 #import "FrameLoaderClient.h"
44 #import "HTMLAnchorElement.h"
45 #import "HTMLAttachmentElement.h"
46 #import "HTMLBRElement.h"
47 #import "HTMLBodyElement.h"
48 #import "HTMLDivElement.h"
49 #import "HTMLIFrameElement.h"
50 #import "HTMLImageElement.h"
51 #import "HTMLObjectElement.h"
52 #import "LegacyWebArchive.h"
53 #import "MIMETypeRegistry.h"
54 #import "Page.h"
55 #import "PublicURLManager.h"
56 #import "RenderView.h"
57 #import "RuntimeEnabledFeatures.h"
58 #import "SerializedAttachmentData.h"
59 #import "Settings.h"
60 #import "SocketProvider.h"
61 #import "TypedElementDescendantIterator.h"
62 #import "UTIUtilities.h"
63 #import "WebArchiveResourceFromNSAttributedString.h"
64 #import "WebArchiveResourceWebResourceHandler.h"
65 #import "WebNSAttributedStringExtras.h"
66 #import "markup.h"
67 #import <pal/spi/cocoa/NSAttributedStringSPI.h>
68 #import <wtf/FileSystem.h>
69 #import <wtf/SoftLinking.h>
70 #import <wtf/URLParser.h>
71
72 #if PLATFORM(MAC)
73 #include "LocalDefaultSystemAppearance.h"
74 #endif
75
76 #if (PLATFORM(IOS_FAMILY) && __IPHONE_OS_VERSION_MIN_REQUIRED >= 110000) || PLATFORM(MAC)
// Class extension declaring -_htmlDocumentFragmentString:documentAttributes:subresources: so that
// createFragment() in this file can message it. The method is not implemented in this file.
@interface NSAttributedString ()
- (NSString *)_htmlDocumentFragmentString:(NSRange)range documentAttributes:(NSDictionary *)dict subresources:(NSArray **)subresources;
@end
80 #elif PLATFORM(IOS_FAMILY)
81 SOFT_LINK_PRIVATE_FRAMEWORK(WebKitLegacy)
82 #elif PLATFORM(MAC)
83 SOFT_LINK_FRAMEWORK_IN_UMBRELLA(WebKit, WebKitLegacy)
84 #endif
85
86 #if (PLATFORM(IOS_FAMILY) && __IPHONE_OS_VERSION_MIN_REQUIRED < 110000)
87 SOFT_LINK(WebKitLegacy, _WebCreateFragment, void, (WebCore::Document& document, NSAttributedString *string, WebCore::FragmentAndResources& result), (document, string, result))
88 #endif
89
90 namespace WebCore {
91
92 #if PLATFORM(MACCATALYST)
93
// Mac Catalyst variant: ignores both arguments and returns a default-constructed result, i.e. one
// without a fragment. createFragmentAndAddResources() treats a missing fragment as failure.
static FragmentAndResources createFragment(Frame&, NSAttributedString *)
{
    FragmentAndResources emptyResult;
    return emptyResult;
}
98
99 #elif (PLATFORM(IOS_FAMILY) && __IPHONE_OS_VERSION_MIN_REQUIRED >= 110000) || PLATFORM(MAC)
100
// Builds the document-attributes dictionary that createFragment() passes to
// -_htmlDocumentFragmentString:documentAttributes:subresources:.
static NSDictionary *attributesForAttributedStringConversion()
{
    // This function needs to be kept in sync with identically named one in WebKitLegacy, which is used on older OS versions.
    auto elementsToExclude = adoptNS([[NSMutableArray alloc] initWithObjects:
        // Omit style since we want style to be inline so the fragment can be easily inserted.
        @"style",
        // Omit xml so the result is not XHTML.
        @"xml",
        // Omit tags that will get stripped when converted to a fragment anyway.
        @"doctype", @"html", @"head", @"body",
        // Omit deprecated tags.
        @"applet", @"basefont", @"center", @"dir", @"font", @"menu", @"s", @"strike", @"u",
        nil]);

    // Omit object so no file attachments are part of the fragment, unless the attachment element
    // is both compiled in and enabled at runtime.
    bool shouldExcludeObjectElements = true;
#if ENABLE(ATTACHMENT_ELEMENT)
    shouldExcludeObjectElements = !RuntimeEnabledFeatures::sharedFeatures().attachmentElementEnabled();
#endif
    if (shouldExcludeObjectElements)
        [elementsToExclude addObject:@"object"];

#if PLATFORM(IOS_FAMILY)
    static NSString * const NSExcludedElementsDocumentAttribute = @"ExcludedElements";
#endif

    NSURL *outputBaseURL = URL::fakeURLWithRelativePart(emptyString());

    // The output base URL needs +1 refcount to work around the fact that NSHTMLReader over-releases it.
    CFRetain((__bridge CFTypeRef)outputBaseURL);

    NSDictionary *conversionAttributes = @{
        NSExcludedElementsDocumentAttribute: elementsToExclude.get(),
        @"InterchangeNewline": @YES,
        @"CoalesceTabSpans": @YES,
        @"OutputBaseURL": outputBaseURL,
        @"WebResourceHandler": [[WebArchiveResourceWebResourceHandler new] autorelease],
    };
    return conversionAttributes;
}
141
// Converts an attributed string into a DocumentFragment owned by the frame's document, together
// with the archive resources reported by the conversion. The caller must ensure that
// frame.document() is non-null; it is dereferenced unconditionally.
static FragmentAndResources createFragment(Frame& frame, NSAttributedString *string)
{
    Document& document = *frame.document();

#if PLATFORM(MAC)
    // Keep the chosen appearance in effect for the duration of the conversion below.
    auto* frameView = frame.view();
    LocalDefaultSystemAppearance localAppearance(frameView ? frameView->useDarkAppearance() : false);
#endif

    NSArray *convertedSubresources = nil;
    NSRange entireString = NSMakeRange(0, [string length]);
    NSString *html = [string _htmlDocumentFragmentString:entireString documentAttributes:attributesForAttributedStringConversion() subresources:&convertedSubresources];

    // Parse the generated markup with scripting and plug-in content disallowed.
    auto parsedFragment = DocumentFragment::create(document);
    parsedFragment->parseHTML(html, document.body(), DisallowScriptingAndPluginContent);

    FragmentAndResources converted;
    converted.fragment = WTFMove(parsedFragment);
    for (WebArchiveResourceFromNSAttributedString *wrapper in convertedSubresources)
        converted.resources.append(*wrapper->resource);

    return converted;
}
163
164 #else
165
// Fallback variant: delegates the conversion to the soft-linked _WebCreateFragment, which fills
// in the result by reference. frame.document() is dereferenced unconditionally.
static FragmentAndResources createFragment(Frame& frame, NSAttributedString *string)
{
    FragmentAndResources fragmentAndResources;
    Document& document = *frame.document();
    _WebCreateFragment(document, string, fragmentAndResources);
    return fragmentAndResources;
}
172
173 #endif
174
175 class DeferredLoadingScope {
176 public:
177     DeferredLoadingScope(Frame& frame)
178         : m_frame(frame)
179         , m_cachedResourceLoader(frame.document()->cachedResourceLoader())
180     {
181         if (!frame.page()->defersLoading()) {
182             frame.page()->setDefersLoading(true);
183             m_didEnabledDeferredLoading = true;
184         }
185
186         if (m_cachedResourceLoader->imagesEnabled()) {
187             m_cachedResourceLoader->setImagesEnabled(false);
188             m_didDisableImage = true;
189         }
190     }
191
192     ~DeferredLoadingScope()
193     {
194         if (m_didDisableImage)
195             m_cachedResourceLoader->setImagesEnabled(true);
196         if (m_didEnabledDeferredLoading)
197             m_frame->page()->setDefersLoading(false);
198     }
199
200 private:
201     Ref<Frame> m_frame;
202     Ref<CachedResourceLoader> m_cachedResourceLoader;
203     bool m_didEnabledDeferredLoading { false };
204     bool m_didDisableImage { false };
205 };
206
207
208 static bool shouldReplaceSubresourceURL(const URL& url)
209 {
210     return !(url.protocolIsInHTTPFamily() || url.protocolIsData());
211 }
212
213 static bool shouldReplaceRichContentWithAttachments()
214 {
215 #if ENABLE(ATTACHMENT_ELEMENT)
216     return RuntimeEnabledFeatures::sharedFeatures().attachmentElementEnabled();
217 #else
218     return false;
219 #endif
220 }
221
222 #if ENABLE(ATTACHMENT_ELEMENT)
223
224 static String mimeTypeFromContentType(const String& contentType)
225 {
226     if (contentType == String(kUTTypeVCard)) {
227         // CoreServices erroneously reports that "public.vcard" maps to "text/directory", rather
228         // than either "text/vcard" or "text/x-vcard". Work around this by special casing the
229         // "public.vcard" UTI type. See <rdar://problem/49478229> for more detail.
230         return "text/vcard"_s;
231     }
232     return isDeclaredUTI(contentType) ? MIMETypeFromUTI(contentType) : contentType;
233 }
234
235 static bool contentTypeIsSuitableForInlineImageRepresentation(const String& contentType)
236 {
237     return MIMETypeRegistry::isSupportedImageMIMEType(mimeTypeFromContentType(contentType));
238 }
239
240 static bool supportsClientSideAttachmentData(const Frame& frame)
241 {
242     if (auto* client = frame.editor().client())
243         return client->supportsClientSideAttachmentData();
244
245     return false;
246 }
247
248 #endif
249
250 static Ref<DocumentFragment> createFragmentForImageAttachment(Frame& frame, Document& document, Ref<SharedBuffer>&& buffer, const String& contentType, PresentationSize preferredSize)
251 {
252 #if ENABLE(ATTACHMENT_ELEMENT)
253     auto attachment = HTMLAttachmentElement::create(HTMLNames::attachmentTag, document);
254     // FIXME: This fallback image name needs to be a localized string.
255     String defaultImageAttachmentName { "image"_s };
256
257     auto fragment = document.createDocumentFragment();
258     if (supportsClientSideAttachmentData(frame)) {
259         frame.editor().registerAttachmentIdentifier(attachment->ensureUniqueIdentifier(), contentType, defaultImageAttachmentName, WTFMove(buffer));
260         if (contentTypeIsSuitableForInlineImageRepresentation(contentType)) {
261             auto image = HTMLImageElement::create(document);
262             image->setAttributeWithoutSynchronization(HTMLNames::srcAttr, DOMURL::createObjectURL(document, Blob::create(document.sessionID(), buffer.get(), contentType)));
263             image->setAttachmentElement(WTFMove(attachment));
264             if (preferredSize.width)
265                 image->setAttributeWithoutSynchronization(HTMLNames::widthAttr, AtomString::number(*preferredSize.width));
266             if (preferredSize.height)
267                 image->setAttributeWithoutSynchronization(HTMLNames::heightAttr, AtomString::number(*preferredSize.height));
268             fragment->appendChild(WTFMove(image));
269         } else {
270             attachment->updateAttributes(buffer->size(), contentType, defaultImageAttachmentName);
271             fragment->appendChild(WTFMove(attachment));
272         }
273     } else {
274         attachment->setFile(File::create(Blob::create(document.sessionID(), buffer.get(), contentType), defaultImageAttachmentName), HTMLAttachmentElement::UpdateDisplayAttributes::Yes);
275         fragment->appendChild(WTFMove(attachment));
276     }
277     return fragment;
278 #else
279     UNUSED_PARAM(blob);
280     return document.createDocumentFragment();
281 #endif
282 }
283
284 static void replaceRichContentWithAttachments(Frame& frame, DocumentFragment& fragment, const Vector<Ref<ArchiveResource>>& subresources)
285 {
286 #if ENABLE(ATTACHMENT_ELEMENT)
287     struct AttachmentInsertionInfo {
288         String fileName;
289         String contentType;
290         Ref<SharedBuffer> data;
291         Ref<Element> originalElement;
292     };
293
294     ASSERT(RuntimeEnabledFeatures::sharedFeatures().attachmentElementEnabled());
295     if (subresources.isEmpty())
296         return;
297
298     // FIXME: Handle resources in subframe archives.
299     HashMap<AtomString, Ref<ArchiveResource>> urlToResourceMap;
300     for (auto& subresource : subresources) {
301         auto& url = subresource->url();
302         if (shouldReplaceSubresourceURL(url))
303             urlToResourceMap.set(url.string(), subresource.copyRef());
304     }
305
306     Vector<SerializedAttachmentData> serializedAttachmentData;
307     for (auto& attachment : descendantsOfType<HTMLAttachmentElement>(fragment)) {
308         auto resourceURL = HTMLAttachmentElement::archiveResourceURL(attachment.uniqueIdentifier());
309         auto resourceEntry = urlToResourceMap.find(resourceURL.string());
310         if (resourceEntry == urlToResourceMap.end())
311             continue;
312
313         auto& resource = resourceEntry->value;
314         serializedAttachmentData.append({ attachment.uniqueIdentifier(), resource->mimeType(), resource->data() });
315     }
316
317     if (!serializedAttachmentData.isEmpty())
318         frame.editor().registerAttachments(WTFMove(serializedAttachmentData));
319
320     Vector<Ref<Element>> elementsToRemove;
321     Vector<AttachmentInsertionInfo> attachmentInsertionInfo;
322     for (auto& image : descendantsOfType<HTMLImageElement>(fragment)) {
323         auto resourceURLString = image.attributeWithoutSynchronization(HTMLNames::srcAttr);
324         if (resourceURLString.isEmpty())
325             continue;
326
327         auto resource = urlToResourceMap.find(resourceURLString);
328         if (resource == urlToResourceMap.end())
329             continue;
330
331         auto name = image.attributeWithoutSynchronization(HTMLNames::altAttr);
332         if (name.isEmpty())
333             name = URL({ }, resourceURLString).lastPathComponent();
334         if (name.isEmpty())
335             name = AtomString("media");
336
337         attachmentInsertionInfo.append({ name, resource->value->mimeType(), resource->value->data(), image });
338     }
339
340     for (auto& object : descendantsOfType<HTMLObjectElement>(fragment)) {
341         auto resourceURLString = object.attributeWithoutSynchronization(HTMLNames::dataAttr);
342         if (resourceURLString.isEmpty()) {
343             elementsToRemove.append(object);
344             continue;
345         }
346
347         auto resource = urlToResourceMap.find(resourceURLString);
348         if (resource == urlToResourceMap.end())
349             continue;
350
351         auto name = URL({ }, resourceURLString).lastPathComponent();
352         if (name.isEmpty())
353             name = AtomString("file");
354
355         attachmentInsertionInfo.append({ name, resource->value->mimeType(), resource->value->data(), object });
356     }
357
358     for (auto& info : attachmentInsertionInfo) {
359         auto originalElement = WTFMove(info.originalElement);
360         auto parent = makeRefPtr(originalElement->parentNode());
361         if (!parent)
362             continue;
363
364         auto attachment = HTMLAttachmentElement::create(HTMLNames::attachmentTag, fragment.document());
365         if (supportsClientSideAttachmentData(frame)) {
366             if (is<HTMLImageElement>(originalElement.get()) && contentTypeIsSuitableForInlineImageRepresentation(info.contentType)) {
367                 auto& image = downcast<HTMLImageElement>(originalElement.get());
368                 image.setAttributeWithoutSynchronization(HTMLNames::srcAttr, DOMURL::createObjectURL(*frame.document(), Blob::create(fragment.document().sessionID(), info.data, info.contentType)));
369                 image.setAttachmentElement(attachment.copyRef());
370             } else {
371                 attachment->updateAttributes(info.data->size(), info.contentType, info.fileName);
372                 parent->replaceChild(attachment, WTFMove(originalElement));
373             }
374             frame.editor().registerAttachmentIdentifier(attachment->ensureUniqueIdentifier(), WTFMove(info.contentType), WTFMove(info.fileName), WTFMove(info.data));
375         } else {
376             attachment->setFile(File::create(Blob::create(fragment.document().sessionID(), WTFMove(info.data), WTFMove(info.contentType)), WTFMove(info.fileName)), HTMLAttachmentElement::UpdateDisplayAttributes::Yes);
377             parent->replaceChild(WTFMove(attachment), WTFMove(originalElement));
378         }
379     }
380
381     for (auto& elementToRemove : elementsToRemove)
382         elementToRemove->remove();
383 #else
384     UNUSED_PARAM(fragment);
385     UNUSED_PARAM(subresources);
386 #endif
387 }
388
// Converts an attributed string into a fragment for the frame's document and makes the
// conversion's subresources available to it.
//
// Returns nullptr when the frame has no page or document, the document is not an HTML document,
// the string is nil, or the conversion produced no fragment. Depending on runtime features the
// subresources are added to the document loader as archive resources, turned into attachments,
// or exposed through blob URLs substituted into the fragment.
RefPtr<DocumentFragment> createFragmentAndAddResources(Frame& frame, NSAttributedString *string)
{
    if (!frame.page())
        return nullptr;
    if (!frame.document())
        return nullptr;

    auto& document = *frame.document();
    if (!document.isHTMLDocument())
        return nullptr;
    if (!string)
        return nullptr;

    DeferredLoadingScope scope(frame);
    auto converted = createFragment(frame, string);
    if (!converted.fragment)
        return nullptr;

    if (!RuntimeEnabledFeatures::sharedFeatures().customPasteboardDataEnabled()) {
        // Legacy behavior: hand the subresources to the document loader unchanged.
        DocumentLoader* loader = frame.loader().documentLoader();
        if (loader) {
            for (auto& archiveResource : converted.resources)
                loader->addArchiveResource(archiveResource.copyRef());
        }
        return WTFMove(converted.fragment);
    }

    if (shouldReplaceRichContentWithAttachments()) {
        replaceRichContentWithAttachments(frame, *converted.fragment, converted.resources);
        return WTFMove(converted.fragment);
    }

    // Otherwise give every subresource a blob URL and substitute it for the original URL.
    HashMap<AtomString, AtomString> originalToBlobURL;
    for (const Ref<ArchiveResource>& archiveResource : converted.resources) {
        auto resourceBlob = Blob::create(document.sessionID(), archiveResource->data(), archiveResource->mimeType());
        String resourceBlobURL = DOMURL::createObjectURL(document, resourceBlob);
        originalToBlobURL.set(archiveResource->url().string(), resourceBlobURL);
    }

    replaceSubresourceURLs(*converted.fragment, WTFMove(originalToBlobURL));
    return WTFMove(converted.fragment);
}
426
// Result of extractMarkupAndArchive(): a web archive together with its main resource and that
// resource's data decoded as markup.
struct MarkupAndArchive {
    // Main resource data decoded from UTF-8.
    String markup;
    // The archive's main resource; its URL is used as the base URL when parsing the markup.
    Ref<ArchiveResource> mainResource;
    // The archive itself, giving access to subresources and subframe archives.
    Ref<Archive> archive;
};
432
433 static Optional<MarkupAndArchive> extractMarkupAndArchive(SharedBuffer& buffer, const std::function<bool(const String)>& canShowMIMETypeAsHTML)
434 {
435     auto archive = LegacyWebArchive::create(URL(), buffer);
436     if (!archive)
437         return WTF::nullopt;
438
439     RefPtr<ArchiveResource> mainResource = archive->mainResource();
440     if (!mainResource)
441         return WTF::nullopt;
442
443     auto type = mainResource->mimeType();
444     if (!canShowMIMETypeAsHTML(type))
445         return WTF::nullopt;
446
447     return MarkupAndArchive { String::fromUTF8(mainResource->data().data(), mainResource->data().size()), mainResource.releaseNonNull(), archive.releaseNonNull() };
448 }
449
450 static String sanitizeMarkupWithArchive(Frame& frame, Document& destinationDocument, MarkupAndArchive& markupAndArchive, MSOListQuirks msoListQuirks, const std::function<bool(const String)>& canShowMIMETypeAsHTML)
451 {
452     auto page = createPageForSanitizingWebContent();
453     Document* stagingDocument = page->mainFrame().document();
454     ASSERT(stagingDocument);
455     auto fragment = createFragmentFromMarkup(*stagingDocument, markupAndArchive.markup, markupAndArchive.mainResource->url(), DisallowScriptingAndPluginContent);
456
457     if (shouldReplaceRichContentWithAttachments()) {
458         replaceRichContentWithAttachments(frame, fragment, markupAndArchive.archive->subresources());
459         return sanitizedMarkupForFragmentInDocument(WTFMove(fragment), *stagingDocument, msoListQuirks, markupAndArchive.markup);
460     }
461
462     HashMap<AtomString, AtomString> blobURLMap;
463     for (const Ref<ArchiveResource>& subresource : markupAndArchive.archive->subresources()) {
464         auto& subresourceURL = subresource->url();
465         if (!shouldReplaceSubresourceURL(subresourceURL))
466             continue;
467         auto blob = Blob::create(destinationDocument.sessionID(), subresource->data(), subresource->mimeType());
468         String blobURL = DOMURL::createObjectURL(destinationDocument, blob);
469         blobURLMap.set(subresourceURL.string(), blobURL);
470     }
471
472     auto contentOrigin = SecurityOrigin::create(markupAndArchive.mainResource->url());
473     for (const Ref<Archive>& subframeArchive : markupAndArchive.archive->subframeArchives()) {
474         RefPtr<ArchiveResource> subframeMainResource = subframeArchive->mainResource();
475         if (!subframeMainResource)
476             continue;
477
478         auto type = subframeMainResource->mimeType();
479         if (!canShowMIMETypeAsHTML(type))
480             continue;
481
482         auto subframeURL = subframeMainResource->url();
483         if (!shouldReplaceSubresourceURL(subframeURL))
484             continue;
485
486         MarkupAndArchive subframeContent = { String::fromUTF8(subframeMainResource->data().data(), subframeMainResource->data().size()),
487             subframeMainResource.releaseNonNull(), subframeArchive.copyRef() };
488         auto subframeMarkup = sanitizeMarkupWithArchive(frame, destinationDocument, subframeContent, MSOListQuirks::Disabled, canShowMIMETypeAsHTML);
489
490         CString utf8 = subframeMarkup.utf8();
491         Vector<uint8_t> blobBuffer;
492         blobBuffer.reserveCapacity(utf8.length());
493         blobBuffer.append(reinterpret_cast<const uint8_t*>(utf8.data()), utf8.length());
494         auto blob = Blob::create(destinationDocument.sessionID(), WTFMove(blobBuffer), type);
495
496         String subframeBlobURL = DOMURL::createObjectURL(destinationDocument, blob);
497         blobURLMap.set(subframeURL.string(), subframeBlobURL);
498     }
499
500     replaceSubresourceURLs(fragment.get(), WTFMove(blobURLMap));
501
502     return sanitizedMarkupForFragmentInDocument(WTFMove(fragment), *stagingDocument, msoListQuirks, markupAndArchive.markup);
503 }
504
505 bool WebContentReader::readWebArchive(SharedBuffer& buffer)
506 {
507     if (frame.settings().preferMIMETypeForImages() || !frame.document())
508         return false;
509
510     DeferredLoadingScope scope(frame);
511     auto result = extractMarkupAndArchive(buffer, [&] (const String& type) {
512         return frame.loader().client().canShowMIMETypeAsHTML(type);
513     });
514     if (!result)
515         return false;
516     
517     if (!RuntimeEnabledFeatures::sharedFeatures().customPasteboardDataEnabled()) {
518         fragment = createFragmentFromMarkup(*frame.document(), result->markup, result->mainResource->url(), DisallowScriptingAndPluginContent);
519         if (DocumentLoader* loader = frame.loader().documentLoader())
520             loader->addAllArchiveResources(result->archive.get());
521         return true;
522     }
523
524     if (!shouldSanitize()) {
525         fragment = createFragmentFromMarkup(*frame.document(), result->markup, result->mainResource->url(), DisallowScriptingAndPluginContent);
526         return true;
527     }
528
529     String sanitizedMarkup = sanitizeMarkupWithArchive(frame, *frame.document(), *result, msoListQuirksForMarkup(), [&] (const String& type) {
530         return frame.loader().client().canShowMIMETypeAsHTML(type);
531     });
532     fragment = createFragmentFromMarkup(*frame.document(), sanitizedMarkup, WTF::blankURL(), DisallowScriptingAndPluginContent);
533
534     if (!fragment)
535         return false;
536
537     return true;
538 }
539
540 bool WebContentMarkupReader::readWebArchive(SharedBuffer& buffer)
541 {
542     if (!frame.document())
543         return false;
544
545     auto result = extractMarkupAndArchive(buffer, [&] (const String& type) {
546         return frame.loader().client().canShowMIMETypeAsHTML(type);
547     });
548     if (!result)
549         return false;
550
551     if (!shouldSanitize()) {
552         markup = result->markup;
553         return true;
554     }
555
556     markup = sanitizeMarkupWithArchive(frame, *frame.document(), *result, msoListQuirksForMarkup(), [&] (const String& type) {
557         return frame.loader().client().canShowMIMETypeAsHTML(type);
558     });
559
560     return true;
561 }
562
563 static String stripMicrosoftPrefix(const String& string)
564 {
565 #if PLATFORM(MAC)
566     // This code was added to make HTML paste from Microsoft Word on Mac work, back in 2004.
567     // It's a simple-minded way to ignore the CF_HTML clipboard format, just skipping over the
568     // description part and parsing the entire context plus fragment.
569     if (string.startsWith("Version:")) {
570         size_t location = string.findIgnoringASCIICase("<html");
571         if (location != notFound)
572             return string.substring(location);
573     }
574 #endif
575     return string;
576 }
577
578 bool WebContentReader::readHTML(const String& string)
579 {
580     if (frame.settings().preferMIMETypeForImages() || !frame.document())
581         return false;
582     Document& document = *frame.document();
583
584     String stringOmittingMicrosoftPrefix = stripMicrosoftPrefix(string);
585     if (stringOmittingMicrosoftPrefix.isEmpty())
586         return false;
587
588     String markup;
589     if (RuntimeEnabledFeatures::sharedFeatures().customPasteboardDataEnabled() && shouldSanitize()) {
590         markup = sanitizeMarkup(stringOmittingMicrosoftPrefix, msoListQuirksForMarkup(), WTF::Function<void (DocumentFragment&)> { [] (DocumentFragment& fragment) {
591             removeSubresourceURLAttributes(fragment, [] (const URL& url) {
592                 return shouldReplaceSubresourceURL(url);
593             });
594         } });
595     } else
596         markup = stringOmittingMicrosoftPrefix;
597
598     addFragment(createFragmentFromMarkup(document, markup, emptyString(), DisallowScriptingAndPluginContent));
599     return true;
600 }
601
602 bool WebContentMarkupReader::readHTML(const String& string)
603 {
604     if (!frame.document())
605         return false;
606
607     String rawHTML = stripMicrosoftPrefix(string);
608     if (shouldSanitize()) {
609         markup = sanitizeMarkup(rawHTML, msoListQuirksForMarkup(), WTF::Function<void (DocumentFragment&)> { [] (DocumentFragment& fragment) {
610             removeSubresourceURLAttributes(fragment, [] (const URL& url) {
611                 return shouldReplaceSubresourceURL(url);
612             });
613         } });
614     } else
615         markup = rawHTML;
616
617     return !markup.isEmpty();
618 }
619
// Converts RTFD data to a fragment via NSAttributedString and adds it to this reader. Returns
// false when images are preferred by MIME type, the frame has no document, or the conversion
// yields no fragment.
bool WebContentReader::readRTFD(SharedBuffer& buffer)
{
    if (frame.settings().preferMIMETypeForImages())
        return false;
    if (!frame.document())
        return false;

    auto attributedString = adoptNS([[NSAttributedString alloc] initWithRTFD:buffer.createNSData().get() documentAttributes:nullptr]);
    auto convertedFragment = createFragmentAndAddResources(frame, attributedString.get());
    if (convertedFragment) {
        addFragment(convertedFragment.releaseNonNull());
        return true;
    }

    return false;
}
633
// Converts RTFD data to a fragment via NSAttributedString and stores its serialization in this
// reader's markup. Returns false when the frame has no document or the conversion yields no
// fragment.
bool WebContentMarkupReader::readRTFD(SharedBuffer& buffer)
{
    if (!frame.document())
        return false;

    auto attributedString = adoptNS([[NSAttributedString alloc] initWithRTFD:buffer.createNSData().get() documentAttributes:nullptr]);
    auto convertedFragment = createFragmentAndAddResources(frame, attributedString.get());
    if (convertedFragment) {
        markup = serializeFragment(*convertedFragment, SerializedNodes::SubtreeIncludingNode);
        return true;
    }

    return false;
}
646
// Converts RTF data to a fragment via NSAttributedString and adds it to this reader. Returns
// false when images are preferred by MIME type, the frame has no document, or the conversion
// yields no fragment.
bool WebContentReader::readRTF(SharedBuffer& buffer)
{
    // Check for a document up front, like readRTFD() and the markup readers do, so that the RTF
    // is not parsed only for createFragmentAndAddResources() to reject the frame afterwards.
    if (frame.settings().preferMIMETypeForImages() || !frame.document())
        return false;

    auto string = adoptNS([[NSAttributedString alloc] initWithRTF:buffer.createNSData().get() documentAttributes:nullptr]);
    auto fragment = createFragmentAndAddResources(frame, string.get());
    if (!fragment)
        return false;
    addFragment(fragment.releaseNonNull());

    return true;
}
660
// Converts RTF data to a fragment via NSAttributedString and stores its serialization in this
// reader's markup. Returns false when the frame has no document or the conversion yields no
// fragment.
bool WebContentMarkupReader::readRTF(SharedBuffer& buffer)
{
    if (!frame.document())
        return false;

    auto attributedString = adoptNS([[NSAttributedString alloc] initWithRTF:buffer.createNSData().get() documentAttributes:nullptr]);
    auto convertedFragment = createFragmentAndAddResources(frame, attributedString.get());
    if (convertedFragment) {
        markup = serializeFragment(*convertedFragment, SerializedNodes::SubtreeIncludingNode);
        return true;
    }

    return false;
}
672
// Adds a fragment built from the plain text, after canonical precomposition, and records that
// the fragment came from plain text. Does nothing and returns false when plain text is not
// allowed.
bool WebContentReader::readPlainText(const String& text)
{
    if (allowPlainText) {
        NSString *precomposedText = [text precomposedStringWithCanonicalMapping];
        addFragment(createFragmentFromText(context, precomposedText));

        madeFragmentFromPlainText = true;
        return true;
    }

    return false;
}
683
684 bool WebContentReader::readImage(Ref<SharedBuffer>&& buffer, const String& type, PresentationSize preferredPresentationSize)
685 {
686     ASSERT(frame.document());
687     auto& document = *frame.document();
688     if (shouldReplaceRichContentWithAttachments())
689         addFragment(createFragmentForImageAttachment(frame, document, WTFMove(buffer), type, preferredPresentationSize));
690     else
691         addFragment(createFragmentForImageAndURL(document, DOMURL::createObjectURL(document, Blob::create(document.sessionID(), buffer.get(), type)), preferredPresentationSize));
692
693     return fragment;
694 }
695
696 #if ENABLE(ATTACHMENT_ELEMENT)
697
698 static String typeForAttachmentElement(const String& contentType)
699 {
700     if (contentType.isEmpty())
701         return { };
702
703     auto mimeType = mimeTypeFromContentType(contentType);
704     return mimeType.isEmpty() ? contentType : mimeType;
705 }
706
// Builds the element that represents the file at |path| in the content being read.
//
// The frame must have a document; it is dereferenced unconditionally, so callers are expected to
// have checked frame.document() first.
//
// Returns the <attachment> element itself, or an <img> element that has been handed the attachment
// when the resolved content type is suitable for inline image representation. |preferredSize| is
// only consulted in the <img> case, where its width/height (if set) become the matching attributes.
// When typeForAttachmentElement() yields a non-empty type for |explicitContentType| that type is
// used; otherwise the type is derived from the file on disk.
static Ref<HTMLElement> attachmentForFilePath(Frame& frame, const String& path, PresentationSize preferredSize, const String& explicitContentType)
{
    auto document = makeRef(*frame.document());
    auto attachment = HTMLAttachmentElement::create(HTMLNames::attachmentTag, document);

    // Without client-side attachment data the element is simply backed by a File for the path.
    if (!supportsClientSideAttachmentData(frame)) {
        attachment->setFile(File::create(document->sessionID(), path), HTMLAttachmentElement::UpdateDisplayAttributes::Yes);
        return attachment;
    }

    bool isDirectory = FileSystem::fileIsDirectory(path, FileSystem::ShouldFollowSymbolicLinks::Yes);
    String contentType = typeForAttachmentElement(explicitContentType);
    if (contentType.isEmpty()) {
        // No usable explicit type: fall back to a directory UTI, the type reported for the file,
        // or generic data, in that order.
        if (isDirectory)
            contentType = kUTTypeDirectory;
        else {
            contentType = File::contentTypeForFile(path);
            if (contentType.isEmpty())
                contentType = kUTTypeData;
        }
    }

    // Directories intentionally get no display size.
    Optional<uint64_t> fileSizeForDisplay;
    if (!isDirectory) {
        // The size query can fail (for example if the file is removed or unreadable), in which case
        // the out-parameter is not guaranteed to have been written. Start from zero and fall back
        // to zero so that an indeterminate or negative value is never used as the displayed size.
        long long fileSize = 0;
        if (!FileSystem::getFileSize(path, fileSize) || fileSize < 0)
            fileSize = 0;
        fileSizeForDisplay = static_cast<uint64_t>(fileSize);
    }

    frame.editor().registerAttachmentIdentifier(attachment->ensureUniqueIdentifier(), contentType, path);

    if (contentTypeIsSuitableForInlineImageRepresentation(contentType)) {
        // Represent the file as an inline image whose source is an object URL for the file; the
        // attachment element is handed over to the image.
        auto image = HTMLImageElement::create(document);
        image->setAttributeWithoutSynchronization(HTMLNames::srcAttr, DOMURL::createObjectURL(document, File::create(document->sessionID(), path)));
        image->setAttachmentElement(WTFMove(attachment));
        if (preferredSize.width)
            image->setAttributeWithoutSynchronization(HTMLNames::widthAttr, AtomString::number(*preferredSize.width));
        if (preferredSize.height)
            image->setAttributeWithoutSynchronization(HTMLNames::heightAttr, AtomString::number(*preferredSize.height));
        return image;
    }

    attachment->updateAttributes(WTFMove(fileSizeForDisplay), contentType, FileSystem::pathGetFileName(path));
    return attachment;
}
751
// Builds the element that represents in-memory attachment data (|buffer|) in the content being read.
//
// The frame must have a document; it is dereferenced unconditionally. An empty |name| is replaced
// by a generic default file name. Returns the <attachment> element, or an <img> element that has
// been handed the attachment when the resolved type is suitable for inline image representation;
// |preferredSize| only affects that <img> case.
static Ref<HTMLElement> attachmentForData(Frame& frame, SharedBuffer& buffer, const String& contentType, const String& name, PresentationSize preferredSize)
{
    // FIXME: We should instead ask CoreServices for a preferred name corresponding to the given content type.
    static const char* defaultAttachmentName = "file";

    auto protectedDocument = makeRef(*frame.document());
    auto attachmentElement = HTMLAttachmentElement::create(HTMLNames::attachmentTag, protectedDocument);
    auto resolvedType = typeForAttachmentElement(contentType);
    String resolvedName = name.isEmpty() ? String(defaultAttachmentName) : name;

    // Without client-side attachment data, back the element with a File wrapping a Blob of the buffer.
    if (!supportsClientSideAttachmentData(frame)) {
        attachmentElement->setFile(File::create(Blob::create(protectedDocument->sessionID(), buffer, WTFMove(resolvedType)), resolvedName));
        return attachmentElement;
    }

    frame.editor().registerAttachmentIdentifier(attachmentElement->ensureUniqueIdentifier(), resolvedType, resolvedName, buffer);

    if (!contentTypeIsSuitableForInlineImageRepresentation(resolvedType)) {
        attachmentElement->updateAttributes({ buffer.size() }, WTFMove(resolvedType), WTFMove(resolvedName));
        return attachmentElement;
    }

    // Inline image representation: the image's source is an object URL for a File built from the
    // buffer, and the attachment element is handed over to the image.
    auto imageElement = HTMLImageElement::create(protectedDocument);
    imageElement->setAttributeWithoutSynchronization(HTMLNames::srcAttr, DOMURL::createObjectURL(protectedDocument, File::create(Blob::create(protectedDocument->sessionID(), buffer, WTFMove(resolvedType)), WTFMove(resolvedName))));
    imageElement->setAttachmentElement(WTFMove(attachmentElement));

    // Only dimensions that the caller actually provided are written out as attributes.
    auto applyDimension = [&imageElement](const auto& attributeName, const auto& dimension) {
        if (dimension)
            imageElement->setAttributeWithoutSynchronization(attributeName, AtomString::number(*dimension));
    };
    applyDimension(HTMLNames::widthAttr, preferredSize.width);
    applyDimension(HTMLNames::heightAttr, preferredSize.height);
    return imageElement;
}
788
789 #endif // ENABLE(ATTACHMENT_ELEMENT)
790
// Reads a single file path into the reader's fragment.
//
// Returns false when |path| is empty or the frame has no document. Otherwise makes sure a fragment
// exists and, when attachment elements are compiled in and enabled at runtime, appends the element
// built by attachmentForFilePath(). Note that true is returned even if nothing was appended.
bool WebContentReader::readFilePath(const String& path, PresentationSize preferredPresentationSize, const String& contentType)
{
    if (path.isEmpty() || !frame.document())
        return false;

    auto& document = *frame.document();
    if (!fragment)
        fragment = document.createDocumentFragment();

#if ENABLE(ATTACHMENT_ELEMENT)
    if (RuntimeEnabledFeatures::sharedFeatures().attachmentElementEnabled())
        fragment->appendChild(attachmentForFilePath(frame, path, preferredPresentationSize, contentType));
#else
    // These parameters are only consumed by the attachment code path; mark them used so that builds
    // without attachment element support do not emit unused-parameter warnings.
    UNUSED_PARAM(preferredPresentationSize);
    UNUSED_PARAM(contentType);
#endif

    return true;
}
807
// Reads every path in |paths| via readFilePath().
//
// Returns false when there are no paths or the frame has no document. Otherwise returns true
// regardless of the individual readFilePath() results, which are not inspected.
bool WebContentReader::readFilePaths(const Vector<String>& paths)
{
    if (paths.isEmpty())
        return false;

    if (!frame.document())
        return false;

    for (size_t index = 0; index < paths.size(); ++index)
        readFilePath(paths[index]);

    return true;
}
818
// Reads |url| into the reader as a link (<a href>), using |title| as the link text when non-empty
// and the URL's absolute string otherwise.
//
// Returns false for an empty URL or when the frame has no document. On iOS family platforms the
// URL is first offered as plain text when the editor client reports no richly editable selection,
// and file URLs are rejected. When a fragment already exists, a <br> is placed before the new link.
bool WebContentReader::readURL(const URL& url, const String& title)
{
    if (url.isEmpty())
        return false;

#if PLATFORM(IOS_FAMILY)
    // FIXME: This code shouldn't be accessing selection and changing the behavior.
    if (!frame.editor().client()->hasRichlyEditableSelection()) {
        if (readPlainText([(NSURL *)url absoluteString]))
            return true;
    }

    if ([(NSURL *)url isFileURL])
        return false;
#endif // PLATFORM(IOS_FAMILY)

    // The other read* entry points bail out when the frame has no document; do the same here
    // instead of dereferencing a null pointer.
    auto* frameDocument = frame.document();
    if (!frameDocument)
        return false;

    auto document = makeRef(*frameDocument);
    auto anchor = HTMLAnchorElement::create(document.get());
    anchor->setAttributeWithoutSynchronization(HTMLNames::hrefAttr, url.string());

    // The link text is normalized to its precomposed canonical form before insertion.
    NSString *linkText = title.isEmpty() ? [(NSURL *)url absoluteString] : (NSString *)title;
    anchor->appendChild(document->createTextNode([linkText precomposedStringWithCanonicalMapping]));

    auto newFragment = document->createDocumentFragment();
    if (fragment)
        newFragment->appendChild(HTMLBRElement::create(document.get()));
    newFragment->appendChild(anchor);
    addFragment(WTFMove(newFragment));
    return true;
}
849
// Reads raw data (|buffer|) of the given |type| and |name| into the reader's fragment as an attachment.
//
// Returns false when the buffer is empty, when rich content should not be replaced with
// attachments, or when the frame has no document. Otherwise makes sure a fragment exists, appends
// the element built by attachmentForData() when attachment elements are compiled in, and returns true.
bool WebContentReader::readDataBuffer(SharedBuffer& buffer, const String& type, const String& name, PresentationSize preferredPresentationSize)
{
    if (buffer.isEmpty())
        return false;

    if (!shouldReplaceRichContentWithAttachments())
        return false;

    auto document = makeRefPtr(frame.document());
    if (!document)
        return false;

    if (!fragment)
        fragment = document->createDocumentFragment();

#if ENABLE(ATTACHMENT_ELEMENT)
    fragment->appendChild(attachmentForData(frame, buffer, type, name, preferredPresentationSize));
#else
    // None of these are consumed without attachment element support; mark them all as used so the
    // build stays free of unused-parameter warnings.
    UNUSED_PARAM(type);
    UNUSED_PARAM(name);
    UNUSED_PARAM(preferredPresentationSize);
#endif
    return true;
}
873
874 }