8c331ff732e83049ca38874841de3b2a4ddf5ed0
[WebKit-https.git] / Source / WebCore / editing / cocoa / WebContentReaderCocoa.mm
1 /*
2  * Copyright (C) 2006-2017 Apple Inc. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  * 1. Redistributions of source code must retain the above copyright
8  *    notice, this list of conditions and the following disclaimer.
9  * 2. Redistributions in binary form must reproduce the above copyright
10  *    notice, this list of conditions and the following disclaimer in the
11  *    documentation and/or other materials provided with the distribution.
12  *
13  * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
14  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
17  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
20  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
21  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24  */
25
26 #import "config.h"
27 #import "WebContentReader.h"
28
29 #import "ArchiveResource.h"
30 #import "Blob.h"
31 #import "BlobURL.h"
32 #import "CachedResourceLoader.h"
33 #import "DOMURL.h"
34 #import "Document.h"
35 #import "DocumentFragment.h"
36 #import "DocumentLoader.h"
37 #import "Editor.h"
38 #import "EditorClient.h"
39 #import "File.h"
40 #import "Frame.h"
41 #import "FrameLoader.h"
42 #import "FrameLoaderClient.h"
43 #import "HTMLAnchorElement.h"
44 #import "HTMLAttachmentElement.h"
45 #import "HTMLBRElement.h"
46 #import "HTMLBodyElement.h"
47 #import "HTMLDivElement.h"
48 #import "HTMLIFrameElement.h"
49 #import "HTMLImageElement.h"
50 #import "HTMLObjectElement.h"
51 #import "LegacyWebArchive.h"
52 #import "MIMETypeRegistry.h"
53 #import "Page.h"
54 #import "PublicURLManager.h"
55 #import "RenderView.h"
56 #import "RuntimeEnabledFeatures.h"
57 #import "SerializedAttachmentData.h"
58 #import "Settings.h"
59 #import "SocketProvider.h"
60 #import "TypedElementDescendantIterator.h"
61 #import "UTIUtilities.h"
62 #import "WebArchiveResourceFromNSAttributedString.h"
63 #import "WebArchiveResourceWebResourceHandler.h"
64 #import "WebNSAttributedStringExtras.h"
65 #import "markup.h"
66 #import <pal/spi/cocoa/NSAttributedStringSPI.h>
67 #import <wtf/FileSystem.h>
68 #import <wtf/SoftLinking.h>
69 #import <wtf/URLParser.h>
70
71 #if PLATFORM(MAC)
72 #include "LocalDefaultSystemAppearance.h"
73 #endif
74
// Deployment targets new enough for the conversion SPI declare the private NSAttributedString
// method that createFragment() messages directly. Older targets instead soft-link WebKitLegacy
// (as a private framework on iOS family, inside the WebKit umbrella on Mac).
#if (PLATFORM(IOS_FAMILY) && __IPHONE_OS_VERSION_MIN_REQUIRED >= 110000) || (PLATFORM(MAC) && __MAC_OS_X_VERSION_MIN_REQUIRED >= 101300)
@interface NSAttributedString ()
- (NSString *)_htmlDocumentFragmentString:(NSRange)range documentAttributes:(NSDictionary *)dict subresources:(NSArray **)subresources;
@end
#elif PLATFORM(IOS_FAMILY)
SOFT_LINK_PRIVATE_FRAMEWORK(WebKitLegacy)
#elif PLATFORM(MAC)
SOFT_LINK_FRAMEWORK_IN_UMBRELLA(WebKit, WebKitLegacy)
#endif

// On the older targets, fragment creation is delegated to _WebCreateFragment, resolved from the
// soft-linked WebKitLegacy framework.
#if (PLATFORM(IOS_FAMILY) && __IPHONE_OS_VERSION_MIN_REQUIRED < 110000) || (PLATFORM(MAC) && __MAC_OS_X_VERSION_MIN_REQUIRED < 101300)
SOFT_LINK(WebKitLegacy, _WebCreateFragment, void, (WebCore::Document& document, NSAttributedString *string, WebCore::FragmentAndResources& result), (document, string, result))
#endif
88
89 namespace WebCore {
90
91 #if PLATFORM(IOSMAC)
92
// Stub for this configuration: no conversion is attempted, so the result carries neither a
// fragment nor resources. createFragmentAndAddResources() treats the missing fragment as failure.
static FragmentAndResources createFragment(Frame&, NSAttributedString *)
{
    return FragmentAndResources { };
}
97
98 #elif (PLATFORM(IOS_FAMILY) && __IPHONE_OS_VERSION_MIN_REQUIRED >= 110000) || (PLATFORM(MAC) && __MAC_OS_X_VERSION_MIN_REQUIRED >= 101300)
99
// Builds the document-attributes dictionary handed to
// -_htmlDocumentFragmentString:documentAttributes:subresources: when converting an attributed
// string to HTML. The dictionary lists the elements to leave out of the generated markup, sets
// a few conversion flags, supplies an output base URL, and installs a resource handler object.
static NSDictionary *attributesForAttributedStringConversion()
{
    // This function needs to be kept in sync with identically named one in WebKitLegacy, which is used on older OS versions.
    RetainPtr<NSMutableArray> excludedElements = adoptNS([[NSMutableArray alloc] initWithObjects:
        // Omit style since we want style to be inline so the fragment can be easily inserted.
        @"style",
        // Omit xml so the result is not XHTML.
        @"xml",
        // Omit tags that will get stripped when converted to a fragment anyway.
        @"doctype", @"html", @"head", @"body",
        // Omit deprecated tags.
        @"applet", @"basefont", @"center", @"dir", @"font", @"menu", @"s", @"strike", @"u",
#if !ENABLE(ATTACHMENT_ELEMENT)
        // Omit object so no file attachments are part of the fragment.
        @"object",
#endif
        nil]);

#if ENABLE(ATTACHMENT_ELEMENT)
    // When attachment support is compiled in, <object> is only excluded if the feature is
    // switched off at runtime.
    if (!RuntimeEnabledFeatures::sharedFeatures().attachmentElementEnabled())
        [excludedElements addObject:@"object"];
#endif

#if PLATFORM(IOS_FAMILY)
    // Defined locally on iOS family; presumably the SDK there does not export this key — TODO confirm.
    static NSString * const NSExcludedElementsDocumentAttribute = @"ExcludedElements";
#endif

    NSURL *baseURL = URL::fakeURLWithRelativePart(emptyString());

    // The output base URL needs +1 refcount to work around the fact that NSHTMLReader over-releases it.
    CFRetain((__bridge CFTypeRef)baseURL);

    // The handler is created with +new and autoreleased by hand, so the dictionary's retain
    // becomes the only long-lived reference to it.
    return @{
        NSExcludedElementsDocumentAttribute: excludedElements.get(),
        @"InterchangeNewline": @YES,
        @"CoalesceTabSpans": @YES,
        @"OutputBaseURL": baseURL,
        @"WebResourceHandler": [[WebArchiveResourceWebResourceHandler new] autorelease],
    };
}
140
// Converts |string| to HTML through the NSAttributedString SPI, parses that HTML into a new
// DocumentFragment of the frame's document (scripting and plug-in content disallowed), and
// gathers the subresources reported by the conversion.
// The caller must guarantee that frame.document() is non-null.
static FragmentAndResources createFragment(Frame& frame, NSAttributedString *string)
{
    Document& document = *frame.document();

#if PLATFORM(MAC)
    // Keep the default system appearance in sync with the view for the duration of the conversion.
    auto* view = frame.view();
    bool useDarkAppearance = view && view->useDarkAppearance();
    LocalDefaultSystemAppearance localAppearance(useDarkAppearance);
#endif

    NSArray *convertedSubresources = nil;
    NSRange entireString = NSMakeRange(0, [string length]);
    NSString *html = [string _htmlDocumentFragmentString:entireString documentAttributes:attributesForAttributedStringConversion() subresources:&convertedSubresources];

    auto parsedFragment = DocumentFragment::create(document);
    parsedFragment->parseHTML(html, document.body(), DisallowScriptingAndPluginContent);

    FragmentAndResources result;
    result.fragment = WTFMove(parsedFragment);
    for (WebArchiveResourceFromNSAttributedString *convertedResource in convertedSubresources)
        result.resources.append(*convertedResource->resource);

    return result;
}
162
163 #else
164
// Legacy path: hands the conversion to the soft-linked _WebCreateFragment, which fills in the
// result it is given. The caller must guarantee that frame.document() is non-null.
static FragmentAndResources createFragment(Frame& frame, NSAttributedString *string)
{
    Document& document = *frame.document();
    FragmentAndResources fragmentAndResources;
    _WebCreateFragment(document, string, fragmentAndResources);
    return fragmentAndResources;
}
171
172 #endif
173
// Scoped helper that, while alive, makes the frame's page defer loading and disables image
// loading on the document's cached resource loader. Only the settings this object actually
// changed are restored on destruction, so already-deferred pages and already-disabled images
// are left as they were found.
// The frame must have both a page and a document when the scope is created.
class DeferredLoadingScope {
public:
    DeferredLoadingScope(Frame& frame)
        : m_frame(frame)
        , m_cachedResourceLoader(frame.document()->cachedResourceLoader())
    {
        auto* page = frame.page();
        if (!page->defersLoading()) {
            page->setDefersLoading(true);
            m_mustResumeLoading = true;
        }

        if (m_cachedResourceLoader->imagesEnabled()) {
            m_cachedResourceLoader->setImagesEnabled(false);
            m_mustReenableImages = true;
        }
    }

    ~DeferredLoadingScope()
    {
        // Undo in the reverse order of the constructor: images first, then loading.
        if (m_mustReenableImages)
            m_cachedResourceLoader->setImagesEnabled(true);
        if (m_mustResumeLoading)
            m_frame->page()->setDefersLoading(false);
    }

private:
    Ref<Frame> m_frame;
    Ref<CachedResourceLoader> m_cachedResourceLoader;
    // True when this scope turned deferred loading on and therefore has to turn it off again.
    bool m_mustResumeLoading { false };
    // True when this scope turned image loading off and therefore has to turn it on again.
    bool m_mustReenableImages { false };
};
205
206
// Returns true for every URL that is neither in the HTTP family nor a data: URL; those two
// kinds are left untouched by the subresource replacement code in this file.
static bool shouldReplaceSubresourceURL(const URL& url)
{
    if (url.protocolIsInHTTPFamily())
        return false;
    return !url.protocolIsData();
}
211
// Rich content is replaced with attachment elements only when attachment support is compiled
// in and the runtime feature flag is on.
static bool shouldReplaceRichContentWithAttachments()
{
    bool attachmentsEnabled = false;
#if ENABLE(ATTACHMENT_ELEMENT)
    attachmentsEnabled = RuntimeEnabledFeatures::sharedFeatures().attachmentElementEnabled();
#endif
    return attachmentsEnabled;
}
220
221 #if ENABLE(ATTACHMENT_ELEMENT)
222
// Maps a content type, which may be either a UTI or already a MIME type, to a MIME type.
// Declared UTIs are translated with MIMETypeFromUTI(); anything else is returned unchanged.
static String mimeTypeFromContentType(const String& contentType)
{
    // CoreServices erroneously reports that "public.vcard" maps to "text/directory", rather
    // than either "text/vcard" or "text/x-vcard". Work around this by special casing the
    // "public.vcard" UTI type. See <rdar://problem/49478229> for more detail.
    if (contentType == String(kUTTypeVCard))
        return "text/vcard"_s;

    if (!isDeclaredUTI(contentType))
        return contentType;

    return MIMETypeFromUTI(contentType);
}
233
// True when the content type resolves to a MIME type the registry lists as a supported image
// type, i.e. the data can be shown inline as an <img> instead of a plain attachment.
static bool contentTypeIsSuitableForInlineImageRepresentation(const String& contentType)
{
    auto mimeType = mimeTypeFromContentType(contentType);
    return MIMETypeRegistry::isSupportedImageMIMEType(mimeType);
}
238
// Asks the frame's editor client whether it supports client-side attachment data.
// A frame without an editor client is reported as not supporting it.
static bool supportsClientSideAttachmentData(const Frame& frame)
{
    auto* client = frame.editor().client();
    return client && client->supportsClientSideAttachmentData();
}
246
247 #endif
248
// Wraps image data in an attachment and returns a fragment containing it.
//
// With attachment support compiled in:
//  - If the editor client supports client-side attachment data, the data is registered with the
//    editor under the attachment's identifier. Content types that can be shown inline produce
//    an <img> (pointing at a blob URL) that owns the attachment element; other types produce
//    the attachment element itself.
//  - Otherwise the attachment is backed by a File created from the data.
// Without attachment support an empty fragment is returned.
//
// |buffer| holds the image bytes and |contentType| its UTI or MIME type.
static Ref<DocumentFragment> createFragmentForImageAttachment(Frame& frame, Document& document, Ref<SharedBuffer>&& buffer, const String& contentType)
{
#if ENABLE(ATTACHMENT_ELEMENT)
    auto attachment = HTMLAttachmentElement::create(HTMLNames::attachmentTag, document);
    // FIXME: This fallback image name needs to be a localized string.
    String defaultImageAttachmentName { "image"_s };

    auto fragment = document.createDocumentFragment();
    if (supportsClientSideAttachmentData(frame)) {
        // Pass a fresh reference rather than moving |buffer|: the buffer is read again below to
        // build the blob and to report its size, which must not touch a moved-from Ref.
        frame.editor().registerAttachmentIdentifier(attachment->ensureUniqueIdentifier(), contentType, defaultImageAttachmentName, buffer.copyRef());
        if (contentTypeIsSuitableForInlineImageRepresentation(contentType)) {
            auto image = HTMLImageElement::create(document);
            image->setAttributeWithoutSynchronization(HTMLNames::srcAttr, DOMURL::createObjectURL(document, Blob::create(buffer.get(), contentType)));
            image->setAttachmentElement(WTFMove(attachment));
            fragment->appendChild(WTFMove(image));
        } else {
            attachment->updateAttributes(buffer->size(), contentType, defaultImageAttachmentName);
            fragment->appendChild(WTFMove(attachment));
        }
    } else {
        attachment->setFile(File::create(Blob::create(buffer.get(), contentType), defaultImageAttachmentName), HTMLAttachmentElement::UpdateDisplayAttributes::Yes);
        fragment->appendChild(WTFMove(attachment));
    }
    return fragment;
#else
    // Only |document| is needed to build the empty fragment.
    UNUSED_PARAM(frame);
    UNUSED_PARAM(buffer);
    UNUSED_PARAM(contentType);
    return document.createDocumentFragment();
#endif
}
278
// Rewrites |fragment| so that images and objects backed by archive subresources are represented
// by attachment elements.
//
// Steps (attachment support compiled in):
//  1. Index the replaceable subresources (see shouldReplaceSubresourceURL) by URL string.
//  2. For attachment elements already in the fragment whose archive resource URL is in the
//     index, register their serialized data with the editor.
//  3. Collect <img> (by src) and <object> (by data) elements that refer to indexed resources;
//     <object> elements without a data attribute are scheduled for removal.
//  4. For each collected element, create an attachment: inline-displayable images keep their
//     <img> (retargeted to a blob URL) and gain an associated attachment element; everything
//     else is replaced by the attachment element in the tree.
//  5. Remove the scheduled <object> elements.
//
// Does nothing when |subresources| is empty or when attachment support is compiled out.
static void replaceRichContentWithAttachments(Frame& frame, DocumentFragment& fragment, const Vector<Ref<ArchiveResource>>& subresources)
{
#if ENABLE(ATTACHMENT_ELEMENT)
    // Everything needed to swap one original element for an attachment after traversal is done.
    struct AttachmentInsertionInfo {
        String fileName;
        String contentType;
        Ref<SharedBuffer> data;
        Ref<Element> originalElement;
    };

    ASSERT(RuntimeEnabledFeatures::sharedFeatures().attachmentElementEnabled());
    if (subresources.isEmpty())
        return;

    // FIXME: Handle resources in subframe archives.
    HashMap<AtomicString, Ref<ArchiveResource>> urlToResourceMap;
    for (auto& subresource : subresources) {
        auto& url = subresource->url();
        if (shouldReplaceSubresourceURL(url))
            urlToResourceMap.set(url.string(), subresource.copyRef());
    }

    // Attachment elements already present in the markup: hand their archived data to the editor.
    Vector<SerializedAttachmentData> serializedAttachmentData;
    for (auto& attachment : descendantsOfType<HTMLAttachmentElement>(fragment)) {
        auto resourceURL = HTMLAttachmentElement::archiveResourceURL(attachment.uniqueIdentifier());
        auto resourceEntry = urlToResourceMap.find(resourceURL.string());
        if (resourceEntry == urlToResourceMap.end())
            continue;

        auto& resource = resourceEntry->value;
        serializedAttachmentData.append({ attachment.uniqueIdentifier(), resource->mimeType(), resource->data() });
    }

    if (!serializedAttachmentData.isEmpty())
        frame.editor().registerAttachments(WTFMove(serializedAttachmentData));

    // The tree is only mutated after both traversals below have finished; the elements to
    // change are collected first.
    Vector<Ref<Element>> elementsToRemove;
    Vector<AttachmentInsertionInfo> attachmentInsertionInfo;
    for (auto& image : descendantsOfType<HTMLImageElement>(fragment)) {
        auto resourceURLString = image.attributeWithoutSynchronization(HTMLNames::srcAttr);
        if (resourceURLString.isEmpty())
            continue;

        auto resource = urlToResourceMap.find(resourceURLString);
        if (resource == urlToResourceMap.end())
            continue;

        // Attachment name preference: alt text, then the last path component of the URL,
        // then a fixed fallback.
        auto name = image.attributeWithoutSynchronization(HTMLNames::altAttr);
        if (name.isEmpty())
            name = URL({ }, resourceURLString).lastPathComponent();
        if (name.isEmpty())
            name = AtomicString("media");

        attachmentInsertionInfo.append({ name, resource->value->mimeType(), resource->value->data(), image });
    }

    for (auto& object : descendantsOfType<HTMLObjectElement>(fragment)) {
        auto resourceURLString = object.attributeWithoutSynchronization(HTMLNames::dataAttr);
        if (resourceURLString.isEmpty()) {
            elementsToRemove.append(object);
            continue;
        }

        auto resource = urlToResourceMap.find(resourceURLString);
        if (resource == urlToResourceMap.end())
            continue;

        auto name = URL({ }, resourceURLString).lastPathComponent();
        if (name.isEmpty())
            name = AtomicString("file");

        attachmentInsertionInfo.append({ name, resource->value->mimeType(), resource->value->data(), object });
    }

    for (auto& info : attachmentInsertionInfo) {
        auto originalElement = WTFMove(info.originalElement);
        auto parent = makeRefPtr(originalElement->parentNode());
        if (!parent)
            continue;

        auto attachment = HTMLAttachmentElement::create(HTMLNames::attachmentTag, fragment.document());
        if (supportsClientSideAttachmentData(frame)) {
            if (is<HTMLImageElement>(originalElement.get()) && contentTypeIsSuitableForInlineImageRepresentation(info.contentType)) {
                // Keep the <img> in place; point it at a blob URL and associate the attachment with it.
                auto& image = downcast<HTMLImageElement>(originalElement.get());
                image.setAttributeWithoutSynchronization(HTMLNames::srcAttr, DOMURL::createObjectURL(*frame.document(), Blob::create(info.data, info.contentType)));
                image.setAttachmentElement(attachment.copyRef());
            } else {
                attachment->updateAttributes(info.data->size(), info.contentType, info.fileName);
                parent->replaceChild(attachment, WTFMove(originalElement));
            }
            // Registration comes last because it consumes the strings and data held in |info|.
            frame.editor().registerAttachmentIdentifier(attachment->ensureUniqueIdentifier(), WTFMove(info.contentType), WTFMove(info.fileName), WTFMove(info.data));
        } else {
            attachment->setFile(File::create(Blob::create(WTFMove(info.data), WTFMove(info.contentType)), WTFMove(info.fileName)), HTMLAttachmentElement::UpdateDisplayAttributes::Yes);
            parent->replaceChild(WTFMove(attachment), WTFMove(originalElement));
        }
    }

    for (auto& elementToRemove : elementsToRemove)
        elementToRemove->remove();
#else
    UNUSED_PARAM(frame);
    UNUSED_PARAM(fragment);
    UNUSED_PARAM(subresources);
#endif
}
383
// Converts an attributed string into a DocumentFragment for |frame| and makes the string's
// subresources reachable from it.
//
// Returns null when the frame has no page or document, the document is not an HTML document,
// |string| is nil, or the conversion yields no fragment.
//
// How subresources are exposed depends on runtime features:
//  - custom pasteboard data disabled: they are added to the document loader as archive resources;
//  - attachments enabled: rich content is replaced with attachment elements;
//  - otherwise: each subresource becomes a blob and its URL in the fragment is rewritten.
RefPtr<DocumentFragment> createFragmentAndAddResources(Frame& frame, NSAttributedString *string)
{
    if (!frame.page())
        return nullptr;

    auto* document = frame.document();
    if (!document || !document->isHTMLDocument() || !string)
        return nullptr;

    // Loading stays deferred (and images disabled) until this function returns.
    DeferredLoadingScope scope(frame);
    auto fragmentAndResources = createFragment(frame, string);
    auto& convertedFragment = fragmentAndResources.fragment;
    auto& convertedResources = fragmentAndResources.resources;
    if (!convertedFragment)
        return nullptr;

    if (!RuntimeEnabledFeatures::sharedFeatures().customPasteboardDataEnabled()) {
        if (auto* loader = frame.loader().documentLoader()) {
            for (auto& resource : convertedResources)
                loader->addArchiveResource(resource.copyRef());
        }
        return WTFMove(convertedFragment);
    }

    if (shouldReplaceRichContentWithAttachments()) {
        replaceRichContentWithAttachments(frame, *convertedFragment, convertedResources);
        return WTFMove(convertedFragment);
    }

    // Map every original subresource URL to a freshly minted blob URL.
    HashMap<AtomicString, AtomicString> blobURLMap;
    for (const Ref<ArchiveResource>& subresource : convertedResources) {
        auto blob = Blob::create(subresource->data(), subresource->mimeType());
        String blobURL = DOMURL::createObjectURL(*document, blob);
        blobURLMap.set(subresource->url().string(), blobURL);
    }

    replaceSubresourceURLs(*convertedFragment, WTFMove(blobURLMap));
    return WTFMove(convertedFragment);
}
421
// Result of unpacking a web archive: the main resource's data decoded as UTF-8 markup, the main
// resource itself, and the archive it came from (used for its subresources and subframe archives).
struct MarkupAndArchive {
    String markup;
    Ref<ArchiveResource> mainResource;
    Ref<Archive> archive;
};
427
// Parses |buffer| as a legacy web archive and extracts its main resource as markup.
// Returns nullopt when the buffer is not a valid archive, the archive has no main resource, or
// |canShowMIMETypeAsHTML| rejects the main resource's MIME type.
static Optional<MarkupAndArchive> extractMarkupAndArchive(SharedBuffer& buffer, const std::function<bool(const String)>& canShowMIMETypeAsHTML)
{
    auto archive = LegacyWebArchive::create(URL(), buffer);
    if (!archive)
        return WTF::nullopt;

    RefPtr<ArchiveResource> mainResource = archive->mainResource();
    if (!mainResource || !canShowMIMETypeAsHTML(mainResource->mimeType()))
        return WTF::nullopt;

    // Decode the markup before ownership of the main resource is released below.
    auto& mainResourceData = mainResource->data();
    auto markup = String::fromUTF8(mainResourceData.data(), mainResourceData.size());

    return MarkupAndArchive { WTFMove(markup), mainResource.releaseNonNull(), archive.releaseNonNull() };
}
444
// Sanitizes the markup of a web archive inside a throwaway staging page and returns the
// resulting markup.
//
// The markup is parsed in the staging document (scripting and plug-in content disallowed).
// Then either rich content is replaced with attachments, or every replaceable subresource and
// HTML-displayable subframe is turned into a blob owned by |destinationDocument| and the
// fragment's URLs are rewritten to those blobs. Subframe archives are sanitized recursively
// with MSO list quirks disabled.
static String sanitizeMarkupWithArchive(Frame& frame, Document& destinationDocument, MarkupAndArchive& markupAndArchive, MSOListQuirks msoListQuirks, const std::function<bool(const String)>& canShowMIMETypeAsHTML)
{
    auto page = createPageForSanitizingWebContent();
    Document* stagingDocument = page->mainFrame().document();
    ASSERT(stagingDocument);
    auto fragment = createFragmentFromMarkup(*stagingDocument, markupAndArchive.markup, markupAndArchive.mainResource->url(), DisallowScriptingAndPluginContent);

    if (shouldReplaceRichContentWithAttachments())
        replaceRichContentWithAttachments(frame, fragment, markupAndArchive.archive->subresources());
    else {
        HashMap<AtomicString, AtomicString> blobURLMap;

        // Plain subresources: one blob per replaceable URL.
        for (const Ref<ArchiveResource>& subresource : markupAndArchive.archive->subresources()) {
            auto& subresourceURL = subresource->url();
            if (!shouldReplaceSubresourceURL(subresourceURL))
                continue;
            auto subresourceBlob = Blob::create(subresource->data(), subresource->mimeType());
            String subresourceBlobURL = DOMURL::createObjectURL(destinationDocument, subresourceBlob);
            blobURLMap.set(subresourceURL.string(), subresourceBlobURL);
        }

        auto contentOrigin = SecurityOrigin::create(markupAndArchive.mainResource->url());

        // Subframes: sanitize each one recursively and store the sanitized markup as a blob.
        for (const Ref<Archive>& subframeArchive : markupAndArchive.archive->subframeArchives()) {
            RefPtr<ArchiveResource> subframeMainResource = subframeArchive->mainResource();
            if (!subframeMainResource)
                continue;

            // Copies, not references: both are still needed after the main resource is released below.
            auto subframeType = subframeMainResource->mimeType();
            if (!canShowMIMETypeAsHTML(subframeType))
                continue;

            auto subframeURL = subframeMainResource->url();
            if (!shouldReplaceSubresourceURL(subframeURL))
                continue;

            MarkupAndArchive subframeContent = { String::fromUTF8(subframeMainResource->data().data(), subframeMainResource->data().size()),
                subframeMainResource.releaseNonNull(), subframeArchive.copyRef() };
            auto sanitizedSubframeMarkup = sanitizeMarkupWithArchive(frame, destinationDocument, subframeContent, MSOListQuirks::Disabled, canShowMIMETypeAsHTML);

            CString encodedMarkup = sanitizedSubframeMarkup.utf8();
            Vector<uint8_t> subframeBytes;
            subframeBytes.reserveCapacity(encodedMarkup.length());
            subframeBytes.append(reinterpret_cast<const uint8_t*>(encodedMarkup.data()), encodedMarkup.length());
            auto subframeBlob = Blob::create(WTFMove(subframeBytes), subframeType);

            String subframeBlobURL = DOMURL::createObjectURL(destinationDocument, subframeBlob);
            blobURLMap.set(subframeURL.string(), subframeBlobURL);
        }

        replaceSubresourceURLs(fragment.get(), WTFMove(blobURLMap));
    }

    return sanitizedMarkupForFragmentInDocument(WTFMove(fragment), *stagingDocument, msoListQuirks, markupAndArchive.markup);
}
499
// Reads a web archive from |buffer| into the reader's fragment.
//
// Returns false when images are preferred by MIME type, the frame has no document, the archive
// cannot be unpacked as HTML, or no fragment results. Without custom pasteboard data the markup
// is used as-is and all archive resources are added to the document loader; without
// sanitization the markup is used as-is; otherwise the markup is sanitized first.
bool WebContentReader::readWebArchive(SharedBuffer& buffer)
{
    if (frame.settings().preferMIMETypeForImages() || !frame.document())
        return false;

    DeferredLoadingScope scope(frame);

    auto canShowMIMETypeAsHTML = [&] (const String& type) {
        return frame.loader().client().canShowMIMETypeAsHTML(type);
    };

    auto result = extractMarkupAndArchive(buffer, canShowMIMETypeAsHTML);
    if (!result)
        return false;

    bool customPasteboardDataEnabled = RuntimeEnabledFeatures::sharedFeatures().customPasteboardDataEnabled();
    if (!customPasteboardDataEnabled || !shouldSanitize()) {
        fragment = createFragmentFromMarkup(*frame.document(), result->markup, result->mainResource->url(), DisallowScriptingAndPluginContent);
        if (!customPasteboardDataEnabled) {
            if (auto* loader = frame.loader().documentLoader())
                loader->addAllArchiveResources(result->archive.get());
        }
        return true;
    }

    String sanitizedMarkup = sanitizeMarkupWithArchive(frame, *frame.document(), *result, msoListQuirksForMarkup(), canShowMIMETypeAsHTML);
    fragment = createFragmentFromMarkup(*frame.document(), sanitizedMarkup, WTF::blankURL(), DisallowScriptingAndPluginContent);

    return !!fragment;
}
534
// Reads a web archive from |buffer| into the reader's markup string, sanitizing it when
// shouldSanitize() says so. Returns false when the frame has no document or the archive cannot
// be unpacked as HTML.
bool WebContentMarkupReader::readWebArchive(SharedBuffer& buffer)
{
    if (!frame.document())
        return false;

    auto canShowMIMETypeAsHTML = [&] (const String& type) {
        return frame.loader().client().canShowMIMETypeAsHTML(type);
    };

    auto result = extractMarkupAndArchive(buffer, canShowMIMETypeAsHTML);
    if (!result)
        return false;

    if (shouldSanitize())
        markup = sanitizeMarkupWithArchive(frame, *frame.document(), *result, msoListQuirksForMarkup(), canShowMIMETypeAsHTML);
    else
        markup = result->markup;

    return true;
}
557
// On Mac, drops the leading description of a CF_HTML clipboard payload: when the string starts
// with "Version:" and contains "<html" (ASCII case-insensitive), everything before that tag is
// removed. In every other case, and on other platforms, the string is returned unchanged.
static String stripMicrosoftPrefix(const String& string)
{
#if PLATFORM(MAC)
    // This code was added to make HTML paste from Microsoft Word on Mac work, back in 2004.
    // It's a simple-minded way to ignore the CF_HTML clipboard format, just skipping over the
    // description part and parsing the entire context plus fragment.
    if (!string.startsWith("Version:"))
        return string;

    size_t htmlTagStart = string.findIgnoringASCIICase("<html");
    if (htmlTagStart == notFound)
        return string;

    return string.substring(htmlTagStart);
#else
    return string;
#endif
}
572
// Reads HTML markup into the reader's fragment.
// Returns false when images are preferred by MIME type, the frame has no document, or the
// markup is empty once the Microsoft prefix is stripped. When custom pasteboard data is enabled
// and sanitization is requested, the markup is sanitized and replaceable subresource URL
// attributes are removed before the fragment is created.
bool WebContentReader::readHTML(const String& string)
{
    if (frame.settings().preferMIMETypeForImages() || !frame.document())
        return false;
    Document& document = *frame.document();

    String stringOmittingMicrosoftPrefix = stripMicrosoftPrefix(string);
    if (stringOmittingMicrosoftPrefix.isEmpty())
        return false;

    String markup = stringOmittingMicrosoftPrefix;
    if (RuntimeEnabledFeatures::sharedFeatures().customPasteboardDataEnabled() && shouldSanitize()) {
        WTF::Function<void (DocumentFragment&)> removeReplaceableSubresourceURLs { [] (DocumentFragment& fragment) {
            removeSubresourceURLAttributes(fragment, [] (const URL& url) {
                return shouldReplaceSubresourceURL(url);
            });
        } };
        markup = sanitizeMarkup(stringOmittingMicrosoftPrefix, msoListQuirksForMarkup(), WTFMove(removeReplaceableSubresourceURLs));
    }

    addFragment(createFragmentFromMarkup(document, markup, emptyString(), DisallowScriptingAndPluginContent));
    return true;
}
596
// Reads HTML markup into the reader's markup string, sanitizing it (and removing replaceable
// subresource URL attributes) when shouldSanitize() says so.
// Returns false when the frame has no document or the resulting markup is empty.
bool WebContentMarkupReader::readHTML(const String& string)
{
    if (!frame.document())
        return false;

    String rawHTML = stripMicrosoftPrefix(string);
    if (!shouldSanitize()) {
        markup = rawHTML;
        return !markup.isEmpty();
    }

    WTF::Function<void (DocumentFragment&)> removeReplaceableSubresourceURLs { [] (DocumentFragment& fragment) {
        removeSubresourceURLAttributes(fragment, [] (const URL& url) {
            return shouldReplaceSubresourceURL(url);
        });
    } };
    markup = sanitizeMarkup(rawHTML, msoListQuirksForMarkup(), WTFMove(removeReplaceableSubresourceURLs));

    return !markup.isEmpty();
}
614
// Reads RTFD data into the reader's fragment by way of an NSAttributedString.
// Returns false when images are preferred by MIME type, the frame has no document, or the
// conversion produces no fragment.
bool WebContentReader::readRTFD(SharedBuffer& buffer)
{
    if (frame.settings().preferMIMETypeForImages() || !frame.document())
        return false;

    auto attributedString = adoptNS([[NSAttributedString alloc] initWithRTFD:buffer.createNSData().get() documentAttributes:nullptr]);
    if (auto convertedFragment = createFragmentAndAddResources(frame, attributedString.get())) {
        addFragment(convertedFragment.releaseNonNull());
        return true;
    }

    return false;
}
628
// Reads RTFD data and stores its serialized HTML form in the reader's markup string.
// Returns false when the frame has no document or the conversion produces no fragment.
bool WebContentMarkupReader::readRTFD(SharedBuffer& buffer)
{
    if (!frame.document())
        return false;

    auto attributedString = adoptNS([[NSAttributedString alloc] initWithRTFD:buffer.createNSData().get() documentAttributes:nullptr]);
    auto convertedFragment = createFragmentAndAddResources(frame, attributedString.get());
    if (!convertedFragment)
        return false;

    markup = serializeFragment(*convertedFragment, SerializedNodes::SubtreeIncludingNode);
    return true;
}
641
// Reads RTF data into the reader's fragment by way of an NSAttributedString.
// Returns false when images are preferred by MIME type, the frame has no document, or the
// conversion produces no fragment.
bool WebContentReader::readRTF(SharedBuffer& buffer)
{
    // Same guard as readRTFD(): without a document the conversion helper would reject the
    // request anyway, so bail out before paying for the RTF parse.
    if (frame.settings().preferMIMETypeForImages() || !frame.document())
        return false;

    auto string = adoptNS([[NSAttributedString alloc] initWithRTF:buffer.createNSData().get() documentAttributes:nullptr]);
    auto fragment = createFragmentAndAddResources(frame, string.get());
    if (!fragment)
        return false;
    addFragment(fragment.releaseNonNull());

    return true;
}
655
// Reads RTF data and stores its serialized HTML form in the reader's markup string.
// Returns false when the frame has no document or the conversion produces no fragment.
bool WebContentMarkupReader::readRTF(SharedBuffer& buffer)
{
    if (!frame.document())
        return false;

    auto attributedString = adoptNS([[NSAttributedString alloc] initWithRTF:buffer.createNSData().get() documentAttributes:nullptr]);
    auto convertedFragment = createFragmentAndAddResources(frame, attributedString.get());
    if (!convertedFragment)
        return false;

    markup = serializeFragment(*convertedFragment, SerializedNodes::SubtreeIncludingNode);
    return true;
}
667
// Adds |text|, normalized with -precomposedStringWithCanonicalMapping, as a text fragment and
// records that the fragment came from plain text. Returns false when plain text is not allowed.
bool WebContentReader::readPlainText(const String& text)
{
    if (!allowPlainText)
        return false;

    NSString *precomposedText = [text precomposedStringWithCanonicalMapping];
    addFragment(createFragmentFromText(context, precomposedText));
    madeFragmentFromPlainText = true;

    return true;
}
678
// Adds image data of the given type to the reader's fragment, either as an attachment (when
// rich content is replaced with attachments) or as an <img> pointing at a blob URL.
// Returns whether the reader holds a fragment afterwards; returns false when the frame has no
// document.
bool WebContentReader::readImage(Ref<SharedBuffer>&& buffer, const String& type)
{
    // A document is expected here, but release builds must not dereference null; fail the read
    // the way the other readers in this file do.
    ASSERT(frame.document());
    if (!frame.document())
        return false;

    auto& document = *frame.document();
    if (shouldReplaceRichContentWithAttachments())
        addFragment(createFragmentForImageAttachment(frame, document, WTFMove(buffer), type));
    else
        addFragment(createFragmentForImageAndURL(document, DOMURL::createObjectURL(document, Blob::create(buffer.get(), type))));

    return fragment;
}
690
691 #if ENABLE(ATTACHMENT_ELEMENT)
692
// Creates the element that represents the file at |path| as an attachment.
//
// Without client-side attachment data, the attachment element is simply backed by a File.
// Otherwise the path is registered with the editor under the attachment's identifier and:
//  - files whose content type can be shown inline yield an <img> (blob URL) owning the attachment;
//  - everything else yields the attachment element with size, type and file name attributes set.
// Directories get the directory UTI and no size; files with an unknown content type fall back
// to the generic data UTI.
// The caller must guarantee that frame.document() is non-null.
static Ref<HTMLElement> attachmentForFilePath(Frame& frame, const String& path)
{
    auto document = makeRef(*frame.document());
    auto attachment = HTMLAttachmentElement::create(HTMLNames::attachmentTag, document);
    if (!supportsClientSideAttachmentData(frame)) {
        attachment->setFile(File::create(path), HTMLAttachmentElement::UpdateDisplayAttributes::Yes);
        return attachment;
    }

    String contentType;
    Optional<uint64_t> fileSizeForDisplay;
    if (FileSystem::fileIsDirectory(path, FileSystem::ShouldFollowSymbolicLinks::Yes))
        contentType = kUTTypeDirectory;
    else {
        // Only report a size when the lookup succeeds; otherwise leave it unset instead of
        // publishing whatever an uninitialized local happens to contain.
        long long fileSize = 0;
        if (FileSystem::getFileSize(path, fileSize))
            fileSizeForDisplay = fileSize;
        contentType = File::contentTypeForFile(path);
        if (contentType.isEmpty())
            contentType = kUTTypeData;
    }

    frame.editor().registerAttachmentIdentifier(attachment->ensureUniqueIdentifier(), contentType, path);

    if (contentTypeIsSuitableForInlineImageRepresentation(contentType)) {
        auto image = HTMLImageElement::create(document);
        image->setAttributeWithoutSynchronization(HTMLNames::srcAttr, DOMURL::createObjectURL(document, File::create(path)));
        image->setAttachmentElement(WTFMove(attachment));
        return image;
    }

    attachment->updateAttributes(WTFMove(fileSizeForDisplay), contentType, FileSystem::pathGetFileName(path));
    return attachment;
}
727
// Creates the element that represents in-memory data as an attachment.
//
// The attachment's type is the MIME type derived from |contentType|, or |contentType| itself
// when no MIME type can be derived; an empty |name| falls back to "file".
// Without client-side attachment data, the attachment element is backed by a File built from
// the data. Otherwise the data is registered with the editor and either an <img> (for inline
// displayable types) or the attachment element with its attributes set is returned.
// The caller must guarantee that frame.document() is non-null.
static Ref<HTMLElement> attachmentForData(Frame& frame, SharedBuffer& buffer, const String& contentType, const String& name)
{
    auto document = makeRef(*frame.document());
    auto attachment = HTMLAttachmentElement::create(HTMLNames::attachmentTag, document);

    auto resolvedMIMEType = mimeTypeFromContentType(contentType);
    auto attachmentType = resolvedMIMEType.isEmpty() ? contentType : resolvedMIMEType;

    // FIXME: We should instead ask CoreServices for a preferred name corresponding to the given content type.
    static const char* defaultAttachmentName = "file";
    String fileName = name.isEmpty() ? String(defaultAttachmentName) : name;

    if (!supportsClientSideAttachmentData(frame)) {
        attachment->setFile(File::create(Blob::create(buffer, WTFMove(attachmentType)), fileName));
        return attachment;
    }

    frame.editor().registerAttachmentIdentifier(attachment->ensureUniqueIdentifier(), attachmentType, fileName, buffer);

    if (!contentTypeIsSuitableForInlineImageRepresentation(attachmentType)) {
        attachment->updateAttributes({ buffer.size() }, WTFMove(attachmentType), WTFMove(fileName));
        return attachment;
    }

    auto image = HTMLImageElement::create(document);
    image->setAttributeWithoutSynchronization(HTMLNames::srcAttr, DOMURL::createObjectURL(document, File::create(Blob::create(buffer, WTFMove(attachmentType)), WTFMove(fileName))));
    image->setAttachmentElement(WTFMove(attachment));
    return image;
}
761
762 #endif // ENABLE(ATTACHMENT_ELEMENT)
763
// Appends one attachment per file path to the reader's fragment, creating the fragment first
// if needed. Attachments are only appended when attachment support is compiled in and enabled
// at runtime. Returns false when |paths| is empty or the frame has no document.
bool WebContentReader::readFilePaths(const Vector<String>& paths)
{
    auto* document = frame.document();
    if (paths.isEmpty() || !document)
        return false;

    if (!fragment)
        fragment = document->createDocumentFragment();

#if ENABLE(ATTACHMENT_ELEMENT)
    if (!RuntimeEnabledFeatures::sharedFeatures().attachmentElementEnabled())
        return true;

    for (auto& path : paths)
        fragment->appendChild(attachmentForFilePath(frame, path));
#endif

    return true;
}
782
// Adds a link to |url| to the reader's fragment, using |title| as the link text or the absolute
// URL string when the title is empty. A <br> is placed before the link when the reader already
// holds a fragment.
//
// On iOS family, when the selection is not richly editable the URL is inserted as plain text
// instead (if plain text is allowed), and file URLs are rejected.
// Returns false for an empty URL, a rejected file URL, or a frame without a document.
bool WebContentReader::readURL(const URL& url, const String& title)
{
    if (url.isEmpty())
        return false;

#if PLATFORM(IOS_FAMILY)
    // FIXME: This code shouldn't be accessing selection and changing the behavior.
    // The editor client can be null; without one there is no richly editable selection to
    // speak of, so take the plain text route rather than dereferencing null.
    auto* client = frame.editor().client();
    if (!client || !client->hasRichlyEditableSelection()) {
        if (readPlainText([(NSURL *)url absoluteString]))
            return true;
    }

    if ([(NSURL *)url isFileURL])
        return false;
#endif // PLATFORM(IOS_FAMILY)

    // Building the anchor requires a document; fail the read like the other readers do.
    if (!frame.document())
        return false;

    auto document = makeRef(*frame.document());
    auto anchor = HTMLAnchorElement::create(document.get());
    anchor->setAttributeWithoutSynchronization(HTMLNames::hrefAttr, url.string());

    NSString *linkText = title.isEmpty() ? [(NSURL *)url absoluteString] : (NSString *)title;
    anchor->appendChild(document->createTextNode([linkText precomposedStringWithCanonicalMapping]));

    auto newFragment = document->createDocumentFragment();
    if (fragment)
        newFragment->appendChild(HTMLBRElement::create(document.get()));
    newFragment->appendChild(anchor);
    addFragment(WTFMove(newFragment));
    return true;
}
813
// Appends an attachment for arbitrary data to the reader's fragment, creating the fragment
// first if needed. Returns false when the buffer is empty, rich content is not being replaced
// with attachments, or the frame has no document.
bool WebContentReader::readDataBuffer(SharedBuffer& buffer, const String& type, const String& name)
{
    if (buffer.isEmpty() || !shouldReplaceRichContentWithAttachments())
        return false;

    auto document = makeRefPtr(frame.document());
    if (!document)
        return false;

    if (!fragment)
        fragment = document->createDocumentFragment();

#if ENABLE(ATTACHMENT_ELEMENT)
    auto attachmentElement = attachmentForData(frame, buffer, type, name);
    fragment->appendChild(WTFMove(attachmentElement));
#else
    UNUSED_PARAM(type);
    UNUSED_PARAM(name);
#endif
    return true;
}
837
838 }