Resource Load Statistics: Communicate to the network process which domains to partition
[WebKit.git] / Source / WebKit2 / UIProcess / WebResourceLoadStatisticsStore.cpp
1 /*
2  * Copyright (C) 2016-2017 Apple Inc. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  * 1. Redistributions of source code must retain the above copyright
8  *    notice, this list of conditions and the following disclaimer.
9  * 2. Redistributions in binary form must reproduce the above copyright
10  *    notice, this list of conditions and the following disclaimer in the
11  *    documentation and/or other materials provided with the distribution.
12  *
13  * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS''
14  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
15  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS
17  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
18  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
19  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
20  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
21  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
22  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
23  * THE POSSIBILITY OF SUCH DAMAGE.
24  */
25
26 #include "config.h"
27 #include "WebResourceLoadStatisticsStore.h"
28
29 #include "WebProcessMessages.h"
30 #include "WebProcessPool.h"
31 #include "WebProcessProxy.h"
32 #include "WebResourceLoadStatisticsStoreMessages.h"
33 #include "WebsiteDataFetchOption.h"
34 #include "WebsiteDataType.h"
35 #include <WebCore/KeyedCoding.h>
36 #include <WebCore/ResourceLoadObserver.h>
37 #include <WebCore/ResourceLoadStatistics.h>
38 #include <wtf/CurrentTime.h>
39 #include <wtf/MainThread.h>
40 #include <wtf/MathExtras.h>
41 #include <wtf/RunLoop.h>
42 #include <wtf/threads/BinarySemaphore.h>
43
44 using namespace WebCore;
45
46 namespace WebKit {
47
// Minimum number of seconds that must pass between two data record removals.
// Declared as double on purpose: the setter takes a double and the value is
// added to a double timestamp, whereas `auto` would deduce int from the
// literal and silently truncate fractional intervals.
static double minimumTimeBetweeenDataRecordsRemoval = 60;
49 static OptionSet<WebKit::WebsiteDataType> dataTypesToRemove;
50 static auto notifyPages = false;
51 static auto shouldClassifyResourcesBeforeDataRecordsRemoval = true;
52
53 Ref<WebResourceLoadStatisticsStore> WebResourceLoadStatisticsStore::create(const String& resourceLoadStatisticsDirectory)
54 {
55     return adoptRef(*new WebResourceLoadStatisticsStore(resourceLoadStatisticsDirectory));
56 }
57
58 WebResourceLoadStatisticsStore::WebResourceLoadStatisticsStore(const String& resourceLoadStatisticsDirectory)
59     : m_resourceLoadStatisticsStore(ResourceLoadStatisticsStore::create())
60     , m_statisticsQueue(WorkQueue::create("WebResourceLoadStatisticsStore Process Data Queue"))
61     , m_statisticsStoragePath(resourceLoadStatisticsDirectory)
62 {
63 }
64
65 WebResourceLoadStatisticsStore::~WebResourceLoadStatisticsStore()
66 {
67 }
68
69 void WebResourceLoadStatisticsStore::setNotifyPagesWhenDataRecordsWereScanned(bool always)
70 {
71     notifyPages = always;
72 }
73
74 void WebResourceLoadStatisticsStore::setShouldClassifyResourcesBeforeDataRecordsRemoval(bool value)
75 {
76     shouldClassifyResourcesBeforeDataRecordsRemoval = value;
77 }
78
79 void WebResourceLoadStatisticsStore::setMinimumTimeBetweeenDataRecordsRemoval(double seconds)
80 {
81     if (seconds >= 0)
82         minimumTimeBetweeenDataRecordsRemoval = seconds;
83 }
84
85 void WebResourceLoadStatisticsStore::classifyResource(ResourceLoadStatistics& resourceStatistic)
86 {
87     if (!resourceStatistic.isPrevalentResource
88         && m_resourceLoadStatisticsClassifier.hasPrevalentResourceCharacteristics(resourceStatistic)) {
89         resourceStatistic.isPrevalentResource = true;
90         if (!resourceStatistic.hadUserInteraction)
91             m_resourceLoadStatisticsStore->fireShouldPartitionCookiesHandler(resourceStatistic.highLevelDomain, true);
92     }
93 }
94
95 void WebResourceLoadStatisticsStore::removeDataRecords()
96 {
97     if (m_dataRecordsRemovalPending)
98         return;
99
100     Vector<String> prevalentResourceDomains = coreStore().prevalentResourceDomainsWithoutUserInteraction();
101     if (!prevalentResourceDomains.size())
102         return;
103
104     double now = currentTime();
105     if (m_lastTimeDataRecordsWereRemoved
106         && now < m_lastTimeDataRecordsWereRemoved + minimumTimeBetweeenDataRecordsRemoval)
107         return;
108
109     m_dataRecordsRemovalPending = true;
110     m_lastTimeDataRecordsWereRemoved = now;
111
112     if (dataTypesToRemove.isEmpty()) {
113         dataTypesToRemove |= WebsiteDataType::Cookies;
114         dataTypesToRemove |= WebsiteDataType::DiskCache;
115         dataTypesToRemove |= WebsiteDataType::MemoryCache;
116         dataTypesToRemove |= WebsiteDataType::OfflineWebApplicationCache;
117         dataTypesToRemove |= WebsiteDataType::SessionStorage;
118         dataTypesToRemove |= WebsiteDataType::LocalStorage;
119         dataTypesToRemove |= WebsiteDataType::WebSQLDatabases;
120         dataTypesToRemove |= WebsiteDataType::IndexedDBDatabases;
121         dataTypesToRemove |= WebsiteDataType::MediaKeys;
122         dataTypesToRemove |= WebsiteDataType::HSTSCache;
123         dataTypesToRemove |= WebsiteDataType::SearchFieldRecentSearches;
124 #if ENABLE(NETSCAPE_PLUGIN_API)
125         dataTypesToRemove |= WebsiteDataType::PlugInData;
126 #endif
127 #if ENABLE(MEDIA_STREAM)
128         dataTypesToRemove |= WebsiteDataType::MediaDeviceIdentifier;
129 #endif
130     }
131
132     // Switch to the main thread to get the default website data store
133     RunLoop::main().dispatch([prevalentResourceDomains = WTFMove(prevalentResourceDomains), this] () mutable {
134         WebProcessProxy::deleteWebsiteDataForTopPrivatelyOwnedDomainsInAllPersistentDataStores(dataTypesToRemove, prevalentResourceDomains, notifyPages, [this](Vector<String> domainsWithDeletedWebsiteData) mutable {
135             this->coreStore().updateStatisticsForRemovedDataRecords(domainsWithDeletedWebsiteData);
136             m_dataRecordsRemovalPending = false;
137         });
138     });
139 }
140
141 void WebResourceLoadStatisticsStore::processStatisticsAndDataRecords()
142 {
143     if (shouldClassifyResourcesBeforeDataRecordsRemoval) {
144         coreStore().processStatistics([this] (ResourceLoadStatistics& resourceStatistic) {
145             classifyResource(resourceStatistic);
146         });
147     }
148     removeDataRecords();
149     
150     auto encoder = coreStore().createEncoderFromData();
151     
152     writeEncoderToDisk(*encoder.get(), "full_browsing_session");
153 }
154
155 void WebResourceLoadStatisticsStore::resourceLoadStatisticsUpdated(const Vector<WebCore::ResourceLoadStatistics>& origins)
156 {
157     coreStore().mergeStatistics(origins);
158     processStatisticsAndDataRecords();
159 }
160
161 void WebResourceLoadStatisticsStore::setResourceLoadStatisticsEnabled(bool enabled)
162 {
163     if (enabled == m_resourceLoadStatisticsEnabled)
164         return;
165
166     m_resourceLoadStatisticsEnabled = enabled;
167
168     readDataFromDiskIfNeeded();
169 }
170
171 bool WebResourceLoadStatisticsStore::resourceLoadStatisticsEnabled() const
172 {
173     return m_resourceLoadStatisticsEnabled;
174 }
175
176 void WebResourceLoadStatisticsStore::registerSharedResourceLoadObserver()
177 {
178     ResourceLoadObserver::sharedObserver().setStatisticsStore(m_resourceLoadStatisticsStore.copyRef());
179     m_resourceLoadStatisticsStore->setNotificationCallback([this] {
180         if (m_resourceLoadStatisticsStore->isEmpty())
181             return;
182         processStatisticsAndDataRecords();
183     });
184 }
185     
186 void WebResourceLoadStatisticsStore::registerSharedResourceLoadObserver(std::function<void(const Vector<String>& primaryDomain, bool value)>&& shouldPartitionCookiesForDomainsHandler)
187 {
188     registerSharedResourceLoadObserver();
189     m_resourceLoadStatisticsStore->setShouldPartitionCookiesCallback([this, shouldPartitionCookiesForDomainsHandler = WTFMove(shouldPartitionCookiesForDomainsHandler)] (const Vector<String>& primaryDomains, bool value) {
190         shouldPartitionCookiesForDomainsHandler(primaryDomains, value);
191     });
192 }
193
194 void WebResourceLoadStatisticsStore::readDataFromDiskIfNeeded()
195 {
196     if (!m_resourceLoadStatisticsEnabled)
197         return;
198
199     m_statisticsQueue->dispatch([this, protectedThis = makeRef(*this)] {
200         coreStore().clear();
201
202         auto decoder = createDecoderFromDisk("full_browsing_session");
203         if (!decoder)
204             return;
205
206         coreStore().readDataFromDecoder(*decoder);
207     });
208 }
209
210 void WebResourceLoadStatisticsStore::processWillOpenConnection(WebProcessProxy&, IPC::Connection& connection)
211 {
212     connection.addWorkQueueMessageReceiver(Messages::WebResourceLoadStatisticsStore::messageReceiverName(), m_statisticsQueue.get(), this);
213 }
214
215 void WebResourceLoadStatisticsStore::processDidCloseConnection(WebProcessProxy&, IPC::Connection& connection)
216 {
217     connection.removeWorkQueueMessageReceiver(Messages::WebResourceLoadStatisticsStore::messageReceiverName());
218 }
219
220 void WebResourceLoadStatisticsStore::applicationWillTerminate()
221 {
222     BinarySemaphore semaphore;
223     m_statisticsQueue->dispatch([this, &semaphore] {
224         // Make sure any ongoing work in our queue is finished before we terminate.
225         semaphore.signal();
226     });
227     semaphore.wait(WallTime::infinity());
228 }
229
230 String WebResourceLoadStatisticsStore::persistentStoragePath(const String& label) const
231 {
232     if (m_statisticsStoragePath.isEmpty())
233         return emptyString();
234
235     // TODO Decide what to call this file
236     return pathByAppendingComponent(m_statisticsStoragePath, label + "_resourceLog.plist");
237 }
238
239 void WebResourceLoadStatisticsStore::writeEncoderToDisk(KeyedEncoder& encoder, const String& label) const
240 {
241     RefPtr<SharedBuffer> rawData = encoder.finishEncoding();
242     if (!rawData)
243         return;
244
245     String resourceLog = persistentStoragePath(label);
246     if (resourceLog.isEmpty())
247         return;
248
249     if (!m_statisticsStoragePath.isEmpty())
250         makeAllDirectories(m_statisticsStoragePath);
251
252     auto handle = openFile(resourceLog, OpenForWrite);
253     if (!handle)
254         return;
255     
256     int64_t writtenBytes = writeToFile(handle, rawData->data(), rawData->size());
257     closeFile(handle);
258
259     if (writtenBytes != static_cast<int64_t>(rawData->size()))
260         WTFLogAlways("WebResourceLoadStatisticsStore: We only wrote %d out of %d bytes to disk", static_cast<unsigned>(writtenBytes), rawData->size());
261 }
262
263 std::unique_ptr<KeyedDecoder> WebResourceLoadStatisticsStore::createDecoderFromDisk(const String& label) const
264 {
265     String resourceLog = persistentStoragePath(label);
266     if (resourceLog.isEmpty())
267         return nullptr;
268
269     RefPtr<SharedBuffer> rawData = SharedBuffer::createWithContentsOfFile(resourceLog);
270     if (!rawData)
271         return nullptr;
272
273     return KeyedDecoder::decoder(reinterpret_cast<const uint8_t*>(rawData->data()), rawData->size());
274 }
275
276 } // namespace WebKit