7134cffa413474bfc1cd78d0ebca93aba1542a58
[WebKit-https.git] / Source / WebKit2 / UIProcess / WebResourceLoadStatisticsStore.cpp
1 /*
2  * Copyright (C) 2016-2017 Apple Inc. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  * 1. Redistributions of source code must retain the above copyright
8  *    notice, this list of conditions and the following disclaimer.
9  * 2. Redistributions in binary form must reproduce the above copyright
10  *    notice, this list of conditions and the following disclaimer in the
11  *    documentation and/or other materials provided with the distribution.
12  *
13  * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS''
14  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
15  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS
17  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
18  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
19  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
20  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
21  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
22  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
23  * THE POSSIBILITY OF SUCH DAMAGE.
24  */
25
26 #include "config.h"
27 #include "WebResourceLoadStatisticsStore.h"
28
29 #include "WebProcessMessages.h"
30 #include "WebProcessPool.h"
31 #include "WebProcessProxy.h"
32 #include "WebResourceLoadStatisticsManager.h"
33 #include "WebResourceLoadStatisticsStoreMessages.h"
34 #include "WebsiteDataFetchOption.h"
35 #include "WebsiteDataType.h"
36 #include <WebCore/KeyedCoding.h>
37 #include <WebCore/ResourceLoadObserver.h>
38 #include <WebCore/ResourceLoadStatistics.h>
39 #include <wtf/CurrentTime.h>
40 #include <wtf/MainThread.h>
41 #include <wtf/MathExtras.h>
42 #include <wtf/RunLoop.h>
43 #include <wtf/threads/BinarySemaphore.h>
44
45 using namespace WebCore;
46
47 namespace WebKit {
48
// Minimum number of seconds that must elapse between two data record removal
// passes. Declared as a double (not deduced from the integer literal) so that
// fractional values handed to setMinimumTimeBetweeenDataRecordsRemoval(double)
// are stored without being truncated to whole seconds.
static double minimumTimeBetweeenDataRecordsRemoval = 60;
50 static OptionSet<WebKit::WebsiteDataType> dataTypesToRemove;
51 static auto notifyPages = false;
52 static auto shouldClassifyResourcesBeforeDataRecordsRemoval = true;
53
54 Ref<WebResourceLoadStatisticsStore> WebResourceLoadStatisticsStore::create(const String& resourceLoadStatisticsDirectory)
55 {
56     return adoptRef(*new WebResourceLoadStatisticsStore(resourceLoadStatisticsDirectory));
57 }
58
59 WebResourceLoadStatisticsStore::WebResourceLoadStatisticsStore(const String& resourceLoadStatisticsDirectory)
60     : m_resourceLoadStatisticsStore(ResourceLoadStatisticsStore::create())
61     , m_statisticsQueue(WorkQueue::create("WebResourceLoadStatisticsStore Process Data Queue"))
62     , m_statisticsStoragePath(resourceLoadStatisticsDirectory)
63 {
64 }
65
66 WebResourceLoadStatisticsStore::~WebResourceLoadStatisticsStore()
67 {
68 }
69
70 void WebResourceLoadStatisticsStore::setNotifyPagesWhenDataRecordsWereScanned(bool always)
71 {
72     notifyPages = always;
73 }
74
75 void WebResourceLoadStatisticsStore::setShouldClassifyResourcesBeforeDataRecordsRemoval(bool value)
76 {
77     shouldClassifyResourcesBeforeDataRecordsRemoval = value;
78 }
79
80 void WebResourceLoadStatisticsStore::setMinimumTimeBetweeenDataRecordsRemoval(double seconds)
81 {
82     if (seconds >= 0)
83         minimumTimeBetweeenDataRecordsRemoval = seconds;
84 }
85
86 void WebResourceLoadStatisticsStore::classifyResource(ResourceLoadStatistics& resourceStatistic)
87 {
88     if (!resourceStatistic.isPrevalentResource
89         && m_resourceLoadStatisticsClassifier.hasPrevalentResourceCharacteristics(resourceStatistic))
90         resourceStatistic.isPrevalentResource = true;
91 }
92
93 void WebResourceLoadStatisticsStore::removeDataRecords()
94 {
95     if (m_dataRecordsRemovalPending)
96         return;
97
98     double now = currentTime();
99     if (m_lastTimeDataRecordsWereRemoved
100         && now < m_lastTimeDataRecordsWereRemoved + minimumTimeBetweeenDataRecordsRemoval)
101         return;
102
103     Vector<String> prevalentResourceDomains = coreStore().prevalentResourceDomainsWithoutUserInteraction();
104     if (!prevalentResourceDomains.size())
105         return;
106     
107     m_dataRecordsRemovalPending = true;
108     m_lastTimeDataRecordsWereRemoved = now;
109
110     if (dataTypesToRemove.isEmpty()) {
111         dataTypesToRemove |= WebsiteDataType::Cookies;
112         dataTypesToRemove |= WebsiteDataType::OfflineWebApplicationCache;
113         dataTypesToRemove |= WebsiteDataType::SessionStorage;
114         dataTypesToRemove |= WebsiteDataType::LocalStorage;
115         dataTypesToRemove |= WebsiteDataType::WebSQLDatabases;
116         dataTypesToRemove |= WebsiteDataType::IndexedDBDatabases;
117         dataTypesToRemove |= WebsiteDataType::MediaKeys;
118         dataTypesToRemove |= WebsiteDataType::HSTSCache;
119         dataTypesToRemove |= WebsiteDataType::SearchFieldRecentSearches;
120 #if ENABLE(NETSCAPE_PLUGIN_API)
121         dataTypesToRemove |= WebsiteDataType::PlugInData;
122 #endif
123 #if ENABLE(MEDIA_STREAM)
124         dataTypesToRemove |= WebsiteDataType::MediaDeviceIdentifier;
125 #endif
126     }
127
128     // Switch to the main thread to get the default website data store
129     RunLoop::main().dispatch([prevalentResourceDomains = WTFMove(prevalentResourceDomains), this] () mutable {
130         WebProcessProxy::deleteWebsiteDataForTopPrivatelyControlledDomainsInAllPersistentDataStores(dataTypesToRemove, WTFMove(prevalentResourceDomains), notifyPages, [this](Vector<String> domainsWithDeletedWebsiteData) mutable {
131             this->coreStore().updateStatisticsForRemovedDataRecords(domainsWithDeletedWebsiteData);
132             m_dataRecordsRemovalPending = false;
133         });
134     });
135 }
136
137 void WebResourceLoadStatisticsStore::processStatisticsAndDataRecords()
138 {
139     if (shouldClassifyResourcesBeforeDataRecordsRemoval) {
140         coreStore().processStatistics([this] (ResourceLoadStatistics& resourceStatistic) {
141             classifyResource(resourceStatistic);
142         });
143     }
144     removeDataRecords();
145
146     writeStoreToDisk();
147 }
148
149 void WebResourceLoadStatisticsStore::resourceLoadStatisticsUpdated(const Vector<WebCore::ResourceLoadStatistics>& origins)
150 {
151     coreStore().mergeStatistics(origins);
152     // Fire before processing statistics to propagate user
153     // interaction as fast as possible to the network process.
154     coreStore().fireShouldPartitionCookiesHandler();
155     processStatisticsAndDataRecords();
156 }
157
158 void WebResourceLoadStatisticsStore::setResourceLoadStatisticsEnabled(bool enabled)
159 {
160     if (enabled == m_resourceLoadStatisticsEnabled)
161         return;
162
163     m_resourceLoadStatisticsEnabled = enabled;
164
165     readDataFromDiskIfNeeded();
166 }
167
168 bool WebResourceLoadStatisticsStore::resourceLoadStatisticsEnabled() const
169 {
170     return m_resourceLoadStatisticsEnabled;
171 }
172
173 void WebResourceLoadStatisticsStore::registerSharedResourceLoadObserver()
174 {
175     ResourceLoadObserver::sharedObserver().setStatisticsStore(m_resourceLoadStatisticsStore.copyRef());
176     m_resourceLoadStatisticsStore->setNotificationCallback([this] {
177         if (m_resourceLoadStatisticsStore->isEmpty())
178             return;
179         processStatisticsAndDataRecords();
180     });
181 }
182     
183 void WebResourceLoadStatisticsStore::registerSharedResourceLoadObserver(std::function<void(const Vector<String>& domainsToRemove, const Vector<String>& domainsToAdd, bool clearFirst)>&& shouldPartitionCookiesForDomainsHandler)
184 {
185     registerSharedResourceLoadObserver();
186 #if PLATFORM(COCOA)
187     WebResourceLoadStatisticsManager::registerUserDefaultsIfNeeded();
188 #endif
189     m_resourceLoadStatisticsStore->setShouldPartitionCookiesCallback([shouldPartitionCookiesForDomainsHandler = WTFMove(shouldPartitionCookiesForDomainsHandler)] (const Vector<String>& domainsToRemove, const Vector<String>& domainsToAdd, bool clearFirst) {
190         shouldPartitionCookiesForDomainsHandler(domainsToRemove, domainsToAdd, clearFirst);
191     });
192     m_resourceLoadStatisticsStore->setWritePersistentStoreCallback([this]() {
193         writeStoreToDisk();
194     });
195 }
196
197 void WebResourceLoadStatisticsStore::readDataFromDiskIfNeeded()
198 {
199     if (!m_resourceLoadStatisticsEnabled)
200         return;
201
202     m_statisticsQueue->dispatch([this, protectedThis = makeRef(*this)] {
203         coreStore().clearInMemory();
204
205         auto decoder = createDecoderFromDisk("full_browsing_session");
206         if (!decoder)
207             return;
208
209         coreStore().readDataFromDecoder(*decoder);
210     });
211 }
212
213 void WebResourceLoadStatisticsStore::processWillOpenConnection(WebProcessProxy&, IPC::Connection& connection)
214 {
215     connection.addWorkQueueMessageReceiver(Messages::WebResourceLoadStatisticsStore::messageReceiverName(), m_statisticsQueue.get(), this);
216 }
217
218 void WebResourceLoadStatisticsStore::processDidCloseConnection(WebProcessProxy&, IPC::Connection& connection)
219 {
220     connection.removeWorkQueueMessageReceiver(Messages::WebResourceLoadStatisticsStore::messageReceiverName());
221 }
222
223 void WebResourceLoadStatisticsStore::applicationWillTerminate()
224 {
225     BinarySemaphore semaphore;
226     m_statisticsQueue->dispatch([&semaphore] {
227         // Make sure any ongoing work in our queue is finished before we terminate.
228         semaphore.signal();
229     });
230     semaphore.wait(WallTime::infinity());
231 }
232
233 String WebResourceLoadStatisticsStore::persistentStoragePath(const String& label) const
234 {
235     if (m_statisticsStoragePath.isEmpty())
236         return emptyString();
237
238     // TODO Decide what to call this file
239     return pathByAppendingComponent(m_statisticsStoragePath, label + "_resourceLog.plist");
240 }
241
242 void WebResourceLoadStatisticsStore::writeStoreToDisk()
243 {
244     auto encoder = coreStore().createEncoderFromData();
245     writeEncoderToDisk(*encoder.get(), "full_browsing_session");
246 }
247
248 void WebResourceLoadStatisticsStore::writeEncoderToDisk(KeyedEncoder& encoder, const String& label) const
249 {
250     RefPtr<SharedBuffer> rawData = encoder.finishEncoding();
251     if (!rawData)
252         return;
253
254     String resourceLog = persistentStoragePath(label);
255     if (resourceLog.isEmpty())
256         return;
257
258     if (!m_statisticsStoragePath.isEmpty()) {
259         makeAllDirectories(m_statisticsStoragePath);
260         platformExcludeFromBackup();
261     }
262
263     auto handle = openFile(resourceLog, OpenForWrite);
264     if (!handle)
265         return;
266     
267     int64_t writtenBytes = writeToFile(handle, rawData->data(), rawData->size());
268     closeFile(handle);
269
270     if (writtenBytes != static_cast<int64_t>(rawData->size()))
271         WTFLogAlways("WebResourceLoadStatisticsStore: We only wrote %d out of %d bytes to disk", static_cast<unsigned>(writtenBytes), rawData->size());
272 }
273
274 #if !PLATFORM(COCOA)
275 void WebResourceLoadStatisticsStore::platformExcludeFromBackup() const
276 {
277     // Do nothing
278 }
279 #endif
280
281 std::unique_ptr<KeyedDecoder> WebResourceLoadStatisticsStore::createDecoderFromDisk(const String& label) const
282 {
283     String resourceLog = persistentStoragePath(label);
284     if (resourceLog.isEmpty())
285         return nullptr;
286
287     RefPtr<SharedBuffer> rawData = SharedBuffer::createWithContentsOfFile(resourceLog);
288     if (!rawData)
289         return nullptr;
290
291     return KeyedDecoder::decoder(reinterpret_cast<const uint8_t*>(rawData->data()), rawData->size());
292 }
293
294 } // namespace WebKit