Rolling out r212757
[WebKit-https.git] / Source / WebKit2 / UIProcess / WebResourceLoadStatisticsStore.cpp
1 /*
2  * Copyright (C) 2016-2017 Apple Inc. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  * 1. Redistributions of source code must retain the above copyright
8  *    notice, this list of conditions and the following disclaimer.
9  * 2. Redistributions in binary form must reproduce the above copyright
10  *    notice, this list of conditions and the following disclaimer in the
11  *    documentation and/or other materials provided with the distribution.
12  *
13  * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS''
14  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
15  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS
17  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
18  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
19  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
20  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
21  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
22  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
23  * THE POSSIBILITY OF SUCH DAMAGE.
24  */
25
26 #include "config.h"
27 #include "WebResourceLoadStatisticsStore.h"
28
29 #include "WebProcessMessages.h"
30 #include "WebProcessPool.h"
31 #include "WebProcessProxy.h"
32 #include "WebResourceLoadStatisticsStoreMessages.h"
33 #include "WebsiteDataFetchOption.h"
34 #include "WebsiteDataType.h"
35 #include <WebCore/KeyedCoding.h>
36 #include <WebCore/ResourceLoadObserver.h>
37 #include <WebCore/ResourceLoadStatistics.h>
38 #include <wtf/CurrentTime.h>
39 #include <wtf/MainThread.h>
40 #include <wtf/MathExtras.h>
41 #include <wtf/RunLoop.h>
42 #include <wtf/threads/BinarySemaphore.h>
43
44 using namespace WebCore;
45
46 namespace WebKit {
47
48 static const auto featureVectorLengthThreshold = 3;
49 static auto minimumTimeBetweeenDataRecordsRemoval = 60;
50 static OptionSet<WebKit::WebsiteDataType> dataTypesToRemove;
51 static auto notifyPages = false;
52 static auto shouldClassifyResourcesBeforeDataRecordsRemoval = true;
53
54 Ref<WebResourceLoadStatisticsStore> WebResourceLoadStatisticsStore::create(const String& resourceLoadStatisticsDirectory)
55 {
56     return adoptRef(*new WebResourceLoadStatisticsStore(resourceLoadStatisticsDirectory));
57 }
58
59 WebResourceLoadStatisticsStore::WebResourceLoadStatisticsStore(const String& resourceLoadStatisticsDirectory)
60     : m_resourceLoadStatisticsStore(ResourceLoadStatisticsStore::create())
61     , m_statisticsQueue(WorkQueue::create("WebResourceLoadStatisticsStore Process Data Queue"))
62     , m_storagePath(resourceLoadStatisticsDirectory)
63 {
64 }
65
66 WebResourceLoadStatisticsStore::~WebResourceLoadStatisticsStore()
67 {
68 }
69
70 void WebResourceLoadStatisticsStore::setNotifyPagesWhenDataRecordsWereScanned(bool always)
71 {
72     notifyPages = always;
73 }
74
75 void WebResourceLoadStatisticsStore::setShouldClassifyResourcesBeforeDataRecordsRemoval(bool value)
76 {
77     shouldClassifyResourcesBeforeDataRecordsRemoval = value;
78 }
79
80 void WebResourceLoadStatisticsStore::setMinimumTimeBetweeenDataRecordsRemoval(double seconds)
81 {
82     if (seconds >= 0)
83         minimumTimeBetweeenDataRecordsRemoval = seconds;
84 }
85
86 bool WebResourceLoadStatisticsStore::hasPrevalentResourceCharacteristics(const ResourceLoadStatistics& resourceStatistic)
87 {
88     auto subresourceUnderTopFrameOriginsCount = resourceStatistic.subresourceUnderTopFrameOrigins.size();
89     auto subresourceUniqueRedirectsToCount = resourceStatistic.subresourceUniqueRedirectsTo.size();
90     auto subframeUnderTopFrameOriginsCount = resourceStatistic.subframeUnderTopFrameOrigins.size();
91     
92     if (!subresourceUnderTopFrameOriginsCount
93         && !subresourceUniqueRedirectsToCount
94         && !subframeUnderTopFrameOriginsCount)
95         return false;
96
97     if (subresourceUnderTopFrameOriginsCount > featureVectorLengthThreshold
98         || subresourceUniqueRedirectsToCount > featureVectorLengthThreshold
99         || subframeUnderTopFrameOriginsCount > featureVectorLengthThreshold)
100         return true;
101
102     // The resource is considered prevalent if the feature vector
103     // is longer than the threshold.
104     // Vector length for n dimensions is sqrt(a^2 + (...) + n^2).
105     double vectorLength = 0;
106     vectorLength += subresourceUnderTopFrameOriginsCount * subresourceUnderTopFrameOriginsCount;
107     vectorLength += subresourceUniqueRedirectsToCount * subresourceUniqueRedirectsToCount;
108     vectorLength += subframeUnderTopFrameOriginsCount * subframeUnderTopFrameOriginsCount;
109
110     ASSERT(vectorLength > 0);
111
112     return sqrt(vectorLength) > featureVectorLengthThreshold;
113 }
114     
115 void WebResourceLoadStatisticsStore::classifyResource(ResourceLoadStatistics& resourceStatistic)
116 {
117     if (!resourceStatistic.isPrevalentResource && hasPrevalentResourceCharacteristics(resourceStatistic)) {
118         resourceStatistic.isPrevalentResource = true;
119     }
120 }
121
122 void WebResourceLoadStatisticsStore::removeDataRecords()
123 {
124     if (m_dataRecordsRemovalPending)
125         return;
126
127     Vector<String> prevalentResourceDomains = coreStore().prevalentResourceDomainsWithoutUserInteraction();
128     if (!prevalentResourceDomains.size())
129         return;
130
131     double now = currentTime();
132     if (m_lastTimeDataRecordsWereRemoved
133         && now < m_lastTimeDataRecordsWereRemoved + minimumTimeBetweeenDataRecordsRemoval)
134         return;
135
136     m_dataRecordsRemovalPending = true;
137     m_lastTimeDataRecordsWereRemoved = now;
138
139     if (dataTypesToRemove.isEmpty()) {
140         dataTypesToRemove |= WebsiteDataType::Cookies;
141         dataTypesToRemove |= WebsiteDataType::DiskCache;
142         dataTypesToRemove |= WebsiteDataType::MemoryCache;
143         dataTypesToRemove |= WebsiteDataType::OfflineWebApplicationCache;
144         dataTypesToRemove |= WebsiteDataType::SessionStorage;
145         dataTypesToRemove |= WebsiteDataType::LocalStorage;
146         dataTypesToRemove |= WebsiteDataType::WebSQLDatabases;
147         dataTypesToRemove |= WebsiteDataType::IndexedDBDatabases;
148         dataTypesToRemove |= WebsiteDataType::MediaKeys;
149         dataTypesToRemove |= WebsiteDataType::HSTSCache;
150         dataTypesToRemove |= WebsiteDataType::SearchFieldRecentSearches;
151 #if ENABLE(NETSCAPE_PLUGIN_API)
152         dataTypesToRemove |= WebsiteDataType::PlugInData;
153 #endif
154 #if ENABLE(MEDIA_STREAM)
155         dataTypesToRemove |= WebsiteDataType::MediaDeviceIdentifier;
156 #endif
157     }
158
159     // Switch to the main thread to get the default website data store
160     RunLoop::main().dispatch([prevalentResourceDomains = WTFMove(prevalentResourceDomains), this] () mutable {
161         WebProcessProxy::deleteWebsiteDataForTopPrivatelyOwnedDomainsInAllPersistentDataStores(dataTypesToRemove, prevalentResourceDomains, notifyPages, [this](Vector<String> domainsWithDeletedWebsiteData) mutable {
162             this->coreStore().updateStatisticsForRemovedDataRecords(domainsWithDeletedWebsiteData);
163             m_dataRecordsRemovalPending = false;
164         });
165     });
166 }
167
168 void WebResourceLoadStatisticsStore::processStatisticsAndDataRecords()
169 {
170     if (shouldClassifyResourcesBeforeDataRecordsRemoval) {
171         coreStore().processStatistics([this] (ResourceLoadStatistics& resourceStatistic) {
172             classifyResource(resourceStatistic);
173         });
174     }
175     removeDataRecords();
176     
177     auto encoder = coreStore().createEncoderFromData();
178     
179     writeEncoderToDisk(*encoder.get(), "full_browsing_session");
180 }
181
182 void WebResourceLoadStatisticsStore::resourceLoadStatisticsUpdated(const Vector<WebCore::ResourceLoadStatistics>& origins)
183 {
184     coreStore().mergeStatistics(origins);
185     processStatisticsAndDataRecords();
186 }
187
188 void WebResourceLoadStatisticsStore::setResourceLoadStatisticsEnabled(bool enabled)
189 {
190     if (enabled == m_resourceLoadStatisticsEnabled)
191         return;
192
193     m_resourceLoadStatisticsEnabled = enabled;
194
195     readDataFromDiskIfNeeded();
196 }
197
198 bool WebResourceLoadStatisticsStore::resourceLoadStatisticsEnabled() const
199 {
200     return m_resourceLoadStatisticsEnabled;
201 }
202
203
204 void WebResourceLoadStatisticsStore::registerSharedResourceLoadObserver()
205 {
206     ResourceLoadObserver::sharedObserver().setStatisticsStore(m_resourceLoadStatisticsStore.copyRef());
207     m_resourceLoadStatisticsStore->setNotificationCallback([this] {
208         if (m_resourceLoadStatisticsStore->isEmpty())
209             return;
210         processStatisticsAndDataRecords();
211     });
212 }
213
214 void WebResourceLoadStatisticsStore::readDataFromDiskIfNeeded()
215 {
216     if (!m_resourceLoadStatisticsEnabled)
217         return;
218
219     m_statisticsQueue->dispatch([this, protectedThis = makeRef(*this)] {
220         coreStore().clear();
221
222         auto decoder = createDecoderFromDisk("full_browsing_session");
223         if (!decoder)
224             return;
225
226         coreStore().readDataFromDecoder(*decoder);
227     });
228 }
229
230 void WebResourceLoadStatisticsStore::processWillOpenConnection(WebProcessProxy&, IPC::Connection& connection)
231 {
232     connection.addWorkQueueMessageReceiver(Messages::WebResourceLoadStatisticsStore::messageReceiverName(), m_statisticsQueue.get(), this);
233 }
234
235 void WebResourceLoadStatisticsStore::processDidCloseConnection(WebProcessProxy&, IPC::Connection& connection)
236 {
237     connection.removeWorkQueueMessageReceiver(Messages::WebResourceLoadStatisticsStore::messageReceiverName());
238 }
239
240 void WebResourceLoadStatisticsStore::applicationWillTerminate()
241 {
242     BinarySemaphore semaphore;
243     m_statisticsQueue->dispatch([this, &semaphore] {
244         // Make sure any ongoing work in our queue is finished before we terminate.
245         semaphore.signal();
246     });
247     semaphore.wait(WallTime::infinity());
248 }
249
250 String WebResourceLoadStatisticsStore::persistentStoragePath(const String& label) const
251 {
252     if (m_storagePath.isEmpty())
253         return emptyString();
254
255     // TODO Decide what to call this file
256     return pathByAppendingComponent(m_storagePath, label + "_resourceLog.plist");
257 }
258
259 void WebResourceLoadStatisticsStore::writeEncoderToDisk(KeyedEncoder& encoder, const String& label) const
260 {
261     RefPtr<SharedBuffer> rawData = encoder.finishEncoding();
262     if (!rawData)
263         return;
264
265     String resourceLog = persistentStoragePath(label);
266     if (resourceLog.isEmpty())
267         return;
268
269     if (!m_storagePath.isEmpty())
270         makeAllDirectories(m_storagePath);
271
272     auto handle = openFile(resourceLog, OpenForWrite);
273     if (!handle)
274         return;
275     
276     int64_t writtenBytes = writeToFile(handle, rawData->data(), rawData->size());
277     closeFile(handle);
278
279     if (writtenBytes != static_cast<int64_t>(rawData->size()))
280         WTFLogAlways("WebResourceLoadStatisticsStore: We only wrote %d out of %d bytes to disk", static_cast<unsigned>(writtenBytes), rawData->size());
281 }
282
283 std::unique_ptr<KeyedDecoder> WebResourceLoadStatisticsStore::createDecoderFromDisk(const String& label) const
284 {
285     String resourceLog = persistentStoragePath(label);
286     if (resourceLog.isEmpty())
287         return nullptr;
288
289     RefPtr<SharedBuffer> rawData = SharedBuffer::createWithContentsOfFile(resourceLog);
290     if (!rawData)
291         return nullptr;
292
293     return KeyedDecoder::decoder(reinterpret_cast<const uint8_t*>(rawData->data()), rawData->size());
294 }
295
296 } // namespace WebKit