Clean up Storage code
[WebKit-https.git] / Source / WebKit2 / UIProcess / WebResourceLoadStatisticsStore.cpp
1 /*
2  * Copyright (C) 2016 Apple Inc. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  * 1. Redistributions of source code must retain the above copyright
8  *    notice, this list of conditions and the following disclaimer.
9  * 2. Redistributions in binary form must reproduce the above copyright
10  *    notice, this list of conditions and the following disclaimer in the
11  *    documentation and/or other materials provided with the distribution.
12  *
13  * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS''
14  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
15  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS
17  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
18  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
19  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
20  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
21  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
22  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
23  * THE POSSIBILITY OF SUCH DAMAGE.
24  */
25
26 #include "config.h"
27 #include "WebResourceLoadStatisticsStore.h"
28
29 #include "APIWebsiteDataStore.h"
30 #include "WebProcessMessages.h"
31 #include "WebProcessPool.h"
32 #include "WebResourceLoadStatisticsStoreMessages.h"
33 #include "WebsiteDataFetchOption.h"
34 #include "WebsiteDataType.h"
35 #include <WebCore/KeyedCoding.h>
36 #include <WebCore/ResourceLoadStatistics.h>
37 #include <wtf/CurrentTime.h>
38 #include <wtf/MainThread.h>
39 #include <wtf/MathExtras.h>
40 #include <wtf/RunLoop.h>
41 #include <wtf/threads/BinarySemaphore.h>
42
43 using namespace WebCore;
44
45 namespace WebKit {
46
// Minimum number of seconds that must pass between two rounds of clearing data records.
static constexpr int numberOfSecondsBetweenClearingDataRecords = 600;
// Threshold applied both to each individual feature count and to the length of the feature vector.
static constexpr int featureVectorLengthThreshold = 3;
49
50 Ref<WebResourceLoadStatisticsStore> WebResourceLoadStatisticsStore::create(const String& resourceLoadStatisticsDirectory)
51 {
52     return adoptRef(*new WebResourceLoadStatisticsStore(resourceLoadStatisticsDirectory));
53 }
54
55 WebResourceLoadStatisticsStore::WebResourceLoadStatisticsStore(const String& resourceLoadStatisticsDirectory)
56     : m_resourceStatisticsStore(ResourceLoadStatisticsStore::create())
57     , m_statisticsQueue(WorkQueue::create("WebResourceLoadStatisticsStore Process Data Queue"))
58     , m_storagePath(resourceLoadStatisticsDirectory)
59 {
60 }
61
62 WebResourceLoadStatisticsStore::~WebResourceLoadStatisticsStore()
63 {
64 }
65
66 bool WebResourceLoadStatisticsStore::hasPrevalentResourceCharacteristics(const ResourceLoadStatistics& resourceStatistic)
67 {
68     auto subresourceUnderTopFrameOriginsCount = resourceStatistic.subresourceUnderTopFrameOrigins.size();
69     auto subresourceUniqueRedirectsToCount = resourceStatistic.subresourceUniqueRedirectsTo.size();
70     auto subframeUnderTopFrameOriginsCount = resourceStatistic.subframeUnderTopFrameOrigins.size();
71     
72     if (!subresourceUnderTopFrameOriginsCount
73         && !subresourceUniqueRedirectsToCount
74         && !subframeUnderTopFrameOriginsCount)
75         return false;
76
77     if (subresourceUnderTopFrameOriginsCount > featureVectorLengthThreshold
78         || subresourceUniqueRedirectsToCount > featureVectorLengthThreshold
79         || subframeUnderTopFrameOriginsCount > featureVectorLengthThreshold)
80         return true;
81
82     // The resource is considered prevalent if the feature vector
83     // is longer than the threshold.
84     // Vector length for n dimensions is sqrt(a^2 + (...) + n^2).
85     double vectorLength = 0;
86     vectorLength += subresourceUnderTopFrameOriginsCount * subresourceUnderTopFrameOriginsCount;
87     vectorLength += subresourceUniqueRedirectsToCount * subresourceUniqueRedirectsToCount;
88     vectorLength += subframeUnderTopFrameOriginsCount * subframeUnderTopFrameOriginsCount;
89
90     ASSERT(vectorLength > 0);
91
92     return sqrt(vectorLength) > featureVectorLengthThreshold;
93 }
94     
95 void WebResourceLoadStatisticsStore::classifyResource(ResourceLoadStatistics& resourceStatistic)
96 {
97     if (!resourceStatistic.isPrevalentResource && hasPrevalentResourceCharacteristics(resourceStatistic)) {
98         resourceStatistic.isPrevalentResource = true;
99     }
100 }
101
102 void WebResourceLoadStatisticsStore::clearDataRecords()
103 {
104     if (m_dataStoreClearPending)
105         return;
106
107     Vector<String> prevalentResourceDomains = coreStore().prevalentResourceDomainsWithoutUserInteraction();
108     if (!prevalentResourceDomains.size())
109         return;
110
111     double now = currentTime();
112     if (!m_lastTimeDataRecordsWereCleared) {
113         m_lastTimeDataRecordsWereCleared = now;
114         return;
115     }
116
117     if (now < (m_lastTimeDataRecordsWereCleared + numberOfSecondsBetweenClearingDataRecords))
118         return;
119
120     m_dataStoreClearPending = true;
121     m_lastTimeDataRecordsWereCleared = now;
122
123     // Switch to the main thread to get the default website data store
124     RunLoop::main().dispatch([prevalentResourceDomains = WTFMove(prevalentResourceDomains), this] () mutable {
125         auto& websiteDataStore = API::WebsiteDataStore::defaultDataStore()->websiteDataStore();
126
127         websiteDataStore.fetchData(WebsiteDataType::Cookies, { }, [prevalentResourceDomains = WTFMove(prevalentResourceDomains), this](auto websiteDataRecords) {
128             Vector<WebsiteDataRecord> dataRecords;
129             for (auto& websiteDataRecord : websiteDataRecords) {
130                 for (auto& prevalentResourceDomain : prevalentResourceDomains) {
131                     if (websiteDataRecord.displayName.endsWithIgnoringASCIICase(prevalentResourceDomain)) {
132                         auto suffixStart = websiteDataRecord.displayName.length() - prevalentResourceDomain.length();
133                         if (!suffixStart || websiteDataRecord.displayName[suffixStart - 1] == '.')
134                             dataRecords.append(websiteDataRecord);
135                     }
136                 }
137             }
138
139             if (!dataRecords.size()) {
140                 m_dataStoreClearPending = false;
141                 return;
142             }
143
144             auto& websiteDataStore = API::WebsiteDataStore::defaultDataStore()->websiteDataStore();
145             websiteDataStore.removeData(WebsiteDataType::Cookies, { WTFMove(dataRecords) }, [this] {
146                 m_dataStoreClearPending = false;
147             });
148         });
149     });
150 }
151
152 void WebResourceLoadStatisticsStore::resourceLoadStatisticsUpdated(const Vector<WebCore::ResourceLoadStatistics>& origins)
153 {
154     coreStore().mergeStatistics(origins);
155
156     if (coreStore().hasEnoughDataForStatisticsProcessing()) {
157         coreStore().processStatistics([this] (ResourceLoadStatistics& resourceStatistic) {
158             classifyResource(resourceStatistic);
159             clearDataRecords();
160         });
161     }
162
163     auto encoder = coreStore().createEncoderFromData();
164     
165     writeEncoderToDisk(*encoder.get(), "full_browsing_session");
166 }
167
168 void WebResourceLoadStatisticsStore::setResourceLoadStatisticsEnabled(bool enabled)
169 {
170     if (enabled == m_resourceLoadStatisticsEnabled)
171         return;
172
173     m_resourceLoadStatisticsEnabled = enabled;
174
175     readDataFromDiskIfNeeded();
176 }
177
178 bool WebResourceLoadStatisticsStore::resourceLoadStatisticsEnabled() const
179 {
180     return m_resourceLoadStatisticsEnabled;
181 }
182
183 void WebResourceLoadStatisticsStore::readDataFromDiskIfNeeded()
184 {
185     if (!m_resourceLoadStatisticsEnabled)
186         return;
187
188     m_statisticsQueue->dispatch([this, protectedThis = makeRef(*this)] {
189         coreStore().clear();
190
191         auto decoder = createDecoderFromDisk("full_browsing_session");
192         if (!decoder)
193             return;
194
195         coreStore().readDataFromDecoder(*decoder);
196     });
197 }
198
199 void WebResourceLoadStatisticsStore::processWillOpenConnection(WebProcessProxy&, IPC::Connection& connection)
200 {
201     connection.addWorkQueueMessageReceiver(Messages::WebResourceLoadStatisticsStore::messageReceiverName(), m_statisticsQueue.get(), this);
202 }
203
204 void WebResourceLoadStatisticsStore::processDidCloseConnection(WebProcessProxy&, IPC::Connection& connection)
205 {
206     connection.removeWorkQueueMessageReceiver(Messages::WebResourceLoadStatisticsStore::messageReceiverName());
207 }
208
209 void WebResourceLoadStatisticsStore::applicationWillTerminate()
210 {
211     BinarySemaphore semaphore;
212     m_statisticsQueue->dispatch([this, &semaphore] {
213         // Make sure any ongoing work in our queue is finished before we terminate.
214         semaphore.signal();
215     });
216     semaphore.wait(WallTime::infinity());
217 }
218
219 String WebResourceLoadStatisticsStore::persistentStoragePath(const String& label) const
220 {
221     if (m_storagePath.isEmpty())
222         return emptyString();
223
224     // TODO Decide what to call this file
225     return pathByAppendingComponent(m_storagePath, label + "_resourceLog.plist");
226 }
227
228 void WebResourceLoadStatisticsStore::writeEncoderToDisk(KeyedEncoder& encoder, const String& label) const
229 {
230     RefPtr<SharedBuffer> rawData = encoder.finishEncoding();
231     if (!rawData)
232         return;
233
234     String resourceLog = persistentStoragePath(label);
235     if (resourceLog.isEmpty())
236         return;
237
238     if (!m_storagePath.isEmpty())
239         makeAllDirectories(m_storagePath);
240
241     auto handle = openFile(resourceLog, OpenForWrite);
242     if (!handle)
243         return;
244     
245     int64_t writtenBytes = writeToFile(handle, rawData->data(), rawData->size());
246     closeFile(handle);
247
248     if (writtenBytes != static_cast<int64_t>(rawData->size()))
249         WTFLogAlways("WebResourceLoadStatisticsStore: We only wrote %d out of %d bytes to disk", static_cast<unsigned>(writtenBytes), rawData->size());
250 }
251
252 std::unique_ptr<KeyedDecoder> WebResourceLoadStatisticsStore::createDecoderFromDisk(const String& label) const
253 {
254     String resourceLog = persistentStoragePath(label);
255     if (resourceLog.isEmpty())
256         return nullptr;
257
258     RefPtr<SharedBuffer> rawData = SharedBuffer::createWithContentsOfFile(resourceLog);
259     if (!rawData)
260         return nullptr;
261
262     return KeyedDecoder::decoder(reinterpret_cast<const uint8_t*>(rawData->data()), rawData->size());
263 }
264
265 } // namespace WebKit