Resource Load Statistics: Change grandfathering default to one hour
[WebKit-https.git] / Source / WebCore / loader / ResourceLoadStatisticsStore.cpp
1 /*
2  * Copyright (C) 2016-2017 Apple Inc. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  * 1. Redistributions of source code must retain the above copyright
8  *    notice, this list of conditions and the following disclaimer.
9  * 2. Redistributions in binary form must reproduce the above copyright
10  *    notice, this list of conditions and the following disclaimer in the
11  *    documentation and/or other materials provided with the distribution.
12  *
13  * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS''
14  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
15  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS
17  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
18  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
19  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
20  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
21  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
22  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
23  * THE POSSIBILITY OF SUCH DAMAGE.
24  */
25
26 #include "config.h"
27 #include "ResourceLoadStatisticsStore.h"
28
29 #include "KeyedCoding.h"
30 #include "Logging.h"
31 #include "NetworkStorageSession.h"
32 #include "PlatformStrategies.h"
33 #include "ResourceLoadStatistics.h"
34 #include "SharedBuffer.h"
35 #include "URL.h"
36 #include <wtf/CurrentTime.h>
37 #include <wtf/NeverDestroyed.h>
38
39 namespace WebCore {
40
// Version number written under the "version" key by createEncoderFromData().
static const auto statisticsModelVersion = 4;
static const auto secondsPerHour = 3600;
static const auto secondsPerDay = 24 * secondsPerHour;

// Tunable durations, expressed in seconds. They are spelled out as double rather
// than `auto` (which would deduce int from the integer initializers) because the
// corresponding setters in this file take `double seconds`; storing them as int
// would silently truncate fractional values and overflow on large ones.
static double timeToLiveUserInteraction = 30 * secondsPerDay;
static double timeToLiveCookiePartitionFree = 1 * secondsPerDay;
static double grandfatheringTime = 1 * secondsPerHour;
// The "Betweeen" spelling is kept as-is: other definitions in this file refer to this name.
static double minimumTimeBetweeenDataRecordsRemoval = 60;
48
49 Ref<ResourceLoadStatisticsStore> ResourceLoadStatisticsStore::create()
50 {
51     return adoptRef(*new ResourceLoadStatisticsStore());
52 }
53     
54 bool ResourceLoadStatisticsStore::isPrevalentResource(const String& primaryDomain) const
55 {
56     auto mapEntry = m_resourceStatisticsMap.find(primaryDomain);
57     if (mapEntry == m_resourceStatisticsMap.end())
58         return false;
59
60     return mapEntry->value.isPrevalentResource;
61 }
62     
63 ResourceLoadStatistics& ResourceLoadStatisticsStore::ensureResourceStatisticsForPrimaryDomain(const String& primaryDomain)
64 {
65     auto addResult = m_resourceStatisticsMap.ensure(primaryDomain, [&primaryDomain] {
66         return ResourceLoadStatistics(primaryDomain);
67     });
68
69     return addResult.iterator->value;
70 }
71
72 void ResourceLoadStatisticsStore::setResourceStatisticsForPrimaryDomain(const String& primaryDomain, ResourceLoadStatistics&& statistics)
73 {
74     m_resourceStatisticsMap.set(primaryDomain, WTFMove(statistics));
75 }
76
77 typedef HashMap<String, ResourceLoadStatistics>::KeyValuePairType StatisticsValue;
78
79 std::unique_ptr<KeyedEncoder> ResourceLoadStatisticsStore::createEncoderFromData()
80 {
81     auto encoder = KeyedEncoder::encoder();
82
83     encoder->encodeUInt32("version", statisticsModelVersion);
84     encoder->encodeDouble("endOfGrandfatheringTimestamp", m_endOfGrandfatheringTimestamp);
85     encoder->encodeObjects("browsingStatistics", m_resourceStatisticsMap.begin(), m_resourceStatisticsMap.end(), [](KeyedEncoder& encoderInner, const StatisticsValue& origin) {
86         origin.value.encode(encoderInner);
87     });
88
89     return encoder;
90 }
91
92 void ResourceLoadStatisticsStore::readDataFromDecoder(KeyedDecoder& decoder)
93 {
94     if (m_resourceStatisticsMap.size())
95         return;
96
97     unsigned version;
98     if (!decoder.decodeUInt32("version", version))
99         version = 1;
100
101     static const auto minimumVersionWithGrandfathering = 3;
102     if (version > minimumVersionWithGrandfathering) {
103         double endOfGrandfatheringTimestamp;
104         if (decoder.decodeDouble("endOfGrandfatheringTimestamp", endOfGrandfatheringTimestamp))
105             m_endOfGrandfatheringTimestamp = endOfGrandfatheringTimestamp;
106         else
107             m_endOfGrandfatheringTimestamp = 0;
108     }
109
110     Vector<ResourceLoadStatistics> loadedStatistics;
111     bool succeeded = decoder.decodeObjects("browsingStatistics", loadedStatistics, [version](KeyedDecoder& decoderInner, ResourceLoadStatistics& statistics) {
112         return statistics.decode(decoderInner, version);
113     });
114
115     if (!succeeded)
116         return;
117
118     Vector<String> prevalentResourceDomainsWithoutUserInteraction;
119     prevalentResourceDomainsWithoutUserInteraction.reserveInitialCapacity(loadedStatistics.size());
120     for (auto& statistics : loadedStatistics) {
121         if (statistics.isPrevalentResource && !statistics.hadUserInteraction) {
122             prevalentResourceDomainsWithoutUserInteraction.uncheckedAppend(statistics.highLevelDomain);
123             statistics.isMarkedForCookiePartitioning = true;
124         }
125         m_resourceStatisticsMap.set(statistics.highLevelDomain, statistics);
126     }
127
128     fireShouldPartitionCookiesHandler({ }, prevalentResourceDomainsWithoutUserInteraction, true);
129 }
130
131 void ResourceLoadStatisticsStore::clearInMemory()
132 {
133     m_resourceStatisticsMap.clear();
134     fireShouldPartitionCookiesHandler({ }, { }, true);
135 }
136
137 void ResourceLoadStatisticsStore::clearInMemoryAndPersistent()
138 {
139     clearInMemory();
140     if (m_writePersistentStoreHandler)
141         m_writePersistentStoreHandler();
142     if (m_grandfatherExistingWebsiteDataHandler)
143         m_grandfatherExistingWebsiteDataHandler();
144 }
145
146 String ResourceLoadStatisticsStore::statisticsForOrigin(const String& origin)
147 {
148     auto iter = m_resourceStatisticsMap.find(origin);
149     if (iter == m_resourceStatisticsMap.end())
150         return emptyString();
151     
152     return "Statistics for " + origin + ":\n" + iter->value.toString();
153 }
154
155 Vector<ResourceLoadStatistics> ResourceLoadStatisticsStore::takeStatistics()
156 {
157     Vector<ResourceLoadStatistics> statistics;
158     statistics.reserveInitialCapacity(m_resourceStatisticsMap.size());
159     for (auto& statistic : m_resourceStatisticsMap.values())
160         statistics.uncheckedAppend(WTFMove(statistic));
161
162     m_resourceStatisticsMap.clear();
163
164     return statistics;
165 }
166
167 void ResourceLoadStatisticsStore::mergeStatistics(const Vector<ResourceLoadStatistics>& statistics)
168 {
169     for (auto& statistic : statistics) {
170         auto result = m_resourceStatisticsMap.ensure(statistic.highLevelDomain, [&statistic] {
171             return ResourceLoadStatistics(statistic.highLevelDomain);
172         });
173         
174         result.iterator->value.merge(statistic);
175     }
176 }
177
178 void ResourceLoadStatisticsStore::setNotificationCallback(std::function<void()> handler)
179 {
180     m_dataAddedHandler = WTFMove(handler);
181 }
182
183 void ResourceLoadStatisticsStore::setShouldPartitionCookiesCallback(std::function<void(const Vector<String>& domainsToRemove, const Vector<String>& domainsToAdd, bool clearFirst)>&& handler)
184 {
185     m_shouldPartitionCookiesForDomainsHandler = WTFMove(handler);
186 }
187     
188 void ResourceLoadStatisticsStore::setWritePersistentStoreCallback(std::function<void()>&& handler)
189 {
190     m_writePersistentStoreHandler = WTFMove(handler);
191 }
192
193 void ResourceLoadStatisticsStore::setGrandfatherExistingWebsiteDataCallback(std::function<void()>&& handler)
194 {
195     m_grandfatherExistingWebsiteDataHandler = WTFMove(handler);
196 }
197
198 void ResourceLoadStatisticsStore::fireDataModificationHandler()
199 {
200     if (m_dataAddedHandler)
201         m_dataAddedHandler();
202 }
203
204 static inline bool shouldPartitionCookies(const ResourceLoadStatistics& statistic)
205 {
206     return statistic.isPrevalentResource
207         && (!statistic.hadUserInteraction || currentTime() > statistic.mostRecentUserInteraction + timeToLiveCookiePartitionFree);
208 }
209
210 void ResourceLoadStatisticsStore::fireShouldPartitionCookiesHandler()
211 {
212     Vector<String> domainsToRemove;
213     Vector<String> domainsToAdd;
214     
215     for (auto& resourceStatistic : m_resourceStatisticsMap.values()) {
216         bool shouldPartition = shouldPartitionCookies(resourceStatistic);
217         if (resourceStatistic.isMarkedForCookiePartitioning && !shouldPartition) {
218             resourceStatistic.isMarkedForCookiePartitioning = false;
219             domainsToRemove.append(resourceStatistic.highLevelDomain);
220         } else if (!resourceStatistic.isMarkedForCookiePartitioning && shouldPartition) {
221             resourceStatistic.isMarkedForCookiePartitioning = true;
222             domainsToAdd.append(resourceStatistic.highLevelDomain);
223         }
224     }
225
226     if (domainsToRemove.isEmpty() && domainsToAdd.isEmpty())
227         return;
228
229     if (m_shouldPartitionCookiesForDomainsHandler)
230         m_shouldPartitionCookiesForDomainsHandler(domainsToRemove, domainsToAdd, false);
231 }
232
233 void ResourceLoadStatisticsStore::fireShouldPartitionCookiesHandler(const Vector<String>& domainsToRemove, const Vector<String>& domainsToAdd, bool clearFirst)
234 {
235     if (domainsToRemove.isEmpty() && domainsToAdd.isEmpty())
236         return;
237     
238     if (m_shouldPartitionCookiesForDomainsHandler)
239         m_shouldPartitionCookiesForDomainsHandler(domainsToRemove, domainsToAdd, clearFirst);
240
241     if (clearFirst) {
242         for (auto& resourceStatistic : m_resourceStatisticsMap.values())
243             resourceStatistic.isMarkedForCookiePartitioning = false;
244     } else {
245         for (auto& domain : domainsToRemove)
246             ensureResourceStatisticsForPrimaryDomain(domain).isMarkedForCookiePartitioning = false;
247     }
248
249     for (auto& domain : domainsToAdd)
250         ensureResourceStatisticsForPrimaryDomain(domain).isMarkedForCookiePartitioning = true;
251 }
252
253 void ResourceLoadStatisticsStore::setTimeToLiveUserInteraction(double seconds)
254 {
255     if (seconds >= 0)
256         timeToLiveUserInteraction = seconds;
257 }
258
259 void ResourceLoadStatisticsStore::setTimeToLiveCookiePartitionFree(double seconds)
260 {
261     if (seconds >= 0)
262         timeToLiveCookiePartitionFree = seconds;
263 }
264
265 void ResourceLoadStatisticsStore::setMinimumTimeBetweeenDataRecordsRemoval(double seconds)
266 {
267     if (seconds >= 0)
268         minimumTimeBetweeenDataRecordsRemoval = seconds;
269 }
270
271 void ResourceLoadStatisticsStore::setGrandfatheringTime(double seconds)
272 {
273     if (seconds >= 0)
274         grandfatheringTime = seconds;
275 }
276
277 void ResourceLoadStatisticsStore::processStatistics(std::function<void(ResourceLoadStatistics&)>&& processFunction)
278 {
279     for (auto& resourceStatistic : m_resourceStatisticsMap.values())
280         processFunction(resourceStatistic);
281 }
282
283 bool ResourceLoadStatisticsStore::hasHadRecentUserInteraction(ResourceLoadStatistics& resourceStatistic)
284 {
285     if (!resourceStatistic.hadUserInteraction)
286         return false;
287
288     if (currentTime() > resourceStatistic.mostRecentUserInteraction + timeToLiveUserInteraction) {
289         // Drop privacy sensitive data because we no longer need it.
290         // Set timestamp to 0.0 so that statistics merge will know
291         // it has been reset as opposed to its default -1.
292         resourceStatistic.mostRecentUserInteraction = 0;
293         resourceStatistic.hadUserInteraction = false;
294
295         return false;
296     }
297
298     return true;
299 }
300
301 Vector<String> ResourceLoadStatisticsStore::topPrivatelyControlledDomainsToRemoveWebsiteDataFor()
302 {
303     bool shouldCheckForGrandfathering = m_endOfGrandfatheringTimestamp > currentTime();
304     bool shouldClearGrandfathering = !shouldCheckForGrandfathering && m_endOfGrandfatheringTimestamp;
305
306     if (shouldClearGrandfathering)
307         m_endOfGrandfatheringTimestamp = 0;
308
309     Vector<String> prevalentResources;
310     for (auto& statistic : m_resourceStatisticsMap.values()) {
311         if (statistic.isPrevalentResource
312             && !hasHadRecentUserInteraction(statistic)
313             && (!shouldCheckForGrandfathering || !statistic.grandfathered))
314             prevalentResources.append(statistic.highLevelDomain);
315
316         if (shouldClearGrandfathering && statistic.grandfathered)
317             statistic.grandfathered = false;
318     }
319
320     return prevalentResources;
321 }
322
323 void ResourceLoadStatisticsStore::updateStatisticsForRemovedDataRecords(const Vector<String>& prevalentResourceDomains)
324 {
325     for (auto& prevalentResourceDomain : prevalentResourceDomains) {
326         ResourceLoadStatistics& statistic = ensureResourceStatisticsForPrimaryDomain(prevalentResourceDomain);
327         ++statistic.dataRecordsRemoved;
328     }
329 }
330
331 void ResourceLoadStatisticsStore::handleFreshStartWithEmptyOrNoStore(HashSet<String>&& topPrivatelyControlledDomainsToGrandfather)
332 {
333     for (auto& topPrivatelyControlledDomain : topPrivatelyControlledDomainsToGrandfather) {
334         ResourceLoadStatistics& statistic = ensureResourceStatisticsForPrimaryDomain(topPrivatelyControlledDomain);
335         statistic.grandfathered = true;
336     }
337     m_endOfGrandfatheringTimestamp = std::floor(currentTime()) + grandfatheringTime;
338 }
339
340 bool ResourceLoadStatisticsStore::shouldRemoveDataRecords()
341 {
342     if (m_dataRecordsRemovalPending)
343         return false;
344
345     if (m_lastTimeDataRecordsWereRemoved && currentTime() < m_lastTimeDataRecordsWereRemoved + minimumTimeBetweeenDataRecordsRemoval)
346         return false;
347
348     return true;
349 }
350
351 void ResourceLoadStatisticsStore::dataRecordsBeingRemoved()
352 {
353     m_lastTimeDataRecordsWereRemoved = currentTime();
354     m_dataRecordsRemovalPending = true;
355 }
356
357 void ResourceLoadStatisticsStore::dataRecordsWereRemoved()
358 {
359     m_dataRecordsRemovalPending = false;
360 }
361
362 }