Resource Load Statistics: Grandfather domains for existing data records
[WebKit-https.git] / Source / WebCore / loader / ResourceLoadStatisticsStore.cpp
1 /*
2  * Copyright (C) 2016-2017 Apple Inc. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  * 1. Redistributions of source code must retain the above copyright
8  *    notice, this list of conditions and the following disclaimer.
9  * 2. Redistributions in binary form must reproduce the above copyright
10  *    notice, this list of conditions and the following disclaimer in the
11  *    documentation and/or other materials provided with the distribution.
12  *
13  * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS''
14  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
15  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS
17  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
18  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
19  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
20  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
21  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
22  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
23  * THE POSSIBILITY OF SUCH DAMAGE.
24  */
25
26 #include "config.h"
27 #include "ResourceLoadStatisticsStore.h"
28
29 #include "KeyedCoding.h"
30 #include "Logging.h"
31 #include "NetworkStorageSession.h"
32 #include "PlatformStrategies.h"
33 #include "ResourceLoadStatistics.h"
34 #include "SharedBuffer.h"
35 #include "URL.h"
36 #include <wtf/CurrentTime.h>
37 #include <wtf/NeverDestroyed.h>
38
39 namespace WebCore {
40
// Version number written under the "version" key of the encoded store.
static const auto statisticsModelVersion = 4;
static const auto secondsPerDay = 24 * 3600;

// Tunable durations, in seconds. They are declared as double (rather than
// letting `auto` deduce int from the integer initializers) because the
// setters in this file assign double values and the values are added to
// double timestamps from currentTime(); an int would silently truncate
// fractional seconds.
static double timeToLiveUserInteraction = 30 * secondsPerDay;
static double timeToLiveCookiePartitionFree = 1 * secondsPerDay;
static double grandfatheringTime = 3 * secondsPerDay;
static double minimumTimeBetweeenDataRecordsRemoval = 60;
47
48 Ref<ResourceLoadStatisticsStore> ResourceLoadStatisticsStore::create()
49 {
50     return adoptRef(*new ResourceLoadStatisticsStore());
51 }
52     
53 bool ResourceLoadStatisticsStore::isPrevalentResource(const String& primaryDomain) const
54 {
55     auto mapEntry = m_resourceStatisticsMap.find(primaryDomain);
56     if (mapEntry == m_resourceStatisticsMap.end())
57         return false;
58
59     return mapEntry->value.isPrevalentResource;
60 }
61     
62 ResourceLoadStatistics& ResourceLoadStatisticsStore::ensureResourceStatisticsForPrimaryDomain(const String& primaryDomain)
63 {
64     auto addResult = m_resourceStatisticsMap.ensure(primaryDomain, [&primaryDomain] {
65         return ResourceLoadStatistics(primaryDomain);
66     });
67
68     return addResult.iterator->value;
69 }
70
71 void ResourceLoadStatisticsStore::setResourceStatisticsForPrimaryDomain(const String& primaryDomain, ResourceLoadStatistics&& statistics)
72 {
73     m_resourceStatisticsMap.set(primaryDomain, WTFMove(statistics));
74 }
75
76 typedef HashMap<String, ResourceLoadStatistics>::KeyValuePairType StatisticsValue;
77
78 std::unique_ptr<KeyedEncoder> ResourceLoadStatisticsStore::createEncoderFromData()
79 {
80     auto encoder = KeyedEncoder::encoder();
81
82     encoder->encodeUInt32("version", statisticsModelVersion);
83     encoder->encodeDouble("endOfGrandfatheringTimestamp", m_endOfGrandfatheringTimestamp);
84     encoder->encodeObjects("browsingStatistics", m_resourceStatisticsMap.begin(), m_resourceStatisticsMap.end(), [](KeyedEncoder& encoderInner, const StatisticsValue& origin) {
85         origin.value.encode(encoderInner);
86     });
87
88     return encoder;
89 }
90
91 void ResourceLoadStatisticsStore::readDataFromDecoder(KeyedDecoder& decoder)
92 {
93     if (m_resourceStatisticsMap.size())
94         return;
95
96     unsigned version;
97     if (!decoder.decodeUInt32("version", version))
98         version = 1;
99
100     static const auto minimumVersionWithGrandfathering = 3;
101     if (version > minimumVersionWithGrandfathering) {
102         double endOfGrandfatheringTimestamp;
103         if (decoder.decodeDouble("endOfGrandfatheringTimestamp", endOfGrandfatheringTimestamp))
104             m_endOfGrandfatheringTimestamp = endOfGrandfatheringTimestamp;
105         else
106             m_endOfGrandfatheringTimestamp = 0;
107     }
108
109     Vector<ResourceLoadStatistics> loadedStatistics;
110     bool succeeded = decoder.decodeObjects("browsingStatistics", loadedStatistics, [version](KeyedDecoder& decoderInner, ResourceLoadStatistics& statistics) {
111         return statistics.decode(decoderInner, version);
112     });
113
114     if (!succeeded)
115         return;
116
117     Vector<String> prevalentResourceDomainsWithoutUserInteraction;
118     prevalentResourceDomainsWithoutUserInteraction.reserveInitialCapacity(loadedStatistics.size());
119     for (auto& statistics : loadedStatistics) {
120         if (statistics.isPrevalentResource && !statistics.hadUserInteraction) {
121             prevalentResourceDomainsWithoutUserInteraction.uncheckedAppend(statistics.highLevelDomain);
122             statistics.isMarkedForCookiePartitioning = true;
123         }
124         m_resourceStatisticsMap.set(statistics.highLevelDomain, statistics);
125     }
126
127     fireShouldPartitionCookiesHandler({ }, prevalentResourceDomainsWithoutUserInteraction, true);
128 }
129
130 void ResourceLoadStatisticsStore::clearInMemory()
131 {
132     m_resourceStatisticsMap.clear();
133     fireShouldPartitionCookiesHandler({ }, { }, true);
134 }
135
136 void ResourceLoadStatisticsStore::clearInMemoryAndPersistent()
137 {
138     clearInMemory();
139     if (m_writePersistentStoreHandler)
140         m_writePersistentStoreHandler();
141     if (m_grandfatherExistingWebsiteDataHandler)
142         m_grandfatherExistingWebsiteDataHandler();
143 }
144
145 String ResourceLoadStatisticsStore::statisticsForOrigin(const String& origin)
146 {
147     auto iter = m_resourceStatisticsMap.find(origin);
148     if (iter == m_resourceStatisticsMap.end())
149         return emptyString();
150     
151     return "Statistics for " + origin + ":\n" + iter->value.toString();
152 }
153
154 Vector<ResourceLoadStatistics> ResourceLoadStatisticsStore::takeStatistics()
155 {
156     Vector<ResourceLoadStatistics> statistics;
157     statistics.reserveInitialCapacity(m_resourceStatisticsMap.size());
158     for (auto& statistic : m_resourceStatisticsMap.values())
159         statistics.uncheckedAppend(WTFMove(statistic));
160
161     m_resourceStatisticsMap.clear();
162
163     return statistics;
164 }
165
166 void ResourceLoadStatisticsStore::mergeStatistics(const Vector<ResourceLoadStatistics>& statistics)
167 {
168     for (auto& statistic : statistics) {
169         auto result = m_resourceStatisticsMap.ensure(statistic.highLevelDomain, [&statistic] {
170             return ResourceLoadStatistics(statistic.highLevelDomain);
171         });
172         
173         result.iterator->value.merge(statistic);
174     }
175 }
176
177 void ResourceLoadStatisticsStore::setNotificationCallback(std::function<void()> handler)
178 {
179     m_dataAddedHandler = WTFMove(handler);
180 }
181
182 void ResourceLoadStatisticsStore::setShouldPartitionCookiesCallback(std::function<void(const Vector<String>& domainsToRemove, const Vector<String>& domainsToAdd, bool clearFirst)>&& handler)
183 {
184     m_shouldPartitionCookiesForDomainsHandler = WTFMove(handler);
185 }
186     
187 void ResourceLoadStatisticsStore::setWritePersistentStoreCallback(std::function<void()>&& handler)
188 {
189     m_writePersistentStoreHandler = WTFMove(handler);
190 }
191
192 void ResourceLoadStatisticsStore::setGrandfatherExistingWebsiteDataCallback(std::function<void()>&& handler)
193 {
194     m_grandfatherExistingWebsiteDataHandler = WTFMove(handler);
195 }
196
197 void ResourceLoadStatisticsStore::fireDataModificationHandler()
198 {
199     if (m_dataAddedHandler)
200         m_dataAddedHandler();
201 }
202
203 static inline bool shouldPartitionCookies(const ResourceLoadStatistics& statistic)
204 {
205     return statistic.isPrevalentResource
206         && (!statistic.hadUserInteraction || currentTime() > statistic.mostRecentUserInteraction + timeToLiveCookiePartitionFree);
207 }
208
209 void ResourceLoadStatisticsStore::fireShouldPartitionCookiesHandler()
210 {
211     Vector<String> domainsToRemove;
212     Vector<String> domainsToAdd;
213     
214     for (auto& resourceStatistic : m_resourceStatisticsMap.values()) {
215         bool shouldPartition = shouldPartitionCookies(resourceStatistic);
216         if (resourceStatistic.isMarkedForCookiePartitioning && !shouldPartition) {
217             resourceStatistic.isMarkedForCookiePartitioning = false;
218             domainsToRemove.append(resourceStatistic.highLevelDomain);
219         } else if (!resourceStatistic.isMarkedForCookiePartitioning && shouldPartition) {
220             resourceStatistic.isMarkedForCookiePartitioning = true;
221             domainsToAdd.append(resourceStatistic.highLevelDomain);
222         }
223     }
224
225     if (domainsToRemove.isEmpty() && domainsToAdd.isEmpty())
226         return;
227
228     if (m_shouldPartitionCookiesForDomainsHandler)
229         m_shouldPartitionCookiesForDomainsHandler(domainsToRemove, domainsToAdd, false);
230 }
231
232 void ResourceLoadStatisticsStore::fireShouldPartitionCookiesHandler(const Vector<String>& domainsToRemove, const Vector<String>& domainsToAdd, bool clearFirst)
233 {
234     if (domainsToRemove.isEmpty() && domainsToAdd.isEmpty())
235         return;
236     
237     if (m_shouldPartitionCookiesForDomainsHandler)
238         m_shouldPartitionCookiesForDomainsHandler(domainsToRemove, domainsToAdd, clearFirst);
239
240     if (clearFirst) {
241         for (auto& resourceStatistic : m_resourceStatisticsMap.values())
242             resourceStatistic.isMarkedForCookiePartitioning = false;
243     } else {
244         for (auto& domain : domainsToRemove)
245             ensureResourceStatisticsForPrimaryDomain(domain).isMarkedForCookiePartitioning = false;
246     }
247
248     for (auto& domain : domainsToAdd)
249         ensureResourceStatisticsForPrimaryDomain(domain).isMarkedForCookiePartitioning = true;
250 }
251
252 void ResourceLoadStatisticsStore::setTimeToLiveUserInteraction(double seconds)
253 {
254     if (seconds >= 0)
255         timeToLiveUserInteraction = seconds;
256 }
257
258 void ResourceLoadStatisticsStore::setTimeToLiveCookiePartitionFree(double seconds)
259 {
260     if (seconds >= 0)
261         timeToLiveCookiePartitionFree = seconds;
262 }
263
264 void ResourceLoadStatisticsStore::setMinimumTimeBetweeenDataRecordsRemoval(double seconds)
265 {
266     if (seconds >= 0)
267         minimumTimeBetweeenDataRecordsRemoval = seconds;
268 }
269
270 void ResourceLoadStatisticsStore::setGrandfatheringTime(double seconds)
271 {
272     if (seconds >= 0)
273         grandfatheringTime = seconds;
274 }
275
276 void ResourceLoadStatisticsStore::processStatistics(std::function<void(ResourceLoadStatistics&)>&& processFunction)
277 {
278     for (auto& resourceStatistic : m_resourceStatisticsMap.values())
279         processFunction(resourceStatistic);
280 }
281
282 bool ResourceLoadStatisticsStore::hasHadRecentUserInteraction(ResourceLoadStatistics& resourceStatistic)
283 {
284     if (!resourceStatistic.hadUserInteraction)
285         return false;
286
287     if (currentTime() > resourceStatistic.mostRecentUserInteraction + timeToLiveUserInteraction) {
288         // Drop privacy sensitive data because we no longer need it.
289         // Set timestamp to 0.0 so that statistics merge will know
290         // it has been reset as opposed to its default -1.
291         resourceStatistic.mostRecentUserInteraction = 0;
292         resourceStatistic.hadUserInteraction = false;
293
294         return false;
295     }
296
297     return true;
298 }
299
300 Vector<String> ResourceLoadStatisticsStore::topPrivatelyControlledDomainsToRemoveWebsiteDataFor()
301 {
302     bool shouldCheckForGrandfathering = m_endOfGrandfatheringTimestamp > currentTime();
303     bool shouldClearGrandfathering = !shouldCheckForGrandfathering && m_endOfGrandfatheringTimestamp;
304
305     if (shouldClearGrandfathering)
306         m_endOfGrandfatheringTimestamp = 0;
307
308     Vector<String> prevalentResources;
309     for (auto& statistic : m_resourceStatisticsMap.values()) {
310         if (statistic.isPrevalentResource
311             && !hasHadRecentUserInteraction(statistic)
312             && (!shouldCheckForGrandfathering || !statistic.grandfathered))
313             prevalentResources.append(statistic.highLevelDomain);
314
315         if (shouldClearGrandfathering && statistic.grandfathered)
316             statistic.grandfathered = false;
317     }
318
319     return prevalentResources;
320 }
321
322 void ResourceLoadStatisticsStore::updateStatisticsForRemovedDataRecords(const Vector<String>& prevalentResourceDomains)
323 {
324     for (auto& prevalentResourceDomain : prevalentResourceDomains) {
325         ResourceLoadStatistics& statistic = ensureResourceStatisticsForPrimaryDomain(prevalentResourceDomain);
326         ++statistic.dataRecordsRemoved;
327     }
328 }
329
330 void ResourceLoadStatisticsStore::handleFreshStartWithEmptyOrNoStore(HashSet<String>&& topPrivatelyControlledDomainsToGrandfather)
331 {
332     for (auto& topPrivatelyControlledDomain : topPrivatelyControlledDomainsToGrandfather) {
333         ResourceLoadStatistics& statistic = ensureResourceStatisticsForPrimaryDomain(topPrivatelyControlledDomain);
334         statistic.grandfathered = true;
335     }
336     m_endOfGrandfatheringTimestamp = std::floor(currentTime()) + grandfatheringTime;
337 }
338
339 bool ResourceLoadStatisticsStore::shouldRemoveDataRecords()
340 {
341     if (m_dataRecordsRemovalPending)
342         return false;
343
344     if (m_lastTimeDataRecordsWereRemoved && currentTime() < m_lastTimeDataRecordsWereRemoved + minimumTimeBetweeenDataRecordsRemoval)
345         return false;
346
347     return true;
348 }
349
350 void ResourceLoadStatisticsStore::dataRecordsBeingRemoved()
351 {
352     m_lastTimeDataRecordsWereRemoved = currentTime();
353     m_dataRecordsRemovalPending = true;
354 }
355
356 void ResourceLoadStatisticsStore::dataRecordsWereRemoved()
357 {
358     m_dataRecordsRemovalPending = false;
359 }
360
361 }