df1ee6581383b970082428600251a61adf28b7f5
[WebKit-https.git] / Source / WebKit / UIProcess / WebResourceLoadStatisticsStore.cpp
1 /*
2  * Copyright (C) 2016-2017 Apple Inc. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  * 1. Redistributions of source code must retain the above copyright
8  *    notice, this list of conditions and the following disclaimer.
9  * 2. Redistributions in binary form must reproduce the above copyright
10  *    notice, this list of conditions and the following disclaimer in the
11  *    documentation and/or other materials provided with the distribution.
12  *
13  * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS''
14  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
15  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS
17  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
18  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
19  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
20  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
21  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
22  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
23  * THE POSSIBILITY OF SUCH DAMAGE.
24  */
25
26 #include "config.h"
27 #include "WebResourceLoadStatisticsStore.h"
28
29 #include "Logging.h"
30 #include "WebProcessMessages.h"
31 #include "WebProcessProxy.h"
32 #include "WebResourceLoadStatisticsStoreMessages.h"
33 #include "WebResourceLoadStatisticsTelemetry.h"
34 #include "WebsiteDataFetchOption.h"
35 #include "WebsiteDataStore.h"
36 #include "WebsiteDataType.h"
37 #include <WebCore/KeyedCoding.h>
38 #include <WebCore/ResourceLoadStatistics.h>
39 #include <wtf/CrossThreadCopier.h>
40 #include <wtf/DateMath.h>
41 #include <wtf/MathExtras.h>
42 #include <wtf/NeverDestroyed.h>
43
44 using namespace WebCore;
45
46 namespace WebKit {
47
// Maximum number of operating dates kept in memory; the oldest dates are dropped first.
constexpr unsigned operatingDatesWindow = 30;
// Version written into the encoded statistics; decoded data carrying any other version is ignored.
constexpr unsigned statisticsModelVersion = 9;
// Largest value computeImportance() can produce; pruning buckets are indexed [0, maxImportance].
constexpr unsigned maxImportance = 3;
51
52 template<typename T> static inline String isolatedPrimaryDomain(const T& value)
53 {
54     return ResourceLoadStatistics::primaryDomain(value).isolatedCopy();
55 }
56
57 static const OptionSet<WebsiteDataType>& dataTypesToRemove()
58 {
59     static NeverDestroyed<OptionSet<WebsiteDataType>> dataTypes(std::initializer_list<WebsiteDataType>({
60         WebsiteDataType::Cookies,
61         WebsiteDataType::IndexedDBDatabases,
62         WebsiteDataType::LocalStorage,
63 #if ENABLE(MEDIA_STREAM)
64         WebsiteDataType::MediaDeviceIdentifier,
65 #endif
66         WebsiteDataType::MediaKeys,
67         WebsiteDataType::OfflineWebApplicationCache,
68 #if ENABLE(NETSCAPE_PLUGIN_API)
69         WebsiteDataType::PlugInData,
70 #endif
71         WebsiteDataType::SearchFieldRecentSearches,
72         WebsiteDataType::SessionStorage,
73         WebsiteDataType::WebSQLDatabases,
74     }));
75
76     ASSERT(RunLoop::isMain());
77
78     return dataTypes;
79 }
80
81 class OperatingDate {
82 public:
83     OperatingDate() = default;
84
85     static OperatingDate fromWallTime(WallTime time)
86     {
87         double ms = time.secondsSinceEpoch().milliseconds();
88         int year = msToYear(ms);
89         int yearDay = dayInYear(ms, year);
90         int month = monthFromDayInYear(yearDay, isLeapYear(year));
91         int monthDay = dayInMonthFromDayInYear(yearDay, isLeapYear(year));
92
93         return OperatingDate { year, month, monthDay };
94     }
95
96     static OperatingDate today()
97     {
98         return OperatingDate::fromWallTime(WallTime::now());
99     }
100
101     Seconds secondsSinceEpoch() const
102     {
103         return Seconds { dateToDaysFrom1970(m_year, m_month, m_monthDay) * secondsPerDay };
104     }
105
106     bool operator==(const OperatingDate& other) const
107     {
108         return m_monthDay == other.m_monthDay && m_month == other.m_month && m_year == other.m_year;
109     }
110
111     bool operator<(const OperatingDate& other) const
112     {
113         return secondsSinceEpoch() < other.secondsSinceEpoch();
114     }
115
116     bool operator<=(const OperatingDate& other) const
117     {
118         return secondsSinceEpoch() <= other.secondsSinceEpoch();
119     }
120
121 private:
122     OperatingDate(int year, int month, int monthDay)
123         : m_year(year)
124         , m_month(month)
125         , m_monthDay(monthDay)
126     { }
127
128     int m_year { 0 };
129     int m_month { 0 }; // [0, 11].
130     int m_monthDay { 0 }; // [1, 31].
131 };
132
133 static Vector<OperatingDate> mergeOperatingDates(const Vector<OperatingDate>& existingDates, Vector<OperatingDate>&& newDates)
134 {
135     if (existingDates.isEmpty())
136         return WTFMove(newDates);
137
138     Vector<OperatingDate> mergedDates(existingDates.size() + newDates.size());
139
140     // Merge the two sorted vectors of dates.
141     std::merge(existingDates.begin(), existingDates.end(), newDates.begin(), newDates.end(), mergedDates.begin());
142     // Remove duplicate dates.
143     removeRepeatedElements(mergedDates);
144
145     // Drop old dates until the Vector size reaches operatingDatesWindow.
146     while (mergedDates.size() > operatingDatesWindow)
147         mergedDates.remove(0);
148
149     return mergedDates;
150 }
151
152 WebResourceLoadStatisticsStore::WebResourceLoadStatisticsStore(const String& resourceLoadStatisticsDirectory, UpdateCookiePartitioningForDomainsHandler&& updateCookiePartitioningForDomainsHandler)
153     : m_statisticsQueue(WorkQueue::create("WebResourceLoadStatisticsStore Process Data Queue", WorkQueue::Type::Serial, WorkQueue::QOS::Utility))
154     , m_persistentStorage(*this, resourceLoadStatisticsDirectory)
155     , m_updateCookiePartitioningForDomainsHandler(WTFMove(updateCookiePartitioningForDomainsHandler))
156     , m_dailyTasksTimer(RunLoop::main(), this, &WebResourceLoadStatisticsStore::performDailyTasks)
157 {
158     ASSERT(RunLoop::isMain());
159
160 #if PLATFORM(COCOA)
161     registerUserDefaultsIfNeeded();
162 #endif
163
164     m_statisticsQueue->dispatch([this, protectedThis = makeRef(*this)] {
165         m_persistentStorage.initialize();
166         includeTodayAsOperatingDateIfNecessary();
167     });
168
169     m_statisticsQueue->dispatchAfter(5_s, [this, protectedThis = makeRef(*this)] {
170         if (m_parameters.shouldSubmitTelemetry)
171             WebResourceLoadStatisticsTelemetry::calculateAndSubmit(*this);
172     });
173
174     m_dailyTasksTimer.startRepeating(24_h);
175 }
176
177 WebResourceLoadStatisticsStore::~WebResourceLoadStatisticsStore()
178 {
179 }
180     
181 void WebResourceLoadStatisticsStore::removeDataRecords()
182 {
183     ASSERT(!RunLoop::isMain());
184     
185     if (!shouldRemoveDataRecords())
186         return;
187
188     auto prevalentResourceDomains = topPrivatelyControlledDomainsToRemoveWebsiteDataFor();
189     if (prevalentResourceDomains.isEmpty())
190         return;
191     
192     setDataRecordsBeingRemoved(true);
193
194     RunLoop::main().dispatch([prevalentResourceDomains = CrossThreadCopier<Vector<String>>::copy(prevalentResourceDomains), this, protectedThis = makeRef(*this)] () mutable {
195         WebProcessProxy::deleteWebsiteDataForTopPrivatelyControlledDomainsInAllPersistentDataStores(dataTypesToRemove(), WTFMove(prevalentResourceDomains), m_parameters.shouldNotifyPagesWhenDataRecordsWereScanned, [this, protectedThis = WTFMove(protectedThis)](const HashSet<String>& domainsWithDeletedWebsiteData) mutable {
196             m_statisticsQueue->dispatch([this, protectedThis = WTFMove(protectedThis), topDomains = CrossThreadCopier<HashSet<String>>::copy(domainsWithDeletedWebsiteData)] () mutable {
197                 for (auto& prevalentResourceDomain : topDomains) {
198                     auto& statistic = ensureResourceStatisticsForPrimaryDomain(prevalentResourceDomain);
199                     ++statistic.dataRecordsRemoved;
200                 }
201                 setDataRecordsBeingRemoved(false);
202             });
203         });
204     });
205 }
206
207 void WebResourceLoadStatisticsStore::processStatisticsAndDataRecords()
208 {
209     m_statisticsQueue->dispatch([this, protectedThis = makeRef(*this)] () {
210         if (m_parameters.shouldClassifyResourcesBeforeDataRecordsRemoval) {
211             for (auto& resourceStatistic : m_resourceStatisticsMap.values()) {
212                 if (!resourceStatistic.isPrevalentResource && m_resourceLoadStatisticsClassifier.hasPrevalentResourceCharacteristics(resourceStatistic))
213                     resourceStatistic.isPrevalentResource = true;
214             }
215         }
216         removeDataRecords();
217         
218         pruneStatisticsIfNeeded();
219
220         if (m_parameters.shouldNotifyPagesWhenDataRecordsWereScanned) {
221             RunLoop::main().dispatch([] {
222                 WebProcessProxy::notifyPageStatisticsAndDataRecordsProcessed();
223             });
224         }
225
226         m_persistentStorage.scheduleOrWriteMemoryStore();
227     });
228 }
229
230 void WebResourceLoadStatisticsStore::resourceLoadStatisticsUpdated(Vector<WebCore::ResourceLoadStatistics>&& origins)
231 {
232     ASSERT(!RunLoop::isMain());
233
234     mergeStatistics(WTFMove(origins));
235     // Fire before processing statistics to propagate user interaction as fast as possible to the network process.
236     updateCookiePartitioning();
237     processStatisticsAndDataRecords();
238 }
239
240 void WebResourceLoadStatisticsStore::grandfatherExistingWebsiteData()
241 {
242     ASSERT(!RunLoop::isMain());
243
244     RunLoop::main().dispatch([this, protectedThis = makeRef(*this)] () mutable {
245         WebProcessProxy::topPrivatelyControlledDomainsWithWebsiteData(dataTypesToRemove(), m_parameters.shouldNotifyPagesWhenDataRecordsWereScanned, [this, protectedThis = WTFMove(protectedThis)] (HashSet<String>&& topPrivatelyControlledDomainsWithWebsiteData) mutable {
246             m_statisticsQueue->dispatch([this, protectedThis = WTFMove(protectedThis), topDomains = CrossThreadCopier<HashSet<String>>::copy(topPrivatelyControlledDomainsWithWebsiteData)] () mutable {
247                 for (auto& topPrivatelyControlledDomain : topDomains) {
248                     auto& statistic = ensureResourceStatisticsForPrimaryDomain(topPrivatelyControlledDomain);
249                     statistic.grandfathered = true;
250                 }
251                 m_endOfGrandfatheringTimestamp = WallTime::now() + m_parameters.grandfatheringTime;
252             });
253         });
254     });
255 }
256     
257 void WebResourceLoadStatisticsStore::processWillOpenConnection(WebProcessProxy&, IPC::Connection& connection)
258 {
259     connection.addWorkQueueMessageReceiver(Messages::WebResourceLoadStatisticsStore::messageReceiverName(), m_statisticsQueue.get(), this);
260 }
261
262 void WebResourceLoadStatisticsStore::processDidCloseConnection(WebProcessProxy&, IPC::Connection& connection)
263 {
264     connection.removeWorkQueueMessageReceiver(Messages::WebResourceLoadStatisticsStore::messageReceiverName());
265 }
266
267 void WebResourceLoadStatisticsStore::applicationWillTerminate()
268 {
269     m_persistentStorage.finishAllPendingWorkSynchronously();
270 }
271
272 void WebResourceLoadStatisticsStore::performDailyTasks()
273 {
274     ASSERT(RunLoop::isMain());
275
276     includeTodayAsOperatingDateIfNecessary();
277     if (m_parameters.shouldSubmitTelemetry)
278         submitTelemetry();
279 }
280
281 void WebResourceLoadStatisticsStore::submitTelemetry()
282 {
283     ASSERT(RunLoop::isMain());
284     m_statisticsQueue->dispatch([this, protectedThis = makeRef(*this)] {
285         WebResourceLoadStatisticsTelemetry::calculateAndSubmit(*this);
286     });
287 }
288
289 void WebResourceLoadStatisticsStore::logUserInteraction(const URL& url)
290 {
291     if (url.isBlankURL() || url.isEmpty())
292         return;
293
294     m_statisticsQueue->dispatch([this, protectedThis = makeRef(*this), primaryDomain = isolatedPrimaryDomain(url)] {
295         auto& statistics = ensureResourceStatisticsForPrimaryDomain(primaryDomain);
296         statistics.hadUserInteraction = true;
297         statistics.mostRecentUserInteractionTime = WallTime::now();
298
299         updateCookiePartitioningForDomains({ primaryDomain }, { }, ShouldClearFirst::No);
300     });
301 }
302
303 void WebResourceLoadStatisticsStore::clearUserInteraction(const URL& url)
304 {
305     if (url.isBlankURL() || url.isEmpty())
306         return;
307
308     m_statisticsQueue->dispatch([this, protectedThis = makeRef(*this), primaryDomain = isolatedPrimaryDomain(url)] {
309         auto& statistics = ensureResourceStatisticsForPrimaryDomain(primaryDomain);
310         statistics.hadUserInteraction = false;
311         statistics.mostRecentUserInteractionTime = { };
312     });
313 }
314
315 void WebResourceLoadStatisticsStore::hasHadUserInteraction(const URL& url, WTF::Function<void (bool)>&& completionHandler)
316 {
317     if (url.isBlankURL() || url.isEmpty()) {
318         completionHandler(false);
319         return;
320     }
321
322     m_statisticsQueue->dispatch([this, protectedThis = makeRef(*this), primaryDomain = isolatedPrimaryDomain(url), completionHandler = WTFMove(completionHandler)] () mutable {
323         auto mapEntry = m_resourceStatisticsMap.find(primaryDomain);
324         bool hadUserInteraction = mapEntry == m_resourceStatisticsMap.end() ? false: hasHadUnexpiredRecentUserInteraction(mapEntry->value);
325         RunLoop::main().dispatch([hadUserInteraction, completionHandler = WTFMove(completionHandler)] {
326             completionHandler(hadUserInteraction);
327         });
328     });
329 }
330
331 void WebResourceLoadStatisticsStore::setLastSeen(const URL& url, Seconds seconds)
332 {
333     if (url.isBlankURL() || url.isEmpty())
334         return;
335     
336     m_statisticsQueue->dispatch([this, protectedThis = makeRef(*this), primaryDomain = isolatedPrimaryDomain(url), seconds] {
337         auto& statistics = ensureResourceStatisticsForPrimaryDomain(primaryDomain);
338         statistics.lastSeen = WallTime::fromRawSeconds(seconds.seconds());
339     });
340 }
341     
342 void WebResourceLoadStatisticsStore::setPrevalentResource(const URL& url)
343 {
344     if (url.isBlankURL() || url.isEmpty())
345         return;
346
347     m_statisticsQueue->dispatch([this, protectedThis = makeRef(*this), primaryDomain = isolatedPrimaryDomain(url)] {
348         auto& statistics = ensureResourceStatisticsForPrimaryDomain(primaryDomain);
349         statistics.isPrevalentResource = true;
350     });
351 }
352
353 void WebResourceLoadStatisticsStore::isPrevalentResource(const URL& url, WTF::Function<void (bool)>&& completionHandler)
354 {
355     if (url.isBlankURL() || url.isEmpty()) {
356         completionHandler(false);
357         return;
358     }
359
360     m_statisticsQueue->dispatch([this, protectedThis = makeRef(*this), primaryDomain = isolatedPrimaryDomain(url), completionHandler = WTFMove(completionHandler)] () mutable {
361         auto mapEntry = m_resourceStatisticsMap.find(primaryDomain);
362         bool isPrevalentResource = mapEntry == m_resourceStatisticsMap.end() ? false : mapEntry->value.isPrevalentResource;
363         RunLoop::main().dispatch([isPrevalentResource, completionHandler = WTFMove(completionHandler)] {
364             completionHandler(isPrevalentResource);
365         });
366     });
367 }
368
369 void WebResourceLoadStatisticsStore::clearPrevalentResource(const URL& url)
370 {
371     if (url.isBlankURL() || url.isEmpty())
372         return;
373
374     m_statisticsQueue->dispatch([this, protectedThis = makeRef(*this), primaryDomain = isolatedPrimaryDomain(url)] {
375         auto& statistics = ensureResourceStatisticsForPrimaryDomain(primaryDomain);
376         statistics.isPrevalentResource = false;
377     });
378 }
379
380 void WebResourceLoadStatisticsStore::setGrandfathered(const URL& url, bool value)
381 {
382     if (url.isBlankURL() || url.isEmpty())
383         return;
384
385     m_statisticsQueue->dispatch([this, protectedThis = makeRef(*this), primaryDomain = isolatedPrimaryDomain(url), value] {
386         auto& statistics = ensureResourceStatisticsForPrimaryDomain(primaryDomain);
387         statistics.grandfathered = value;
388     });
389 }
390
391 void WebResourceLoadStatisticsStore::isGrandfathered(const URL& url, WTF::Function<void (bool)>&& completionHandler)
392 {
393     if (url.isBlankURL() || url.isEmpty()) {
394         completionHandler(false);
395         return;
396     }
397
398     m_statisticsQueue->dispatch([this, protectedThis = makeRef(*this), completionHandler = WTFMove(completionHandler), primaryDomain = isolatedPrimaryDomain(url)] () mutable {
399         auto mapEntry = m_resourceStatisticsMap.find(primaryDomain);
400         bool isGrandFathered = mapEntry == m_resourceStatisticsMap.end() ? false : mapEntry->value.grandfathered;
401         RunLoop::main().dispatch([isGrandFathered, completionHandler = WTFMove(completionHandler)] {
402             completionHandler(isGrandFathered);
403         });
404     });
405 }
406
407 void WebResourceLoadStatisticsStore::setSubframeUnderTopFrameOrigin(const URL& subframe, const URL& topFrame)
408 {
409     if (subframe.isBlankURL() || subframe.isEmpty() || topFrame.isBlankURL() || topFrame.isEmpty())
410         return;
411
412     m_statisticsQueue->dispatch([this, protectedThis = makeRef(*this), primaryTopFrameDomain = isolatedPrimaryDomain(topFrame), primarySubFrameDomain = isolatedPrimaryDomain(subframe)] {
413         auto& statistics = ensureResourceStatisticsForPrimaryDomain(primarySubFrameDomain);
414         statistics.subframeUnderTopFrameOrigins.add(primaryTopFrameDomain);
415     });
416 }
417
418 void WebResourceLoadStatisticsStore::setSubresourceUnderTopFrameOrigin(const URL& subresource, const URL& topFrame)
419 {
420     if (subresource.isBlankURL() || subresource.isEmpty() || topFrame.isBlankURL() || topFrame.isEmpty())
421         return;
422
423     m_statisticsQueue->dispatch([this, protectedThis = makeRef(*this), primaryTopFrameDomain = isolatedPrimaryDomain(topFrame), primarySubresourceDomain = isolatedPrimaryDomain(subresource)] {
424         auto& statistics = ensureResourceStatisticsForPrimaryDomain(primarySubresourceDomain);
425         statistics.subresourceUnderTopFrameOrigins.add(primaryTopFrameDomain);
426     });
427 }
428
429 void WebResourceLoadStatisticsStore::setSubresourceUniqueRedirectTo(const URL& subresource, const URL& hostNameRedirectedTo)
430 {
431     if (subresource.isBlankURL() || subresource.isEmpty() || hostNameRedirectedTo.isBlankURL() || hostNameRedirectedTo.isEmpty())
432         return;
433
434     m_statisticsQueue->dispatch([this, protectedThis = makeRef(*this), primaryRedirectDomain = isolatedPrimaryDomain(hostNameRedirectedTo), primarySubresourceDomain = isolatedPrimaryDomain(subresource)] {
435         auto& statistics = ensureResourceStatisticsForPrimaryDomain(primarySubresourceDomain);
436         statistics.subresourceUniqueRedirectsTo.add(primaryRedirectDomain);
437     });
438 }
439
440 void WebResourceLoadStatisticsStore::scheduleCookiePartitioningUpdate()
441 {
442     // Helper function used by testing system. Should only be called from the main thread.
443     ASSERT(RunLoop::isMain());
444
445     m_statisticsQueue->dispatch([this, protectedThis = makeRef(*this)] {
446         updateCookiePartitioning();
447     });
448 }
449
450 void WebResourceLoadStatisticsStore::scheduleCookiePartitioningUpdateForDomains(const Vector<String>& domainsToRemove, const Vector<String>& domainsToAdd, ShouldClearFirst shouldClearFirst)
451 {
452     // Helper function used by testing system. Should only be called from the main thread.
453     ASSERT(RunLoop::isMain());
454     m_statisticsQueue->dispatch([this, protectedThis = makeRef(*this), domainsToRemove = CrossThreadCopier<Vector<String>>::copy(domainsToRemove), domainsToAdd = CrossThreadCopier<Vector<String>>::copy(domainsToAdd), shouldClearFirst] {
455         updateCookiePartitioningForDomains(domainsToRemove, domainsToAdd, shouldClearFirst);
456     });
457 }
458
459 #if HAVE(CFNETWORK_STORAGE_PARTITIONING)
460 void WebResourceLoadStatisticsStore::scheduleCookiePartitioningStateReset()
461 {
462     m_statisticsQueue->dispatch([this, protectedThis = makeRef(*this)] {
463         resetCookiePartitioningState();
464     });
465 }
466 #endif
467
468 void WebResourceLoadStatisticsStore::scheduleClearInMemory()
469 {
470     ASSERT(RunLoop::isMain());
471     m_statisticsQueue->dispatch([this, protectedThis = makeRef(*this)] {
472         clearInMemory();
473     });
474 }
475
476 void WebResourceLoadStatisticsStore::scheduleClearInMemoryAndPersistent()
477 {
478     ASSERT(RunLoop::isMain());
479     m_statisticsQueue->dispatch([this, protectedThis = makeRef(*this)] {
480         clearInMemory();
481         m_persistentStorage.clear();
482         grandfatherExistingWebsiteData();
483     });
484 }
485
486 void WebResourceLoadStatisticsStore::scheduleClearInMemoryAndPersistent(std::chrono::system_clock::time_point modifiedSince)
487 {
488     // For now, be conservative and clear everything regardless of modifiedSince.
489     UNUSED_PARAM(modifiedSince);
490     scheduleClearInMemoryAndPersistent();
491 }
492
493 void WebResourceLoadStatisticsStore::setTimeToLiveUserInteraction(Seconds seconds)
494 {
495     ASSERT(seconds >= 0_s);
496     m_parameters.timeToLiveUserInteraction = seconds;
497 }
498
499 void WebResourceLoadStatisticsStore::setTimeToLiveCookiePartitionFree(Seconds seconds)
500 {
501     ASSERT(seconds >= 0_s);
502     m_parameters.timeToLiveCookiePartitionFree = seconds;
503 }
504
505 void WebResourceLoadStatisticsStore::setMinimumTimeBetweenDataRecordsRemoval(Seconds seconds)
506 {
507     ASSERT(seconds >= 0_s);
508     m_parameters.minimumTimeBetweenDataRecordsRemoval = seconds;
509 }
510
511 void WebResourceLoadStatisticsStore::setGrandfatheringTime(Seconds seconds)
512 {
513     ASSERT(seconds >= 0_s);
514     m_parameters.grandfatheringTime = seconds;
515 }
516
517 bool WebResourceLoadStatisticsStore::shouldRemoveDataRecords() const
518 {
519     ASSERT(!RunLoop::isMain());
520     if (m_dataRecordsBeingRemoved)
521         return false;
522
523     return !m_lastTimeDataRecordsWereRemoved || MonotonicTime::now() >= (m_lastTimeDataRecordsWereRemoved + m_parameters.minimumTimeBetweenDataRecordsRemoval);
524 }
525
526 void WebResourceLoadStatisticsStore::setDataRecordsBeingRemoved(bool value)
527 {
528     ASSERT(!RunLoop::isMain());
529     m_dataRecordsBeingRemoved = value;
530     if (m_dataRecordsBeingRemoved)
531         m_lastTimeDataRecordsWereRemoved = MonotonicTime::now();
532 }
533
534 ResourceLoadStatistics& WebResourceLoadStatisticsStore::ensureResourceStatisticsForPrimaryDomain(const String& primaryDomain)
535 {
536     ASSERT(!RunLoop::isMain());
537     return m_resourceStatisticsMap.ensure(primaryDomain, [&primaryDomain] {
538         return ResourceLoadStatistics(primaryDomain);
539     }).iterator->value;
540 }
541
542 std::unique_ptr<KeyedEncoder> WebResourceLoadStatisticsStore::createEncoderFromData() const
543 {
544     ASSERT(!RunLoop::isMain());
545     auto encoder = KeyedEncoder::encoder();
546     encoder->encodeUInt32("version", statisticsModelVersion);
547     encoder->encodeDouble("endOfGrandfatheringTimestamp", m_endOfGrandfatheringTimestamp.secondsSinceEpoch().value());
548
549     encoder->encodeObjects("browsingStatistics", m_resourceStatisticsMap.begin(), m_resourceStatisticsMap.end(), [](KeyedEncoder& encoderInner, const auto& origin) {
550         origin.value.encode(encoderInner);
551     });
552
553     encoder->encodeObjects("operatingDates", m_operatingDates.begin(), m_operatingDates.end(), [](KeyedEncoder& encoderInner, OperatingDate date) {
554         encoderInner.encodeDouble("date", date.secondsSinceEpoch().value());
555     });
556
557     return encoder;
558 }
559
560 void WebResourceLoadStatisticsStore::mergeWithDataFromDecoder(KeyedDecoder& decoder)
561 {
562     ASSERT(!RunLoop::isMain());
563
564     unsigned versionOnDisk;
565     if (!decoder.decodeUInt32("version", versionOnDisk))
566         return;
567
568     if (versionOnDisk != statisticsModelVersion)
569         return;
570
571     double endOfGrandfatheringTimestamp;
572     if (decoder.decodeDouble("endOfGrandfatheringTimestamp", endOfGrandfatheringTimestamp))
573         m_endOfGrandfatheringTimestamp = WallTime::fromRawSeconds(endOfGrandfatheringTimestamp);
574     else
575         m_endOfGrandfatheringTimestamp = { };
576
577     Vector<ResourceLoadStatistics> loadedStatistics;
578     bool succeeded = decoder.decodeObjects("browsingStatistics", loadedStatistics, [](KeyedDecoder& decoderInner, ResourceLoadStatistics& statistics) {
579         return statistics.decode(decoderInner);
580     });
581
582     if (!succeeded)
583         return;
584
585     mergeStatistics(WTFMove(loadedStatistics));
586     updateCookiePartitioning();
587
588     Vector<OperatingDate> operatingDates;
589     succeeded = decoder.decodeObjects("operatingDates", operatingDates, [](KeyedDecoder& decoder, OperatingDate& date) {
590         double value;
591         if (!decoder.decodeDouble("date", value))
592             return false;
593
594         date = OperatingDate::fromWallTime(WallTime::fromRawSeconds(value));
595         return true;
596     });
597
598     if (!succeeded)
599         return;
600
601     m_operatingDates = mergeOperatingDates(m_operatingDates, WTFMove(operatingDates));
602 }
603
604 void WebResourceLoadStatisticsStore::clearInMemory()
605 {
606     ASSERT(!RunLoop::isMain());
607     m_resourceStatisticsMap.clear();
608     m_operatingDates.clear();
609
610     updateCookiePartitioningForDomains({ }, { }, ShouldClearFirst::Yes);
611 }
612
613 void WebResourceLoadStatisticsStore::mergeStatistics(Vector<ResourceLoadStatistics>&& statistics)
614 {
615     ASSERT(!RunLoop::isMain());
616     for (auto& statistic : statistics) {
617         auto result = m_resourceStatisticsMap.ensure(statistic.highLevelDomain, [&statistic] {
618             return WTFMove(statistic);
619         });
620         if (!result.isNewEntry)
621             result.iterator->value.merge(statistic);
622     }
623 }
624
625 inline bool WebResourceLoadStatisticsStore::shouldPartitionCookies(const ResourceLoadStatistics& statistic) const
626 {
627     return statistic.isPrevalentResource && (!statistic.hadUserInteraction || WallTime::now() > statistic.mostRecentUserInteractionTime + m_parameters.timeToLiveCookiePartitionFree);
628 }
629
630 void WebResourceLoadStatisticsStore::updateCookiePartitioning()
631 {
632     ASSERT(!RunLoop::isMain());
633
634     Vector<String> domainsToRemove;
635     Vector<String> domainsToAdd;
636     for (auto& resourceStatistic : m_resourceStatisticsMap.values()) {
637         bool shouldPartition = shouldPartitionCookies(resourceStatistic);
638         if (resourceStatistic.isMarkedForCookiePartitioning && !shouldPartition) {
639             resourceStatistic.isMarkedForCookiePartitioning = false;
640             domainsToRemove.append(resourceStatistic.highLevelDomain);
641         } else if (!resourceStatistic.isMarkedForCookiePartitioning && shouldPartition) {
642             resourceStatistic.isMarkedForCookiePartitioning = true;
643             domainsToAdd.append(resourceStatistic.highLevelDomain);
644         }
645     }
646
647     if (domainsToRemove.isEmpty() && domainsToAdd.isEmpty())
648         return;
649
650     RunLoop::main().dispatch([this, protectedThis = makeRef(*this), domainsToRemove = CrossThreadCopier<Vector<String>>::copy(domainsToRemove), domainsToAdd = CrossThreadCopier<Vector<String>>::copy(domainsToAdd)] () {
651         m_updateCookiePartitioningForDomainsHandler(domainsToRemove, domainsToAdd, ShouldClearFirst::No);
652     });
653 }
654
655 void WebResourceLoadStatisticsStore::updateCookiePartitioningForDomains(const Vector<String>& domainsToRemove, const Vector<String>& domainsToAdd, ShouldClearFirst shouldClearFirst)
656 {
657     ASSERT(!RunLoop::isMain());
658     if (domainsToRemove.isEmpty() && domainsToAdd.isEmpty())
659         return;
660
661     RunLoop::main().dispatch([this, shouldClearFirst, protectedThis = makeRef(*this), domainsToRemove = CrossThreadCopier<Vector<String>>::copy(domainsToRemove), domainsToAdd = CrossThreadCopier<Vector<String>>::copy(domainsToAdd)] () {
662         m_updateCookiePartitioningForDomainsHandler(domainsToRemove, domainsToAdd, shouldClearFirst);
663     });
664
665     if (shouldClearFirst == ShouldClearFirst::Yes)
666         resetCookiePartitioningState();
667     else {
668         for (auto& domain : domainsToRemove)
669             ensureResourceStatisticsForPrimaryDomain(domain).isMarkedForCookiePartitioning = false;
670     }
671
672     for (auto& domain : domainsToAdd)
673         ensureResourceStatisticsForPrimaryDomain(domain).isMarkedForCookiePartitioning = true;
674 }
675
676 void WebResourceLoadStatisticsStore::resetCookiePartitioningState()
677 {
678     ASSERT(!RunLoop::isMain());
679     for (auto& resourceStatistic : m_resourceStatisticsMap.values())
680         resourceStatistic.isMarkedForCookiePartitioning = false;
681 }
682
683 void WebResourceLoadStatisticsStore::processStatistics(const WTF::Function<void (const ResourceLoadStatistics&)>& processFunction) const
684 {
685     ASSERT(!RunLoop::isMain());
686     for (auto& resourceStatistic : m_resourceStatisticsMap.values())
687         processFunction(resourceStatistic);
688 }
689
690 bool WebResourceLoadStatisticsStore::hasHadUnexpiredRecentUserInteraction(ResourceLoadStatistics& resourceStatistic) const
691 {
692     if (resourceStatistic.hadUserInteraction && hasStatisticsExpired(resourceStatistic)) {
693         // Drop privacy sensitive data because we no longer need it.
694         // Set timestamp to 0 so that statistics merge will know
695         // it has been reset as opposed to its default -1.
696         resourceStatistic.mostRecentUserInteractionTime = { };
697         resourceStatistic.hadUserInteraction = false;
698     }
699
700     return resourceStatistic.hadUserInteraction;
701 }
702
703 Vector<String> WebResourceLoadStatisticsStore::topPrivatelyControlledDomainsToRemoveWebsiteDataFor()
704 {
705     ASSERT(!RunLoop::isMain());
706
707     bool shouldCheckForGrandfathering = m_endOfGrandfatheringTimestamp > WallTime::now();
708     bool shouldClearGrandfathering = !shouldCheckForGrandfathering && m_endOfGrandfatheringTimestamp;
709
710     if (shouldClearGrandfathering)
711         m_endOfGrandfatheringTimestamp = { };
712
713     Vector<String> prevalentResources;
714     for (auto& statistic : m_resourceStatisticsMap.values()) {
715         if (statistic.isPrevalentResource && !hasHadUnexpiredRecentUserInteraction(statistic) && (!shouldCheckForGrandfathering || !statistic.grandfathered))
716             prevalentResources.append(statistic.highLevelDomain);
717
718         if (shouldClearGrandfathering && statistic.grandfathered)
719             statistic.grandfathered = false;
720     }
721
722     return prevalentResources;
723 }
724
725 void WebResourceLoadStatisticsStore::includeTodayAsOperatingDateIfNecessary()
726 {
727     auto today = OperatingDate::today();
728     if (!m_operatingDates.isEmpty() && today <= m_operatingDates.last())
729         return;
730
731     while (m_operatingDates.size() >= operatingDatesWindow)
732         m_operatingDates.remove(0);
733
734     m_operatingDates.append(today);
735 }
736
737 bool WebResourceLoadStatisticsStore::hasStatisticsExpired(const ResourceLoadStatistics& resourceStatistic) const
738 {
739     if (m_operatingDates.size() >= operatingDatesWindow) {
740         if (OperatingDate::fromWallTime(resourceStatistic.mostRecentUserInteractionTime) < m_operatingDates.first())
741             return true;
742     }
743
744     // If we don't meet the real criteria for an expired statistic, check the user setting for a tighter restriction (mainly for testing).
745     if (m_parameters.timeToLiveUserInteraction) {
746         if (WallTime::now() > resourceStatistic.mostRecentUserInteractionTime + m_parameters.timeToLiveUserInteraction.value())
747             return true;
748     }
749
750     return false;
751 }
752     
753 void WebResourceLoadStatisticsStore::setMaxStatisticsEntries(size_t maximumEntryCount)
754 {
755     m_parameters.maxStatisticsEntries = maximumEntryCount;
756 }
757     
758 void WebResourceLoadStatisticsStore::setPruneEntriesDownTo(size_t pruneTargetCount)
759 {
760     m_parameters.pruneEntriesDownTo = pruneTargetCount;
761 }
762     
763 struct StatisticsLastSeen {
764     String topPrivatelyOwnedDomain;
765     WallTime lastSeen;
766 };
767     
768 static void pruneResources(HashMap<String, WebCore::ResourceLoadStatistics>& statisticsMap, Vector<StatisticsLastSeen>& statisticsToPrune, size_t& numberOfEntriesToPrune)
769 {
770     if (statisticsToPrune.size() > numberOfEntriesToPrune) {
771         std::sort(statisticsToPrune.begin(), statisticsToPrune.end(), [](const StatisticsLastSeen& a, const StatisticsLastSeen& b) {
772             return a.lastSeen < b.lastSeen;
773         });
774     }
775
776     for (size_t i = 0, end = std::min(numberOfEntriesToPrune, statisticsToPrune.size()); i != end; ++i, --numberOfEntriesToPrune)
777         statisticsMap.remove(statisticsToPrune[i].topPrivatelyOwnedDomain);
778 }
779     
780 static unsigned computeImportance(const ResourceLoadStatistics& resourceStatistic)
781 {
782     unsigned importance = maxImportance;
783     if (!resourceStatistic.isPrevalentResource)
784         importance -= 1;
785     if (!resourceStatistic.hadUserInteraction)
786         importance -= 2;
787     return importance;
788 }
789     
790 void WebResourceLoadStatisticsStore::pruneStatisticsIfNeeded()
791 {
792     ASSERT(!RunLoop::isMain());
793     if (m_resourceStatisticsMap.size() <= m_parameters.maxStatisticsEntries)
794         return;
795
796     ASSERT(m_parameters.pruneEntriesDownTo <= m_parameters.maxStatisticsEntries);
797
798     size_t numberOfEntriesLeftToPrune = m_resourceStatisticsMap.size() - m_parameters.pruneEntriesDownTo;
799     ASSERT(numberOfEntriesLeftToPrune);
800     
801     Vector<StatisticsLastSeen> resourcesToPrunePerImportance[maxImportance + 1];
802     for (auto& resourceStatistic : m_resourceStatisticsMap.values())
803         resourcesToPrunePerImportance[computeImportance(resourceStatistic)].append({ resourceStatistic.highLevelDomain, resourceStatistic.lastSeen });
804     
805     for (unsigned importance = 0; numberOfEntriesLeftToPrune && importance <= maxImportance; ++importance)
806         pruneResources(m_resourceStatisticsMap, resourcesToPrunePerImportance[importance], numberOfEntriesLeftToPrune);
807
808     ASSERT(!numberOfEntriesLeftToPrune);
809 }
810
811 void WebResourceLoadStatisticsStore::resetParametersToDefaultValues()
812 {
813     m_parameters = { };
814 }
815     
816 } // namespace WebKit