4e2881ab3eff03b38cff1b17ca49bc1a2b58d8c0
[WebKit-https.git] / Source / WebKit / UIProcess / WebResourceLoadStatisticsStore.cpp
1 /*
2  * Copyright (C) 2016-2017 Apple Inc. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  * 1. Redistributions of source code must retain the above copyright
8  *    notice, this list of conditions and the following disclaimer.
9  * 2. Redistributions in binary form must reproduce the above copyright
10  *    notice, this list of conditions and the following disclaimer in the
11  *    documentation and/or other materials provided with the distribution.
12  *
13  * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS''
14  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
15  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS
17  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
18  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
19  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
20  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
21  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
22  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
23  * THE POSSIBILITY OF SUCH DAMAGE.
24  */
25
26 #include "config.h"
27 #include "WebResourceLoadStatisticsStore.h"
28
29 #include "Logging.h"
30 #include "WebProcessMessages.h"
31 #include "WebProcessProxy.h"
32 #include "WebResourceLoadStatisticsStoreMessages.h"
33 #include "WebResourceLoadStatisticsTelemetry.h"
34 #include "WebsiteDataFetchOption.h"
35 #include "WebsiteDataStore.h"
36 #include "WebsiteDataType.h"
37 #include <WebCore/KeyedCoding.h>
38 #include <WebCore/ResourceLoadStatistics.h>
39 #include <wtf/CrossThreadCopier.h>
40 #include <wtf/DateMath.h>
41 #include <wtf/MathExtras.h>
42 #include <wtf/NeverDestroyed.h>
43
44 using namespace WebCore;
45
46 namespace WebKit {
47
// Maximum number of operating dates retained; older dates are dropped first.
constexpr unsigned operatingDatesWindow = 30;
// Version written to the persisted store; data on disk with a different version is not merged.
constexpr unsigned statisticsModelVersion = 9;
// Highest importance bucket used when choosing which statistics entries to prune.
constexpr unsigned maxImportance = 3;
51
52 template<typename T> static inline String isolatedPrimaryDomain(const T& value)
53 {
54     return ResourceLoadStatistics::primaryDomain(value).isolatedCopy();
55 }
56
57 static const OptionSet<WebsiteDataType>& dataTypesToRemove()
58 {
59     static NeverDestroyed<OptionSet<WebsiteDataType>> dataTypes(std::initializer_list<WebsiteDataType>({
60         WebsiteDataType::Cookies,
61         WebsiteDataType::IndexedDBDatabases,
62         WebsiteDataType::LocalStorage,
63 #if ENABLE(MEDIA_STREAM)
64         WebsiteDataType::MediaDeviceIdentifier,
65 #endif
66         WebsiteDataType::MediaKeys,
67         WebsiteDataType::OfflineWebApplicationCache,
68 #if ENABLE(NETSCAPE_PLUGIN_API)
69         WebsiteDataType::PlugInData,
70 #endif
71         WebsiteDataType::SearchFieldRecentSearches,
72         WebsiteDataType::SessionStorage,
73         WebsiteDataType::WebSQLDatabases,
74     }));
75
76     ASSERT(RunLoop::isMain());
77
78     return dataTypes;
79 }
80
81 class OperatingDate {
82 public:
83     OperatingDate() = default;
84
85     static OperatingDate fromWallTime(WallTime time)
86     {
87         double ms = time.secondsSinceEpoch().milliseconds();
88         int year = msToYear(ms);
89         int yearDay = dayInYear(ms, year);
90         int month = monthFromDayInYear(yearDay, isLeapYear(year));
91         int monthDay = dayInMonthFromDayInYear(yearDay, isLeapYear(year));
92
93         return OperatingDate { year, month, monthDay };
94     }
95
96     static OperatingDate today()
97     {
98         return OperatingDate::fromWallTime(WallTime::now());
99     }
100
101     Seconds secondsSinceEpoch() const
102     {
103         return Seconds { dateToDaysFrom1970(m_year, m_month, m_monthDay) * secondsPerDay };
104     }
105
106     bool operator==(const OperatingDate& other) const
107     {
108         return m_monthDay == other.m_monthDay && m_month == other.m_month && m_year == other.m_year;
109     }
110
111     bool operator<(const OperatingDate& other) const
112     {
113         return secondsSinceEpoch() < other.secondsSinceEpoch();
114     }
115
116     bool operator<=(const OperatingDate& other) const
117     {
118         return secondsSinceEpoch() <= other.secondsSinceEpoch();
119     }
120
121 private:
122     OperatingDate(int year, int month, int monthDay)
123         : m_year(year)
124         , m_month(month)
125         , m_monthDay(monthDay)
126     { }
127
128     int m_year { 0 };
129     int m_month { 0 }; // [0, 11].
130     int m_monthDay { 0 }; // [1, 31].
131 };
132
133 static Vector<OperatingDate> mergeOperatingDates(const Vector<OperatingDate>& existingDates, Vector<OperatingDate>&& newDates)
134 {
135     if (existingDates.isEmpty())
136         return WTFMove(newDates);
137
138     Vector<OperatingDate> mergedDates(existingDates.size() + newDates.size());
139
140     // Merge the two sorted vectors of dates.
141     std::merge(existingDates.begin(), existingDates.end(), newDates.begin(), newDates.end(), mergedDates.begin());
142     // Remove duplicate dates.
143     removeRepeatedElements(mergedDates);
144
145     // Drop old dates until the Vector size reaches operatingDatesWindow.
146     while (mergedDates.size() > operatingDatesWindow)
147         mergedDates.remove(0);
148
149     return mergedDates;
150 }
151
152 WebResourceLoadStatisticsStore::WebResourceLoadStatisticsStore(const String& resourceLoadStatisticsDirectory, UpdateCookiePartitioningForDomainsHandler&& updateCookiePartitioningForDomainsHandler)
153     : m_statisticsQueue(WorkQueue::create("WebResourceLoadStatisticsStore Process Data Queue", WorkQueue::Type::Serial, WorkQueue::QOS::Utility))
154     , m_persistentStorage(*this, resourceLoadStatisticsDirectory)
155     , m_updateCookiePartitioningForDomainsHandler(WTFMove(updateCookiePartitioningForDomainsHandler))
156     , m_dailyTasksTimer(RunLoop::main(), this, &WebResourceLoadStatisticsStore::performDailyTasks)
157 {
158     ASSERT(RunLoop::isMain());
159
160 #if PLATFORM(COCOA)
161     registerUserDefaultsIfNeeded();
162 #endif
163
164     m_statisticsQueue->dispatch([this, protectedThis = makeRef(*this)] {
165         m_persistentStorage.initialize();
166         includeTodayAsOperatingDateIfNecessary();
167     });
168
169     m_statisticsQueue->dispatchAfter(5_s, [this, protectedThis = makeRef(*this)] {
170         if (m_parameters.shouldSubmitTelemetry)
171             WebResourceLoadStatisticsTelemetry::calculateAndSubmit(*this);
172     });
173
174     m_dailyTasksTimer.startRepeating(24_h);
175 }
176
177 WebResourceLoadStatisticsStore::~WebResourceLoadStatisticsStore()
178 {
179 }
180     
181 void WebResourceLoadStatisticsStore::removeDataRecords()
182 {
183     ASSERT(!RunLoop::isMain());
184     
185     if (!shouldRemoveDataRecords())
186         return;
187
188     auto prevalentResourceDomains = topPrivatelyControlledDomainsToRemoveWebsiteDataFor();
189     if (prevalentResourceDomains.isEmpty())
190         return;
191     
192     setDataRecordsBeingRemoved(true);
193
194     RunLoop::main().dispatch([prevalentResourceDomains = CrossThreadCopier<Vector<String>>::copy(prevalentResourceDomains), this, protectedThis = makeRef(*this)] () mutable {
195         WebProcessProxy::deleteWebsiteDataForTopPrivatelyControlledDomainsInAllPersistentDataStores(dataTypesToRemove(), WTFMove(prevalentResourceDomains), m_parameters.shouldNotifyPagesWhenDataRecordsWereScanned, [this, protectedThis = WTFMove(protectedThis)](const HashSet<String>& domainsWithDeletedWebsiteData) mutable {
196             m_statisticsQueue->dispatch([this, protectedThis = WTFMove(protectedThis), topDomains = CrossThreadCopier<HashSet<String>>::copy(domainsWithDeletedWebsiteData)] () mutable {
197                 for (auto& prevalentResourceDomain : topDomains) {
198                     auto& statistic = ensureResourceStatisticsForPrimaryDomain(prevalentResourceDomain);
199                     ++statistic.dataRecordsRemoved;
200                 }
201                 setDataRecordsBeingRemoved(false);
202             });
203         });
204     });
205 }
206
207 void WebResourceLoadStatisticsStore::scheduleStatisticsAndDataRecordsProcessing()
208 {
209     ASSERT(RunLoop::isMain());
210     m_statisticsQueue->dispatch([this, protectedThis = makeRef(*this)] {
211         processStatisticsAndDataRecords();
212     });
213 }
214
215 void WebResourceLoadStatisticsStore::processStatisticsAndDataRecords()
216 {
217     ASSERT(!RunLoop::isMain());
218
219     if (m_parameters.shouldClassifyResourcesBeforeDataRecordsRemoval) {
220         for (auto& resourceStatistic : m_resourceStatisticsMap.values()) {
221             if (!resourceStatistic.isPrevalentResource && m_resourceLoadStatisticsClassifier.hasPrevalentResourceCharacteristics(resourceStatistic))
222                 resourceStatistic.isPrevalentResource = true;
223         }
224     }
225     removeDataRecords();
226
227     pruneStatisticsIfNeeded();
228
229     if (m_parameters.shouldNotifyPagesWhenDataRecordsWereScanned) {
230         RunLoop::main().dispatch([] {
231             WebProcessProxy::notifyPageStatisticsAndDataRecordsProcessed();
232         });
233     }
234
235     m_persistentStorage.scheduleOrWriteMemoryStore();
236 }
237
238 void WebResourceLoadStatisticsStore::resourceLoadStatisticsUpdated(Vector<WebCore::ResourceLoadStatistics>&& origins)
239 {
240     ASSERT(!RunLoop::isMain());
241
242     mergeStatistics(WTFMove(origins));
243     // Fire before processing statistics to propagate user interaction as fast as possible to the network process.
244     updateCookiePartitioning();
245     processStatisticsAndDataRecords();
246 }
247
248 void WebResourceLoadStatisticsStore::grandfatherExistingWebsiteData()
249 {
250     ASSERT(!RunLoop::isMain());
251
252     RunLoop::main().dispatch([this, protectedThis = makeRef(*this)] () mutable {
253         WebProcessProxy::topPrivatelyControlledDomainsWithWebsiteData(dataTypesToRemove(), m_parameters.shouldNotifyPagesWhenDataRecordsWereScanned, [this, protectedThis = WTFMove(protectedThis)] (HashSet<String>&& topPrivatelyControlledDomainsWithWebsiteData) mutable {
254             m_statisticsQueue->dispatch([this, protectedThis = WTFMove(protectedThis), topDomains = CrossThreadCopier<HashSet<String>>::copy(topPrivatelyControlledDomainsWithWebsiteData)] () mutable {
255                 for (auto& topPrivatelyControlledDomain : topDomains) {
256                     auto& statistic = ensureResourceStatisticsForPrimaryDomain(topPrivatelyControlledDomain);
257                     statistic.grandfathered = true;
258                 }
259                 m_endOfGrandfatheringTimestamp = WallTime::now() + m_parameters.grandfatheringTime;
260             });
261         });
262     });
263 }
264     
265 void WebResourceLoadStatisticsStore::processWillOpenConnection(WebProcessProxy&, IPC::Connection& connection)
266 {
267     connection.addWorkQueueMessageReceiver(Messages::WebResourceLoadStatisticsStore::messageReceiverName(), m_statisticsQueue.get(), this);
268 }
269
270 void WebResourceLoadStatisticsStore::processDidCloseConnection(WebProcessProxy&, IPC::Connection& connection)
271 {
272     connection.removeWorkQueueMessageReceiver(Messages::WebResourceLoadStatisticsStore::messageReceiverName());
273 }
274
275 void WebResourceLoadStatisticsStore::applicationWillTerminate()
276 {
277     m_persistentStorage.finishAllPendingWorkSynchronously();
278 }
279
280 void WebResourceLoadStatisticsStore::performDailyTasks()
281 {
282     ASSERT(RunLoop::isMain());
283
284     includeTodayAsOperatingDateIfNecessary();
285     if (m_parameters.shouldSubmitTelemetry)
286         submitTelemetry();
287 }
288
289 void WebResourceLoadStatisticsStore::submitTelemetry()
290 {
291     ASSERT(RunLoop::isMain());
292     m_statisticsQueue->dispatch([this, protectedThis = makeRef(*this)] {
293         WebResourceLoadStatisticsTelemetry::calculateAndSubmit(*this);
294     });
295 }
296
297 void WebResourceLoadStatisticsStore::logUserInteraction(const URL& url)
298 {
299     if (url.isBlankURL() || url.isEmpty())
300         return;
301
302     m_statisticsQueue->dispatch([this, protectedThis = makeRef(*this), primaryDomain = isolatedPrimaryDomain(url)] {
303         auto& statistics = ensureResourceStatisticsForPrimaryDomain(primaryDomain);
304         statistics.hadUserInteraction = true;
305         statistics.mostRecentUserInteractionTime = WallTime::now();
306
307         updateCookiePartitioningForDomains({ primaryDomain }, { }, ShouldClearFirst::No);
308     });
309 }
310
311 void WebResourceLoadStatisticsStore::clearUserInteraction(const URL& url)
312 {
313     if (url.isBlankURL() || url.isEmpty())
314         return;
315
316     m_statisticsQueue->dispatch([this, protectedThis = makeRef(*this), primaryDomain = isolatedPrimaryDomain(url)] {
317         auto& statistics = ensureResourceStatisticsForPrimaryDomain(primaryDomain);
318         statistics.hadUserInteraction = false;
319         statistics.mostRecentUserInteractionTime = { };
320     });
321 }
322
323 void WebResourceLoadStatisticsStore::hasHadUserInteraction(const URL& url, WTF::Function<void (bool)>&& completionHandler)
324 {
325     if (url.isBlankURL() || url.isEmpty()) {
326         completionHandler(false);
327         return;
328     }
329
330     m_statisticsQueue->dispatch([this, protectedThis = makeRef(*this), primaryDomain = isolatedPrimaryDomain(url), completionHandler = WTFMove(completionHandler)] () mutable {
331         auto mapEntry = m_resourceStatisticsMap.find(primaryDomain);
332         bool hadUserInteraction = mapEntry == m_resourceStatisticsMap.end() ? false: hasHadUnexpiredRecentUserInteraction(mapEntry->value);
333         RunLoop::main().dispatch([hadUserInteraction, completionHandler = WTFMove(completionHandler)] {
334             completionHandler(hadUserInteraction);
335         });
336     });
337 }
338
339 void WebResourceLoadStatisticsStore::setLastSeen(const URL& url, Seconds seconds)
340 {
341     if (url.isBlankURL() || url.isEmpty())
342         return;
343     
344     m_statisticsQueue->dispatch([this, protectedThis = makeRef(*this), primaryDomain = isolatedPrimaryDomain(url), seconds] {
345         auto& statistics = ensureResourceStatisticsForPrimaryDomain(primaryDomain);
346         statistics.lastSeen = WallTime::fromRawSeconds(seconds.seconds());
347     });
348 }
349     
350 void WebResourceLoadStatisticsStore::setPrevalentResource(const URL& url)
351 {
352     if (url.isBlankURL() || url.isEmpty())
353         return;
354
355     m_statisticsQueue->dispatch([this, protectedThis = makeRef(*this), primaryDomain = isolatedPrimaryDomain(url)] {
356         auto& statistics = ensureResourceStatisticsForPrimaryDomain(primaryDomain);
357         statistics.isPrevalentResource = true;
358     });
359 }
360
361 void WebResourceLoadStatisticsStore::isPrevalentResource(const URL& url, WTF::Function<void (bool)>&& completionHandler)
362 {
363     if (url.isBlankURL() || url.isEmpty()) {
364         completionHandler(false);
365         return;
366     }
367
368     m_statisticsQueue->dispatch([this, protectedThis = makeRef(*this), primaryDomain = isolatedPrimaryDomain(url), completionHandler = WTFMove(completionHandler)] () mutable {
369         auto mapEntry = m_resourceStatisticsMap.find(primaryDomain);
370         bool isPrevalentResource = mapEntry == m_resourceStatisticsMap.end() ? false : mapEntry->value.isPrevalentResource;
371         RunLoop::main().dispatch([isPrevalentResource, completionHandler = WTFMove(completionHandler)] {
372             completionHandler(isPrevalentResource);
373         });
374     });
375 }
376
377 void WebResourceLoadStatisticsStore::clearPrevalentResource(const URL& url)
378 {
379     if (url.isBlankURL() || url.isEmpty())
380         return;
381
382     m_statisticsQueue->dispatch([this, protectedThis = makeRef(*this), primaryDomain = isolatedPrimaryDomain(url)] {
383         auto& statistics = ensureResourceStatisticsForPrimaryDomain(primaryDomain);
384         statistics.isPrevalentResource = false;
385     });
386 }
387
388 void WebResourceLoadStatisticsStore::setGrandfathered(const URL& url, bool value)
389 {
390     if (url.isBlankURL() || url.isEmpty())
391         return;
392
393     m_statisticsQueue->dispatch([this, protectedThis = makeRef(*this), primaryDomain = isolatedPrimaryDomain(url), value] {
394         auto& statistics = ensureResourceStatisticsForPrimaryDomain(primaryDomain);
395         statistics.grandfathered = value;
396     });
397 }
398
399 void WebResourceLoadStatisticsStore::isGrandfathered(const URL& url, WTF::Function<void (bool)>&& completionHandler)
400 {
401     if (url.isBlankURL() || url.isEmpty()) {
402         completionHandler(false);
403         return;
404     }
405
406     m_statisticsQueue->dispatch([this, protectedThis = makeRef(*this), completionHandler = WTFMove(completionHandler), primaryDomain = isolatedPrimaryDomain(url)] () mutable {
407         auto mapEntry = m_resourceStatisticsMap.find(primaryDomain);
408         bool isGrandFathered = mapEntry == m_resourceStatisticsMap.end() ? false : mapEntry->value.grandfathered;
409         RunLoop::main().dispatch([isGrandFathered, completionHandler = WTFMove(completionHandler)] {
410             completionHandler(isGrandFathered);
411         });
412     });
413 }
414
415 void WebResourceLoadStatisticsStore::setSubframeUnderTopFrameOrigin(const URL& subframe, const URL& topFrame)
416 {
417     if (subframe.isBlankURL() || subframe.isEmpty() || topFrame.isBlankURL() || topFrame.isEmpty())
418         return;
419
420     m_statisticsQueue->dispatch([this, protectedThis = makeRef(*this), primaryTopFrameDomain = isolatedPrimaryDomain(topFrame), primarySubFrameDomain = isolatedPrimaryDomain(subframe)] {
421         auto& statistics = ensureResourceStatisticsForPrimaryDomain(primarySubFrameDomain);
422         statistics.subframeUnderTopFrameOrigins.add(primaryTopFrameDomain);
423     });
424 }
425
426 void WebResourceLoadStatisticsStore::setSubresourceUnderTopFrameOrigin(const URL& subresource, const URL& topFrame)
427 {
428     if (subresource.isBlankURL() || subresource.isEmpty() || topFrame.isBlankURL() || topFrame.isEmpty())
429         return;
430
431     m_statisticsQueue->dispatch([this, protectedThis = makeRef(*this), primaryTopFrameDomain = isolatedPrimaryDomain(topFrame), primarySubresourceDomain = isolatedPrimaryDomain(subresource)] {
432         auto& statistics = ensureResourceStatisticsForPrimaryDomain(primarySubresourceDomain);
433         statistics.subresourceUnderTopFrameOrigins.add(primaryTopFrameDomain);
434     });
435 }
436
437 void WebResourceLoadStatisticsStore::setSubresourceUniqueRedirectTo(const URL& subresource, const URL& hostNameRedirectedTo)
438 {
439     if (subresource.isBlankURL() || subresource.isEmpty() || hostNameRedirectedTo.isBlankURL() || hostNameRedirectedTo.isEmpty())
440         return;
441
442     m_statisticsQueue->dispatch([this, protectedThis = makeRef(*this), primaryRedirectDomain = isolatedPrimaryDomain(hostNameRedirectedTo), primarySubresourceDomain = isolatedPrimaryDomain(subresource)] {
443         auto& statistics = ensureResourceStatisticsForPrimaryDomain(primarySubresourceDomain);
444         statistics.subresourceUniqueRedirectsTo.add(primaryRedirectDomain);
445     });
446 }
447
448 void WebResourceLoadStatisticsStore::scheduleCookiePartitioningUpdate()
449 {
450     // Helper function used by testing system. Should only be called from the main thread.
451     ASSERT(RunLoop::isMain());
452
453     m_statisticsQueue->dispatch([this, protectedThis = makeRef(*this)] {
454         updateCookiePartitioning();
455     });
456 }
457
458 void WebResourceLoadStatisticsStore::scheduleCookiePartitioningUpdateForDomains(const Vector<String>& domainsToRemove, const Vector<String>& domainsToAdd, ShouldClearFirst shouldClearFirst)
459 {
460     // Helper function used by testing system. Should only be called from the main thread.
461     ASSERT(RunLoop::isMain());
462     m_statisticsQueue->dispatch([this, protectedThis = makeRef(*this), domainsToRemove = CrossThreadCopier<Vector<String>>::copy(domainsToRemove), domainsToAdd = CrossThreadCopier<Vector<String>>::copy(domainsToAdd), shouldClearFirst] {
463         updateCookiePartitioningForDomains(domainsToRemove, domainsToAdd, shouldClearFirst);
464     });
465 }
466
467 #if HAVE(CFNETWORK_STORAGE_PARTITIONING)
468 void WebResourceLoadStatisticsStore::scheduleCookiePartitioningStateReset()
469 {
470     m_statisticsQueue->dispatch([this, protectedThis = makeRef(*this)] {
471         resetCookiePartitioningState();
472     });
473 }
474 #endif
475
476 void WebResourceLoadStatisticsStore::scheduleClearInMemory()
477 {
478     ASSERT(RunLoop::isMain());
479     m_statisticsQueue->dispatch([this, protectedThis = makeRef(*this)] {
480         clearInMemory();
481     });
482 }
483
484 void WebResourceLoadStatisticsStore::scheduleClearInMemoryAndPersistent()
485 {
486     ASSERT(RunLoop::isMain());
487     m_statisticsQueue->dispatch([this, protectedThis = makeRef(*this)] {
488         clearInMemory();
489         m_persistentStorage.clear();
490         grandfatherExistingWebsiteData();
491     });
492 }
493
494 void WebResourceLoadStatisticsStore::scheduleClearInMemoryAndPersistent(std::chrono::system_clock::time_point modifiedSince)
495 {
496     // For now, be conservative and clear everything regardless of modifiedSince.
497     UNUSED_PARAM(modifiedSince);
498     scheduleClearInMemoryAndPersistent();
499 }
500
501 void WebResourceLoadStatisticsStore::setTimeToLiveUserInteraction(Seconds seconds)
502 {
503     ASSERT(seconds >= 0_s);
504     m_parameters.timeToLiveUserInteraction = seconds;
505 }
506
507 void WebResourceLoadStatisticsStore::setTimeToLiveCookiePartitionFree(Seconds seconds)
508 {
509     ASSERT(seconds >= 0_s);
510     m_parameters.timeToLiveCookiePartitionFree = seconds;
511 }
512
513 void WebResourceLoadStatisticsStore::setMinimumTimeBetweenDataRecordsRemoval(Seconds seconds)
514 {
515     ASSERT(seconds >= 0_s);
516     m_parameters.minimumTimeBetweenDataRecordsRemoval = seconds;
517 }
518
519 void WebResourceLoadStatisticsStore::setGrandfatheringTime(Seconds seconds)
520 {
521     ASSERT(seconds >= 0_s);
522     m_parameters.grandfatheringTime = seconds;
523 }
524
525 bool WebResourceLoadStatisticsStore::shouldRemoveDataRecords() const
526 {
527     ASSERT(!RunLoop::isMain());
528     if (m_dataRecordsBeingRemoved)
529         return false;
530
531     return !m_lastTimeDataRecordsWereRemoved || MonotonicTime::now() >= (m_lastTimeDataRecordsWereRemoved + m_parameters.minimumTimeBetweenDataRecordsRemoval);
532 }
533
534 void WebResourceLoadStatisticsStore::setDataRecordsBeingRemoved(bool value)
535 {
536     ASSERT(!RunLoop::isMain());
537     m_dataRecordsBeingRemoved = value;
538     if (m_dataRecordsBeingRemoved)
539         m_lastTimeDataRecordsWereRemoved = MonotonicTime::now();
540 }
541
542 ResourceLoadStatistics& WebResourceLoadStatisticsStore::ensureResourceStatisticsForPrimaryDomain(const String& primaryDomain)
543 {
544     ASSERT(!RunLoop::isMain());
545     return m_resourceStatisticsMap.ensure(primaryDomain, [&primaryDomain] {
546         return ResourceLoadStatistics(primaryDomain);
547     }).iterator->value;
548 }
549
550 std::unique_ptr<KeyedEncoder> WebResourceLoadStatisticsStore::createEncoderFromData() const
551 {
552     ASSERT(!RunLoop::isMain());
553     auto encoder = KeyedEncoder::encoder();
554     encoder->encodeUInt32("version", statisticsModelVersion);
555     encoder->encodeDouble("endOfGrandfatheringTimestamp", m_endOfGrandfatheringTimestamp.secondsSinceEpoch().value());
556
557     encoder->encodeObjects("browsingStatistics", m_resourceStatisticsMap.begin(), m_resourceStatisticsMap.end(), [](KeyedEncoder& encoderInner, const auto& origin) {
558         origin.value.encode(encoderInner);
559     });
560
561     encoder->encodeObjects("operatingDates", m_operatingDates.begin(), m_operatingDates.end(), [](KeyedEncoder& encoderInner, OperatingDate date) {
562         encoderInner.encodeDouble("date", date.secondsSinceEpoch().value());
563     });
564
565     return encoder;
566 }
567
568 void WebResourceLoadStatisticsStore::mergeWithDataFromDecoder(KeyedDecoder& decoder)
569 {
570     ASSERT(!RunLoop::isMain());
571
572     unsigned versionOnDisk;
573     if (!decoder.decodeUInt32("version", versionOnDisk))
574         return;
575
576     if (versionOnDisk != statisticsModelVersion)
577         return;
578
579     double endOfGrandfatheringTimestamp;
580     if (decoder.decodeDouble("endOfGrandfatheringTimestamp", endOfGrandfatheringTimestamp))
581         m_endOfGrandfatheringTimestamp = WallTime::fromRawSeconds(endOfGrandfatheringTimestamp);
582     else
583         m_endOfGrandfatheringTimestamp = { };
584
585     Vector<ResourceLoadStatistics> loadedStatistics;
586     bool succeeded = decoder.decodeObjects("browsingStatistics", loadedStatistics, [](KeyedDecoder& decoderInner, ResourceLoadStatistics& statistics) {
587         return statistics.decode(decoderInner);
588     });
589
590     if (!succeeded)
591         return;
592
593     mergeStatistics(WTFMove(loadedStatistics));
594     updateCookiePartitioning();
595
596     Vector<OperatingDate> operatingDates;
597     succeeded = decoder.decodeObjects("operatingDates", operatingDates, [](KeyedDecoder& decoder, OperatingDate& date) {
598         double value;
599         if (!decoder.decodeDouble("date", value))
600             return false;
601
602         date = OperatingDate::fromWallTime(WallTime::fromRawSeconds(value));
603         return true;
604     });
605
606     if (!succeeded)
607         return;
608
609     m_operatingDates = mergeOperatingDates(m_operatingDates, WTFMove(operatingDates));
610 }
611
612 void WebResourceLoadStatisticsStore::clearInMemory()
613 {
614     ASSERT(!RunLoop::isMain());
615     m_resourceStatisticsMap.clear();
616     m_operatingDates.clear();
617
618     updateCookiePartitioningForDomains({ }, { }, ShouldClearFirst::Yes);
619 }
620
621 void WebResourceLoadStatisticsStore::mergeStatistics(Vector<ResourceLoadStatistics>&& statistics)
622 {
623     ASSERT(!RunLoop::isMain());
624     for (auto& statistic : statistics) {
625         auto result = m_resourceStatisticsMap.ensure(statistic.highLevelDomain, [&statistic] {
626             return WTFMove(statistic);
627         });
628         if (!result.isNewEntry)
629             result.iterator->value.merge(statistic);
630     }
631 }
632
633 inline bool WebResourceLoadStatisticsStore::shouldPartitionCookies(const ResourceLoadStatistics& statistic) const
634 {
635     return statistic.isPrevalentResource && (!statistic.hadUserInteraction || WallTime::now() > statistic.mostRecentUserInteractionTime + m_parameters.timeToLiveCookiePartitionFree);
636 }
637
638 void WebResourceLoadStatisticsStore::updateCookiePartitioning()
639 {
640     ASSERT(!RunLoop::isMain());
641
642     Vector<String> domainsToRemove;
643     Vector<String> domainsToAdd;
644     for (auto& resourceStatistic : m_resourceStatisticsMap.values()) {
645         bool shouldPartition = shouldPartitionCookies(resourceStatistic);
646         if (resourceStatistic.isMarkedForCookiePartitioning && !shouldPartition) {
647             resourceStatistic.isMarkedForCookiePartitioning = false;
648             domainsToRemove.append(resourceStatistic.highLevelDomain);
649         } else if (!resourceStatistic.isMarkedForCookiePartitioning && shouldPartition) {
650             resourceStatistic.isMarkedForCookiePartitioning = true;
651             domainsToAdd.append(resourceStatistic.highLevelDomain);
652         }
653     }
654
655     if (domainsToRemove.isEmpty() && domainsToAdd.isEmpty())
656         return;
657
658     RunLoop::main().dispatch([this, protectedThis = makeRef(*this), domainsToRemove = CrossThreadCopier<Vector<String>>::copy(domainsToRemove), domainsToAdd = CrossThreadCopier<Vector<String>>::copy(domainsToAdd)] () {
659         m_updateCookiePartitioningForDomainsHandler(domainsToRemove, domainsToAdd, ShouldClearFirst::No);
660     });
661 }
662
663 void WebResourceLoadStatisticsStore::updateCookiePartitioningForDomains(const Vector<String>& domainsToRemove, const Vector<String>& domainsToAdd, ShouldClearFirst shouldClearFirst)
664 {
665     ASSERT(!RunLoop::isMain());
666     if (domainsToRemove.isEmpty() && domainsToAdd.isEmpty())
667         return;
668
669     RunLoop::main().dispatch([this, shouldClearFirst, protectedThis = makeRef(*this), domainsToRemove = CrossThreadCopier<Vector<String>>::copy(domainsToRemove), domainsToAdd = CrossThreadCopier<Vector<String>>::copy(domainsToAdd)] () {
670         m_updateCookiePartitioningForDomainsHandler(domainsToRemove, domainsToAdd, shouldClearFirst);
671     });
672
673     if (shouldClearFirst == ShouldClearFirst::Yes)
674         resetCookiePartitioningState();
675     else {
676         for (auto& domain : domainsToRemove)
677             ensureResourceStatisticsForPrimaryDomain(domain).isMarkedForCookiePartitioning = false;
678     }
679
680     for (auto& domain : domainsToAdd)
681         ensureResourceStatisticsForPrimaryDomain(domain).isMarkedForCookiePartitioning = true;
682 }
683
684 void WebResourceLoadStatisticsStore::resetCookiePartitioningState()
685 {
686     ASSERT(!RunLoop::isMain());
687     for (auto& resourceStatistic : m_resourceStatisticsMap.values())
688         resourceStatistic.isMarkedForCookiePartitioning = false;
689 }
690
691 void WebResourceLoadStatisticsStore::processStatistics(const WTF::Function<void (const ResourceLoadStatistics&)>& processFunction) const
692 {
693     ASSERT(!RunLoop::isMain());
694     for (auto& resourceStatistic : m_resourceStatisticsMap.values())
695         processFunction(resourceStatistic);
696 }
697
698 bool WebResourceLoadStatisticsStore::hasHadUnexpiredRecentUserInteraction(ResourceLoadStatistics& resourceStatistic) const
699 {
700     if (resourceStatistic.hadUserInteraction && hasStatisticsExpired(resourceStatistic)) {
701         // Drop privacy sensitive data because we no longer need it.
702         // Set timestamp to 0 so that statistics merge will know
703         // it has been reset as opposed to its default -1.
704         resourceStatistic.mostRecentUserInteractionTime = { };
705         resourceStatistic.hadUserInteraction = false;
706     }
707
708     return resourceStatistic.hadUserInteraction;
709 }
710
711 Vector<String> WebResourceLoadStatisticsStore::topPrivatelyControlledDomainsToRemoveWebsiteDataFor()
712 {
713     ASSERT(!RunLoop::isMain());
714
715     bool shouldCheckForGrandfathering = m_endOfGrandfatheringTimestamp > WallTime::now();
716     bool shouldClearGrandfathering = !shouldCheckForGrandfathering && m_endOfGrandfatheringTimestamp;
717
718     if (shouldClearGrandfathering)
719         m_endOfGrandfatheringTimestamp = { };
720
721     Vector<String> prevalentResources;
722     for (auto& statistic : m_resourceStatisticsMap.values()) {
723         if (statistic.isPrevalentResource && !hasHadUnexpiredRecentUserInteraction(statistic) && (!shouldCheckForGrandfathering || !statistic.grandfathered))
724             prevalentResources.append(statistic.highLevelDomain);
725
726         if (shouldClearGrandfathering && statistic.grandfathered)
727             statistic.grandfathered = false;
728     }
729
730     return prevalentResources;
731 }
732
733 void WebResourceLoadStatisticsStore::includeTodayAsOperatingDateIfNecessary()
734 {
735     auto today = OperatingDate::today();
736     if (!m_operatingDates.isEmpty() && today <= m_operatingDates.last())
737         return;
738
739     while (m_operatingDates.size() >= operatingDatesWindow)
740         m_operatingDates.remove(0);
741
742     m_operatingDates.append(today);
743 }
744
745 bool WebResourceLoadStatisticsStore::hasStatisticsExpired(const ResourceLoadStatistics& resourceStatistic) const
746 {
747     if (m_operatingDates.size() >= operatingDatesWindow) {
748         if (OperatingDate::fromWallTime(resourceStatistic.mostRecentUserInteractionTime) < m_operatingDates.first())
749             return true;
750     }
751
752     // If we don't meet the real criteria for an expired statistic, check the user setting for a tighter restriction (mainly for testing).
753     if (m_parameters.timeToLiveUserInteraction) {
754         if (WallTime::now() > resourceStatistic.mostRecentUserInteractionTime + m_parameters.timeToLiveUserInteraction.value())
755             return true;
756     }
757
758     return false;
759 }
760     
761 void WebResourceLoadStatisticsStore::setMaxStatisticsEntries(size_t maximumEntryCount)
762 {
763     m_parameters.maxStatisticsEntries = maximumEntryCount;
764 }
765     
766 void WebResourceLoadStatisticsStore::setPruneEntriesDownTo(size_t pruneTargetCount)
767 {
768     m_parameters.pruneEntriesDownTo = pruneTargetCount;
769 }
770     
771 struct StatisticsLastSeen {
772     String topPrivatelyOwnedDomain;
773     WallTime lastSeen;
774 };
775     
776 static void pruneResources(HashMap<String, WebCore::ResourceLoadStatistics>& statisticsMap, Vector<StatisticsLastSeen>& statisticsToPrune, size_t& numberOfEntriesToPrune)
777 {
778     if (statisticsToPrune.size() > numberOfEntriesToPrune) {
779         std::sort(statisticsToPrune.begin(), statisticsToPrune.end(), [](const StatisticsLastSeen& a, const StatisticsLastSeen& b) {
780             return a.lastSeen < b.lastSeen;
781         });
782     }
783
784     for (size_t i = 0, end = std::min(numberOfEntriesToPrune, statisticsToPrune.size()); i != end; ++i, --numberOfEntriesToPrune)
785         statisticsMap.remove(statisticsToPrune[i].topPrivatelyOwnedDomain);
786 }
787     
788 static unsigned computeImportance(const ResourceLoadStatistics& resourceStatistic)
789 {
790     unsigned importance = maxImportance;
791     if (!resourceStatistic.isPrevalentResource)
792         importance -= 1;
793     if (!resourceStatistic.hadUserInteraction)
794         importance -= 2;
795     return importance;
796 }
797     
798 void WebResourceLoadStatisticsStore::pruneStatisticsIfNeeded()
799 {
800     ASSERT(!RunLoop::isMain());
801     if (m_resourceStatisticsMap.size() <= m_parameters.maxStatisticsEntries)
802         return;
803
804     ASSERT(m_parameters.pruneEntriesDownTo <= m_parameters.maxStatisticsEntries);
805
806     size_t numberOfEntriesLeftToPrune = m_resourceStatisticsMap.size() - m_parameters.pruneEntriesDownTo;
807     ASSERT(numberOfEntriesLeftToPrune);
808     
809     Vector<StatisticsLastSeen> resourcesToPrunePerImportance[maxImportance + 1];
810     for (auto& resourceStatistic : m_resourceStatisticsMap.values())
811         resourcesToPrunePerImportance[computeImportance(resourceStatistic)].append({ resourceStatistic.highLevelDomain, resourceStatistic.lastSeen });
812     
813     for (unsigned importance = 0; numberOfEntriesLeftToPrune && importance <= maxImportance; ++importance)
814         pruneResources(m_resourceStatisticsMap, resourcesToPrunePerImportance[importance], numberOfEntriesLeftToPrune);
815
816     ASSERT(!numberOfEntriesLeftToPrune);
817 }
818
819 void WebResourceLoadStatisticsStore::resetParametersToDefaultValues()
820 {
821     m_parameters = { };
822 }
823     
824 } // namespace WebKit