185a18b966ec092fe91452a91d25999e4cae2189
[WebKit.git] / Source / WebKit2 / UIProcess / WebResourceLoadStatisticsStore.cpp
1 /*
2  * Copyright (C) 2016-2017 Apple Inc. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  * 1. Redistributions of source code must retain the above copyright
8  *    notice, this list of conditions and the following disclaimer.
9  * 2. Redistributions in binary form must reproduce the above copyright
10  *    notice, this list of conditions and the following disclaimer in the
11  *    documentation and/or other materials provided with the distribution.
12  *
13  * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS''
14  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
15  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS
17  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
18  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
19  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
20  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
21  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
22  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
23  * THE POSSIBILITY OF SUCH DAMAGE.
24  */
25
26 #include "config.h"
27 #include "WebResourceLoadStatisticsStore.h"
28
29 #include "Logging.h"
30 #include "WebProcessMessages.h"
31 #include "WebProcessProxy.h"
32 #include "WebResourceLoadStatisticsStoreMessages.h"
33 #include "WebResourceLoadStatisticsTelemetry.h"
34 #include "WebsiteDataFetchOption.h"
35 #include "WebsiteDataStore.h"
36 #include "WebsiteDataType.h"
37 #include <WebCore/FileMonitor.h>
38 #include <WebCore/FileSystem.h>
39 #include <WebCore/KeyedCoding.h>
40 #include <WebCore/ResourceLoadStatistics.h>
41 #include <WebCore/SharedBuffer.h>
42 #include <wtf/CrossThreadCopier.h>
43 #include <wtf/MathExtras.h>
44 #include <wtf/NeverDestroyed.h>
45 #include <wtf/threads/BinarySemaphore.h>
46
47 using namespace WebCore;
48
49 namespace WebKit {
50
// Minimum spacing between two writes of the statistics file; scheduleOrWriteStoreToDisk()
// defers any write requested sooner than this after the previous one.
constexpr Seconds minimumStatisticsFileWriteInterval { 5_min };
// NOTE(review): not referenced in the visible part of this file; presumably bounds how many
// operating dates are kept - confirm against its users.
constexpr unsigned operatingDatesWindow { 30 };
// Written under the "version" key by createEncoderFromData(); populateFromDecoder() ignores
// any file whose stored version differs.
constexpr unsigned statisticsModelVersion { 7 };
// NOTE(review): not referenced in the visible part of this file - confirm its use before changing.
constexpr unsigned maxImportance { 3 };
55
56 template<typename T> static inline String isolatedPrimaryDomain(const T& value)
57 {
58     return ResourceLoadStatistics::primaryDomain(value).isolatedCopy();
59 }
60
// Returns the set of website data types this store operates on: removeDataRecords() passes it
// to the deletion request and grandfatherExistingWebsiteData() passes it to the domain lookup.
// The set is built on first call and kept in a NeverDestroyed static; two of its members are
// only included when the corresponding ENABLE() flags are set.
// Asserts that it is called on the main run loop.
static const OptionSet<WebsiteDataType>& dataTypesToRemove()
{
    static NeverDestroyed<OptionSet<WebsiteDataType>> dataTypes(std::initializer_list<WebsiteDataType>({
        WebsiteDataType::Cookies,
        WebsiteDataType::IndexedDBDatabases,
        WebsiteDataType::LocalStorage,
#if ENABLE(MEDIA_STREAM)
        WebsiteDataType::MediaDeviceIdentifier,
#endif
        WebsiteDataType::MediaKeys,
        WebsiteDataType::OfflineWebApplicationCache,
#if ENABLE(NETSCAPE_PLUGIN_API)
        WebsiteDataType::PlugInData,
#endif
        WebsiteDataType::SearchFieldRecentSearches,
        WebsiteDataType::SessionStorage,
        WebsiteDataType::WebSQLDatabases,
    }));

    ASSERT(RunLoop::isMain());

    return dataTypes;
}
84
// Constructs the store. Must be called on the main run loop (asserted).
// - resourceLoadStatisticsDirectory: directory used for the persisted statistics file.
// - updateCookiePartitioningForDomainsHandler: callback stored for later cookie partitioning updates.
// Creates the serial, utility-QoS statistics work queue and binds the daily-tasks timer to the
// main run loop.
WebResourceLoadStatisticsStore::WebResourceLoadStatisticsStore(const String& resourceLoadStatisticsDirectory, UpdateCookiePartitioningForDomainsHandler&& updateCookiePartitioningForDomainsHandler)
    : m_statisticsQueue(WorkQueue::create("WebResourceLoadStatisticsStore Process Data Queue", WorkQueue::Type::Serial, WorkQueue::QOS::Utility))
    , m_updateCookiePartitioningForDomainsHandler(WTFMove(updateCookiePartitioningForDomainsHandler))
    , m_statisticsStoragePath(resourceLoadStatisticsDirectory)
    , m_dailyTasksTimer(RunLoop::main(), this, &WebResourceLoadStatisticsStore::performDailyTasks)
{
    ASSERT(RunLoop::isMain());

#if PLATFORM(COCOA)
    registerUserDefaultsIfNeeded();
#endif

    // Load any persisted statistics and start watching the file, both on the statistics queue.
    // The captured Ref keeps this object alive until the task has run.
    m_statisticsQueue->dispatch([this, protectedThis = makeRef(*this)] {
        readDataFromDiskIfNeeded();
        startMonitoringStatisticsStorage();
    });
    // Five seconds later, submit telemetry if the parameters enable it.
    m_statisticsQueue->dispatchAfter(5_s, [this, protectedThis = makeRef(*this)] {
        if (m_parameters.shouldSubmitTelemetry)
            WebResourceLoadStatisticsTelemetry::calculateAndSubmit(*this);
    });

    // performDailyTasks() fires every 24 hours from here on.
    m_dailyTasksTimer.startRepeating(24_h);
}
108
109 WebResourceLoadStatisticsStore::~WebResourceLoadStatisticsStore()
110 {
111 }
112     
// Requests deletion of website data for the domains returned by
// topPrivatelyControlledDomainsToRemoveWebsiteDataFor() and records the outcome.
// Must be called off the main run loop (asserted). Does nothing when
// shouldRemoveDataRecords() is false or when there are no domains to process.
void WebResourceLoadStatisticsStore::removeDataRecords()
{
    ASSERT(!RunLoop::isMain());
    
    if (!shouldRemoveDataRecords())
        return;

    auto prevalentResourceDomains = topPrivatelyControlledDomainsToRemoveWebsiteDataFor();
    if (prevalentResourceDomains.isEmpty())
        return;
    
    // Mark the removal as in flight; shouldRemoveDataRecords() returns false until the flag is
    // cleared in the innermost task below.
    setDataRecordsBeingRemoved(true);

    // The domain list is copied with CrossThreadCopier before it is handed to the main run loop.
    RunLoop::main().dispatch([prevalentResourceDomains = CrossThreadCopier<Vector<String>>::copy(prevalentResourceDomains), this, protectedThis = makeRef(*this)] () mutable {
        WebProcessProxy::deleteWebsiteDataForTopPrivatelyControlledDomainsInAllPersistentDataStores(dataTypesToRemove(), WTFMove(prevalentResourceDomains), m_parameters.shouldNotifyPagesWhenDataRecordsWereScanned, [this, protectedThis = WTFMove(protectedThis)](const HashSet<String>& domainsWithDeletedWebsiteData) mutable {
            // Hop back to the statistics queue (again with a cross-thread copy) to bump the
            // per-domain removal counters and clear the in-flight flag.
            m_statisticsQueue->dispatch([this, protectedThis = WTFMove(protectedThis), topDomains = CrossThreadCopier<HashSet<String>>::copy(domainsWithDeletedWebsiteData)] () mutable {
                for (auto& prevalentResourceDomain : topDomains) {
                    auto& statistic = ensureResourceStatisticsForPrimaryDomain(prevalentResourceDomain);
                    ++statistic.dataRecordsRemoved;
                }
                setDataRecordsBeingRemoved(false);
            });
        });
    });
}
138
139 void WebResourceLoadStatisticsStore::processStatisticsAndDataRecords()
140 {
141     m_statisticsQueue->dispatch([this, protectedThis = makeRef(*this)] () {
142         if (m_parameters.shouldClassifyResourcesBeforeDataRecordsRemoval) {
143             for (auto& resourceStatistic : m_resourceStatisticsMap.values()) {
144                 if (!resourceStatistic.isPrevalentResource && m_resourceLoadStatisticsClassifier.hasPrevalentResourceCharacteristics(resourceStatistic))
145                     resourceStatistic.isPrevalentResource = true;
146             }
147         }
148         removeDataRecords();
149         
150         pruneStatisticsIfNeeded();
151
152         if (m_parameters.shouldNotifyPagesWhenDataRecordsWereScanned) {
153             RunLoop::main().dispatch([] {
154                 WebProcessProxy::notifyPageStatisticsAndDataRecordsProcessed();
155             });
156         }
157
158         scheduleOrWriteStoreToDisk();
159     });
160 }
161
// Merges a batch of incoming statistics into the store, updates cookie partitioning, and then
// queues a processing pass. Must be called off the main run loop (asserted).
// - origins: the statistics to merge; consumed by this call.
void WebResourceLoadStatisticsStore::resourceLoadStatisticsUpdated(Vector<WebCore::ResourceLoadStatistics>&& origins)
{
    ASSERT(!RunLoop::isMain());

    mergeStatistics(WTFMove(origins));
    // Fire before processing statistics to propagate user interaction as fast as possible to the network process.
    updateCookiePartitioning();
    processStatisticsAndDataRecords();
}
171
// Marks every domain that currently has website data (of the types in dataTypesToRemove()) as
// grandfathered and sets the end-of-grandfathering timestamp.
// Must be called off the main run loop (asserted); the lookup itself is issued from the main
// run loop and the results are applied back on the statistics queue.
void WebResourceLoadStatisticsStore::grandfatherExistingWebsiteData()
{
    ASSERT(!RunLoop::isMain());

    RunLoop::main().dispatch([this, protectedThis = makeRef(*this)] () mutable {
        WebProcessProxy::topPrivatelyControlledDomainsWithWebsiteData(dataTypesToRemove(), m_parameters.shouldNotifyPagesWhenDataRecordsWereScanned, [this, protectedThis = WTFMove(protectedThis)] (HashSet<String>&& topPrivatelyControlledDomainsWithWebsiteData) mutable {
            // The domain set is copied with CrossThreadCopier before crossing back to the statistics queue.
            m_statisticsQueue->dispatch([this, protectedThis = WTFMove(protectedThis), topDomains = CrossThreadCopier<HashSet<String>>::copy(topPrivatelyControlledDomainsWithWebsiteData)] () mutable {
                for (auto& topPrivatelyControlledDomain : topDomains) {
                    auto& statistic = ensureResourceStatisticsForPrimaryDomain(topPrivatelyControlledDomain);
                    statistic.grandfathered = true;
                }
                // Grandfathering ends m_parameters.grandfatheringTime after the results are applied.
                m_endOfGrandfatheringTimestamp = WallTime::now() + m_parameters.grandfatheringTime;
            });
        });
    });
}
188
189 WallTime WebResourceLoadStatisticsStore::statisticsFileModificationTime(const String& path) const
190 {
191     ASSERT(!RunLoop::isMain());
192     time_t modificationTime;
193     if (!getFileModificationTime(path, modificationTime))
194         return { };
195
196     return WallTime::fromRawSeconds(modificationTime);
197 }
198
199 bool WebResourceLoadStatisticsStore::hasStatisticsFileChangedSinceLastSync(const String& path) const
200 {
201     return statisticsFileModificationTime(path) > m_lastStatisticsFileSyncTime;
202 }
203
204 void WebResourceLoadStatisticsStore::readDataFromDiskIfNeeded()
205 {
206     ASSERT(!RunLoop::isMain());
207
208     String resourceLog = resourceLogFilePath();
209     if (resourceLog.isEmpty() || !fileExists(resourceLog)) {
210         grandfatherExistingWebsiteData();
211         return;
212     }
213
214     if (!hasStatisticsFileChangedSinceLastSync(resourceLog)) {
215         // No need to grandfather in this case.
216         return;
217     }
218
219     WallTime readTime = WallTime::now();
220
221     auto decoder = createDecoderFromDisk(resourceLog);
222     if (!decoder) {
223         grandfatherExistingWebsiteData();
224         return;
225     }
226
227     clearInMemory();
228     populateFromDecoder(*decoder);
229
230     m_lastStatisticsFileSyncTime = readTime;
231
232     if (m_resourceStatisticsMap.isEmpty())
233         grandfatherExistingWebsiteData();
234
235     includeTodayAsOperatingDateIfNecessary();
236 }
237     
238 void WebResourceLoadStatisticsStore::refreshFromDisk()
239 {
240     ASSERT(!RunLoop::isMain());
241
242     String resourceLog = resourceLogFilePath();
243     if (resourceLog.isEmpty())
244         return;
245
246     // We sometimes see file changed events from before our load completed (we start
247     // reading at the first change event, but we might receive a series of events related
248     // to the same file operation). Catch this case to avoid reading overly often.
249     if (!hasStatisticsFileChangedSinceLastSync(resourceLog))
250         return;
251
252     WallTime readTime = WallTime::now();
253
254     auto decoder = createDecoderFromDisk(resourceLog);
255     if (!decoder)
256         return;
257
258     populateFromDecoder(*decoder);
259     m_lastStatisticsFileSyncTime = readTime;
260 }
261     
262 void WebResourceLoadStatisticsStore::processWillOpenConnection(WebProcessProxy&, IPC::Connection& connection)
263 {
264     connection.addWorkQueueMessageReceiver(Messages::WebResourceLoadStatisticsStore::messageReceiverName(), m_statisticsQueue.get(), this);
265 }
266
267 void WebResourceLoadStatisticsStore::processDidCloseConnection(WebProcessProxy&, IPC::Connection& connection)
268 {
269     connection.removeWorkQueueMessageReceiver(Messages::WebResourceLoadStatisticsStore::messageReceiverName());
270 }
271
// Blocks the calling thread until the statistics queue has run a final task, which writes the
// store to disk if a write had been scheduled.
void WebResourceLoadStatisticsStore::applicationWillTerminate()
{
    BinarySemaphore semaphore;
    // Make sure any pending work in our queue is finished before we terminate.
    // The semaphore is captured by reference; that is safe because this function does not
    // return until the task has signalled it.
    m_statisticsQueue->dispatch([&semaphore, this, protectedThis = makeRef(*this)] {
        // Write final file state to disk.
        if (m_didScheduleWrite)
            writeStoreToDisk();

        semaphore.signal();
    });
    semaphore.wait(WallTime::infinity());
}
285
286 String WebResourceLoadStatisticsStore::statisticsStoragePath() const
287 {
288     return m_statisticsStoragePath.isolatedCopy();
289 }
290
291 String WebResourceLoadStatisticsStore::resourceLogFilePath() const
292 {
293     String statisticsStoragePath = this->statisticsStoragePath();
294     if (statisticsStoragePath.isEmpty())
295         return emptyString();
296
297     return pathByAppendingComponent(statisticsStoragePath, "full_browsing_session_resourceLog.plist");
298 }
299
300 void WebResourceLoadStatisticsStore::writeStoreToDisk()
301 {
302     ASSERT(!RunLoop::isMain());
303     
304     stopMonitoringStatisticsStorage();
305
306     syncWithExistingStatisticsStorageIfNeeded();
307
308     auto encoder = createEncoderFromData();
309     RefPtr<SharedBuffer> rawData = encoder->finishEncoding();
310     if (!rawData)
311         return;
312
313     auto statisticsStoragePath = this->statisticsStoragePath();
314     if (!statisticsStoragePath.isEmpty()) {
315         makeAllDirectories(statisticsStoragePath);
316         platformExcludeFromBackup();
317     }
318
319     auto handle = openAndLockFile(resourceLogFilePath(), OpenForWrite);
320     if (handle == invalidPlatformFileHandle)
321         return;
322
323     int64_t writtenBytes = writeToFile(handle, rawData->data(), rawData->size());
324     unlockAndCloseFile(handle);
325
326     if (writtenBytes != static_cast<int64_t>(rawData->size()))
327         RELEASE_LOG_ERROR(ResourceLoadStatistics, "WebResourceLoadStatisticsStore: We only wrote %d out of %zu bytes to disk", static_cast<unsigned>(writtenBytes), rawData->size());
328
329     m_lastStatisticsFileSyncTime = WallTime::now();
330     m_lastStatisticsWriteTime = MonotonicTime::now();
331
332     startMonitoringStatisticsStorage();
333     m_didScheduleWrite = false;
334 }
335
336 void WebResourceLoadStatisticsStore::scheduleOrWriteStoreToDisk()
337 {
338     ASSERT(!RunLoop::isMain());
339
340     auto timeSinceLastWrite = MonotonicTime::now() - m_lastStatisticsWriteTime;
341     if (timeSinceLastWrite < minimumStatisticsFileWriteInterval) {
342         if (!m_didScheduleWrite) {
343             m_didScheduleWrite = true;
344             Seconds delay = minimumStatisticsFileWriteInterval - timeSinceLastWrite + 1_s;
345             m_statisticsQueue->dispatchAfter(delay, [this, protectedThis = makeRef(*this)] {
346                 writeStoreToDisk();
347             });
348         }
349         return;
350     }
351
352     writeStoreToDisk();
353 }
354
355 void WebResourceLoadStatisticsStore::deleteStoreFromDisk()
356 {
357     ASSERT(!RunLoop::isMain());
358     String resourceLogPath = resourceLogFilePath();
359     if (resourceLogPath.isEmpty())
360         return;
361
362     stopMonitoringStatisticsStorage();
363
364     if (!deleteFile(resourceLogPath))
365         RELEASE_LOG_ERROR(ResourceLoadStatistics, "Unable to delete statistics file: %s", resourceLogPath.utf8().data());
366 }
367
// Starts watching the statistics file for changes, unless a monitor already exists or no file
// path is configured. Change callbacks are delivered on the statistics queue.
// Must be called off the main run loop (asserted).
void WebResourceLoadStatisticsStore::startMonitoringStatisticsStorage()
{
    ASSERT(!RunLoop::isMain());
    if (m_statisticsStorageMonitor)
        return;
    
    String resourceLogPath = resourceLogFilePath();
    if (resourceLogPath.isEmpty())
        return;
    
    // The callback captures a raw |this|; the monitor is owned by this object.
    m_statisticsStorageMonitor = std::make_unique<FileMonitor>(resourceLogPath, m_statisticsQueue.copyRef(), [this] (FileMonitor::FileChangeType type) {
        ASSERT(!RunLoop::isMain());
        switch (type) {
        case FileMonitor::FileChangeType::Modification:
            refreshFromDisk();
            break;
        case FileMonitor::FileChangeType::Removal:
            clearInMemory();
            // NOTE(review): this releases the monitor from inside its own callback - confirm
            // FileMonitor tolerates being destroyed here.
            m_statisticsStorageMonitor = nullptr;
            break;
        }
    });
}
391
392 void WebResourceLoadStatisticsStore::stopMonitoringStatisticsStorage()
393 {
394     ASSERT(!RunLoop::isMain());
395     m_statisticsStorageMonitor = nullptr;
396 }
397
398 void WebResourceLoadStatisticsStore::syncWithExistingStatisticsStorageIfNeeded()
399 {
400     ASSERT(!RunLoop::isMain());
401     if (m_statisticsStorageMonitor)
402         return;
403
404     String resourceLog = resourceLogFilePath();
405     if (resourceLog.isEmpty() || !fileExists(resourceLog))
406         return;
407
408     refreshFromDisk();
409 }
410
#if !PLATFORM(COCOA)
// Intentionally empty on non-Cocoa platforms. This definition is compiled out on Cocoa, where
// one is expected to be provided elsewhere.
void WebResourceLoadStatisticsStore::platformExcludeFromBackup() const
{
}
#endif
416
417 std::unique_ptr<KeyedDecoder> WebResourceLoadStatisticsStore::createDecoderFromDisk(const String& path) const
418 {
419     ASSERT(!RunLoop::isMain());
420     auto handle = openAndLockFile(path, OpenForRead);
421     if (handle == invalidPlatformFileHandle)
422         return nullptr;
423     
424     long long fileSize = 0;
425     if (!getFileSize(handle, fileSize)) {
426         unlockAndCloseFile(handle);
427         return nullptr;
428     }
429     
430     size_t bytesToRead;
431     if (!WTF::convertSafely(fileSize, bytesToRead)) {
432         unlockAndCloseFile(handle);
433         return nullptr;
434     }
435
436     Vector<char> buffer(bytesToRead);
437     size_t totalBytesRead = readFromFile(handle, buffer.data(), buffer.size());
438
439     unlockAndCloseFile(handle);
440
441     if (totalBytesRead != bytesToRead)
442         return nullptr;
443
444     return KeyedDecoder::decoder(reinterpret_cast<const uint8_t*>(buffer.data()), buffer.size());
445 }
446
447 void WebResourceLoadStatisticsStore::performDailyTasks()
448 {
449     ASSERT(RunLoop::isMain());
450
451     includeTodayAsOperatingDateIfNecessary();
452     if (m_parameters.shouldSubmitTelemetry)
453         submitTelemetry();
454 }
455
456 void WebResourceLoadStatisticsStore::submitTelemetry()
457 {
458     ASSERT(RunLoop::isMain());
459     m_statisticsQueue->dispatch([this, protectedThis = makeRef(*this)] {
460         WebResourceLoadStatisticsTelemetry::calculateAndSubmit(*this);
461     });
462 }
463
464 void WebResourceLoadStatisticsStore::logUserInteraction(const URL& url)
465 {
466     if (url.isBlankURL() || url.isEmpty())
467         return;
468
469     m_statisticsQueue->dispatch([this, protectedThis = makeRef(*this), primaryDomain = isolatedPrimaryDomain(url)] {
470         auto& statistics = ensureResourceStatisticsForPrimaryDomain(primaryDomain);
471         statistics.hadUserInteraction = true;
472         statistics.mostRecentUserInteractionTime = WallTime::now();
473
474         updateCookiePartitioningForDomains({ primaryDomain }, { }, ShouldClearFirst::No);
475     });
476 }
477
478 void WebResourceLoadStatisticsStore::clearUserInteraction(const URL& url)
479 {
480     if (url.isBlankURL() || url.isEmpty())
481         return;
482
483     m_statisticsQueue->dispatch([this, protectedThis = makeRef(*this), primaryDomain = isolatedPrimaryDomain(url)] {
484         auto& statistics = ensureResourceStatisticsForPrimaryDomain(primaryDomain);
485         statistics.hadUserInteraction = false;
486         statistics.mostRecentUserInteractionTime = { };
487     });
488 }
489
490 void WebResourceLoadStatisticsStore::hasHadUserInteraction(const URL& url, WTF::Function<void (bool)>&& completionHandler)
491 {
492     if (url.isBlankURL() || url.isEmpty()) {
493         completionHandler(false);
494         return;
495     }
496
497     m_statisticsQueue->dispatch([this, protectedThis = makeRef(*this), primaryDomain = isolatedPrimaryDomain(url), completionHandler = WTFMove(completionHandler)] () mutable {
498         auto mapEntry = m_resourceStatisticsMap.find(primaryDomain);
499         bool hadUserInteraction = mapEntry == m_resourceStatisticsMap.end() ? false: hasHadUnexpiredRecentUserInteraction(mapEntry->value);
500         RunLoop::main().dispatch([hadUserInteraction, completionHandler = WTFMove(completionHandler)] {
501             completionHandler(hadUserInteraction);
502         });
503     });
504 }
505
506 void WebResourceLoadStatisticsStore::setLastSeen(const URL& url, Seconds seconds)
507 {
508     if (url.isBlankURL() || url.isEmpty())
509         return;
510     
511     m_statisticsQueue->dispatch([this, protectedThis = makeRef(*this), primaryDomain = isolatedPrimaryDomain(url), seconds] {
512         auto& statistics = ensureResourceStatisticsForPrimaryDomain(primaryDomain);
513         statistics.lastSeen = WallTime::fromRawSeconds(seconds.seconds());
514     });
515 }
516     
517 void WebResourceLoadStatisticsStore::setPrevalentResource(const URL& url)
518 {
519     if (url.isBlankURL() || url.isEmpty())
520         return;
521
522     m_statisticsQueue->dispatch([this, protectedThis = makeRef(*this), primaryDomain = isolatedPrimaryDomain(url)] {
523         auto& statistics = ensureResourceStatisticsForPrimaryDomain(primaryDomain);
524         statistics.isPrevalentResource = true;
525     });
526 }
527
528 void WebResourceLoadStatisticsStore::isPrevalentResource(const URL& url, WTF::Function<void (bool)>&& completionHandler)
529 {
530     if (url.isBlankURL() || url.isEmpty()) {
531         completionHandler(false);
532         return;
533     }
534
535     m_statisticsQueue->dispatch([this, protectedThis = makeRef(*this), primaryDomain = isolatedPrimaryDomain(url), completionHandler = WTFMove(completionHandler)] () mutable {
536         auto mapEntry = m_resourceStatisticsMap.find(primaryDomain);
537         bool isPrevalentResource = mapEntry == m_resourceStatisticsMap.end() ? false : mapEntry->value.isPrevalentResource;
538         RunLoop::main().dispatch([isPrevalentResource, completionHandler = WTFMove(completionHandler)] {
539             completionHandler(isPrevalentResource);
540         });
541     });
542 }
543
544 void WebResourceLoadStatisticsStore::clearPrevalentResource(const URL& url)
545 {
546     if (url.isBlankURL() || url.isEmpty())
547         return;
548
549     m_statisticsQueue->dispatch([this, protectedThis = makeRef(*this), primaryDomain = isolatedPrimaryDomain(url)] {
550         auto& statistics = ensureResourceStatisticsForPrimaryDomain(primaryDomain);
551         statistics.isPrevalentResource = false;
552     });
553 }
554
555 void WebResourceLoadStatisticsStore::setGrandfathered(const URL& url, bool value)
556 {
557     if (url.isBlankURL() || url.isEmpty())
558         return;
559
560     m_statisticsQueue->dispatch([this, protectedThis = makeRef(*this), primaryDomain = isolatedPrimaryDomain(url), value] {
561         auto& statistics = ensureResourceStatisticsForPrimaryDomain(primaryDomain);
562         statistics.grandfathered = value;
563     });
564 }
565
566 void WebResourceLoadStatisticsStore::isGrandfathered(const URL& url, WTF::Function<void (bool)>&& completionHandler)
567 {
568     if (url.isBlankURL() || url.isEmpty()) {
569         completionHandler(false);
570         return;
571     }
572
573     m_statisticsQueue->dispatch([this, protectedThis = makeRef(*this), completionHandler = WTFMove(completionHandler), primaryDomain = isolatedPrimaryDomain(url)] () mutable {
574         auto mapEntry = m_resourceStatisticsMap.find(primaryDomain);
575         bool isGrandFathered = mapEntry == m_resourceStatisticsMap.end() ? false : mapEntry->value.grandfathered;
576         RunLoop::main().dispatch([isGrandFathered, completionHandler = WTFMove(completionHandler)] {
577             completionHandler(isGrandFathered);
578         });
579     });
580 }
581
582 void WebResourceLoadStatisticsStore::setSubframeUnderTopFrameOrigin(const URL& subframe, const URL& topFrame)
583 {
584     if (subframe.isBlankURL() || subframe.isEmpty() || topFrame.isBlankURL() || topFrame.isEmpty())
585         return;
586
587     m_statisticsQueue->dispatch([this, protectedThis = makeRef(*this), primaryTopFrameDomain = isolatedPrimaryDomain(topFrame), primarySubFrameDomain = isolatedPrimaryDomain(subframe)] {
588         auto& statistics = ensureResourceStatisticsForPrimaryDomain(primarySubFrameDomain);
589         statistics.subframeUnderTopFrameOrigins.add(primaryTopFrameDomain);
590     });
591 }
592
593 void WebResourceLoadStatisticsStore::setSubresourceUnderTopFrameOrigin(const URL& subresource, const URL& topFrame)
594 {
595     if (subresource.isBlankURL() || subresource.isEmpty() || topFrame.isBlankURL() || topFrame.isEmpty())
596         return;
597
598     m_statisticsQueue->dispatch([this, protectedThis = makeRef(*this), primaryTopFrameDomain = isolatedPrimaryDomain(topFrame), primarySubresourceDomain = isolatedPrimaryDomain(subresource)] {
599         auto& statistics = ensureResourceStatisticsForPrimaryDomain(primarySubresourceDomain);
600         statistics.subresourceUnderTopFrameOrigins.add(primaryTopFrameDomain);
601     });
602 }
603
604 void WebResourceLoadStatisticsStore::setSubresourceUniqueRedirectTo(const URL& subresource, const URL& hostNameRedirectedTo)
605 {
606     if (subresource.isBlankURL() || subresource.isEmpty() || hostNameRedirectedTo.isBlankURL() || hostNameRedirectedTo.isEmpty())
607         return;
608
609     m_statisticsQueue->dispatch([this, protectedThis = makeRef(*this), primaryRedirectDomain = isolatedPrimaryDomain(hostNameRedirectedTo), primarySubresourceDomain = isolatedPrimaryDomain(subresource)] {
610         auto& statistics = ensureResourceStatisticsForPrimaryDomain(primarySubresourceDomain);
611         statistics.subresourceUniqueRedirectsTo.add(primaryRedirectDomain);
612     });
613 }
614
615 void WebResourceLoadStatisticsStore::scheduleCookiePartitioningUpdate()
616 {
617     // Helper function used by testing system. Should only be called from the main thread.
618     ASSERT(RunLoop::isMain());
619
620     m_statisticsQueue->dispatch([this, protectedThis = makeRef(*this)] {
621         updateCookiePartitioning();
622     });
623 }
624
625 void WebResourceLoadStatisticsStore::scheduleCookiePartitioningUpdateForDomains(const Vector<String>& domainsToRemove, const Vector<String>& domainsToAdd, ShouldClearFirst shouldClearFirst)
626 {
627     // Helper function used by testing system. Should only be called from the main thread.
628     ASSERT(RunLoop::isMain());
629     m_statisticsQueue->dispatch([this, protectedThis = makeRef(*this), domainsToRemove = CrossThreadCopier<Vector<String>>::copy(domainsToRemove), domainsToAdd = CrossThreadCopier<Vector<String>>::copy(domainsToAdd), shouldClearFirst] {
630         updateCookiePartitioningForDomains(domainsToRemove, domainsToAdd, shouldClearFirst);
631     });
632 }
633
634 #if HAVE(CFNETWORK_STORAGE_PARTITIONING)
635 void WebResourceLoadStatisticsStore::scheduleCookiePartitioningStateReset()
636 {
637     m_statisticsQueue->dispatch([this, protectedThis = makeRef(*this)] {
638         resetCookiePartitioningState();
639     });
640 }
641 #endif
642
643 void WebResourceLoadStatisticsStore::scheduleClearInMemory()
644 {
645     ASSERT(RunLoop::isMain());
646     m_statisticsQueue->dispatch([this, protectedThis = makeRef(*this)] {
647         clearInMemory();
648     });
649 }
650
651 void WebResourceLoadStatisticsStore::scheduleClearInMemoryAndPersistent()
652 {
653     ASSERT(RunLoop::isMain());
654     m_statisticsQueue->dispatch([this, protectedThis = makeRef(*this)] {
655         clearInMemory();
656         deleteStoreFromDisk();
657         grandfatherExistingWebsiteData();
658     });
659 }
660
// Overload that accepts a cutoff time. |modifiedSince| is currently ignored: this forwards to
// the parameterless overload and therefore clears everything.
void WebResourceLoadStatisticsStore::scheduleClearInMemoryAndPersistent(std::chrono::system_clock::time_point modifiedSince)
{
    // For now, be conservative and clear everything regardless of modifiedSince.
    UNUSED_PARAM(modifiedSince);
    scheduleClearInMemoryAndPersistent();
}
667
// Stores |seconds| in m_parameters.timeToLiveUserInteraction. |seconds| must be non-negative
// (asserted).
// NOTE(review): no thread assertion here, while m_parameters is read on the statistics queue
// elsewhere in this file - confirm the intended calling thread.
void WebResourceLoadStatisticsStore::setTimeToLiveUserInteraction(Seconds seconds)
{
    ASSERT(seconds >= 0_s);
    m_parameters.timeToLiveUserInteraction = seconds;
}
673
// Stores |seconds| in m_parameters.timeToLiveCookiePartitionFree, the window after the most
// recent user interaction during which shouldPartitionCookies() returns false for a prevalent
// resource. |seconds| must be non-negative (asserted).
void WebResourceLoadStatisticsStore::setTimeToLiveCookiePartitionFree(Seconds seconds)
{
    ASSERT(seconds >= 0_s);
    m_parameters.timeToLiveCookiePartitionFree = seconds;
}
679
// Stores |seconds| in m_parameters.minimumTimeBetweenDataRecordsRemoval, the spacing that
// shouldRemoveDataRecords() enforces between removals. |seconds| must be non-negative (asserted).
void WebResourceLoadStatisticsStore::setMinimumTimeBetweenDataRecordsRemoval(Seconds seconds)
{
    ASSERT(seconds >= 0_s);
    m_parameters.minimumTimeBetweenDataRecordsRemoval = seconds;
}
685
// Stores |seconds| in m_parameters.grandfatheringTime, the duration that
// grandfatherExistingWebsiteData() adds to the current time to compute the end of
// grandfathering. |seconds| must be non-negative (asserted).
void WebResourceLoadStatisticsStore::setGrandfatheringTime(Seconds seconds)
{
    ASSERT(seconds >= 0_s);
    m_parameters.grandfatheringTime = seconds;
}
691
692 bool WebResourceLoadStatisticsStore::shouldRemoveDataRecords() const
693 {
694     ASSERT(!RunLoop::isMain());
695     if (m_dataRecordsBeingRemoved)
696         return false;
697
698     return !m_lastTimeDataRecordsWereRemoved || MonotonicTime::now() >= (m_lastTimeDataRecordsWereRemoved + m_parameters.minimumTimeBetweenDataRecordsRemoval);
699 }
700
701 void WebResourceLoadStatisticsStore::setDataRecordsBeingRemoved(bool value)
702 {
703     ASSERT(!RunLoop::isMain());
704     m_dataRecordsBeingRemoved = value;
705     if (m_dataRecordsBeingRemoved)
706         m_lastTimeDataRecordsWereRemoved = MonotonicTime::now();
707 }
708
709 ResourceLoadStatistics& WebResourceLoadStatisticsStore::ensureResourceStatisticsForPrimaryDomain(const String& primaryDomain)
710 {
711     ASSERT(!RunLoop::isMain());
712     return m_resourceStatisticsMap.ensure(primaryDomain, [&primaryDomain] {
713         return ResourceLoadStatistics(primaryDomain);
714     }).iterator->value;
715 }
716
717 std::unique_ptr<KeyedEncoder> WebResourceLoadStatisticsStore::createEncoderFromData() const
718 {
719     ASSERT(!RunLoop::isMain());
720     auto encoder = KeyedEncoder::encoder();
721     encoder->encodeUInt32("version", statisticsModelVersion);
722     encoder->encodeDouble("endOfGrandfatheringTimestamp", m_endOfGrandfatheringTimestamp.secondsSinceEpoch().value());
723
724     encoder->encodeObjects("browsingStatistics", m_resourceStatisticsMap.begin(), m_resourceStatisticsMap.end(), [](KeyedEncoder& encoderInner, const auto& origin) {
725         origin.value.encode(encoderInner);
726     });
727
728     encoder->encodeObjects("operatingDates", m_operatingDates.begin(), m_operatingDates.end(), [](KeyedEncoder& encoderInner, WallTime date) {
729         encoderInner.encodeDouble("date", date.secondsSinceEpoch().value());
730     });
731
732     return encoder;
733 }
734
// Fills the in-memory store from |decoder|. Must be called off the main run loop (asserted).
// This is a no-op when the store already holds statistics, and when the stored version is
// missing or differs from statisticsModelVersion.
void WebResourceLoadStatisticsStore::populateFromDecoder(KeyedDecoder& decoder)
{
    ASSERT(!RunLoop::isMain());
    // Never overwrite statistics that are already in memory.
    if (!m_resourceStatisticsMap.isEmpty())
        return;

    unsigned versionOnDisk;
    if (!decoder.decodeUInt32("version", versionOnDisk))
        return;

    if (versionOnDisk != statisticsModelVersion)
        return;

    // A missing timestamp resets the end of grandfathering to a default-constructed WallTime.
    double endOfGrandfatheringTimestamp;
    if (decoder.decodeDouble("endOfGrandfatheringTimestamp", endOfGrandfatheringTimestamp))
        m_endOfGrandfatheringTimestamp = WallTime::fromRawSeconds(endOfGrandfatheringTimestamp);
    else
        m_endOfGrandfatheringTimestamp = { };

    Vector<ResourceLoadStatistics> loadedStatistics;
    bool succeeded = decoder.decodeObjects("browsingStatistics", loadedStatistics, [](KeyedDecoder& decoderInner, ResourceLoadStatistics& statistics) {
        return statistics.decode(decoderInner);
    });

    if (!succeeded)
        return;

    // Prevalent resources without user interaction are marked for cookie partitioning and
    // collected so the handler can be told about them at the end.
    Vector<String> prevalentResourceDomainsWithoutUserInteraction;
    prevalentResourceDomainsWithoutUserInteraction.reserveInitialCapacity(loadedStatistics.size());
    for (auto& statistics : loadedStatistics) {
        if (statistics.isPrevalentResource && !statistics.hadUserInteraction) {
            prevalentResourceDomainsWithoutUserInteraction.uncheckedAppend(statistics.highLevelDomain);
            statistics.isMarkedForCookiePartitioning = true;
        }
        m_resourceStatisticsMap.add(statistics.highLevelDomain, WTFMove(statistics));
    }

    succeeded = decoder.decodeObjects("operatingDates", m_operatingDates, [](KeyedDecoder& decoder, WallTime& wallTime) {
        double value;
        if (!decoder.decodeDouble("date", value))
            return false;

        wallTime = WallTime::fromRawSeconds(value);
        return true;
    });

    // NOTE(review): returning here keeps the statistics loaded above (including their
    // isMarkedForCookiePartitioning flags) but skips the partitioning update below - confirm
    // that is intended.
    if (!succeeded)
        return;

    updateCookiePartitioningForDomains({ }, prevalentResourceDomainsWithoutUserInteraction, ShouldClearFirst::Yes);
}
786
787 void WebResourceLoadStatisticsStore::clearInMemory()
788 {
789     ASSERT(!RunLoop::isMain());
790     m_resourceStatisticsMap.clear();
791     m_operatingDates.clear();
792
793     updateCookiePartitioningForDomains({ }, { }, ShouldClearFirst::Yes);
794 }
795
796 void WebResourceLoadStatisticsStore::mergeStatistics(Vector<ResourceLoadStatistics>&& statistics)
797 {
798     ASSERT(!RunLoop::isMain());
799     for (auto& statistic : statistics) {
800         auto result = m_resourceStatisticsMap.ensure(statistic.highLevelDomain, [&statistic] {
801             return WTFMove(statistic);
802         });
803         if (!result.isNewEntry)
804             result.iterator->value.merge(statistic);
805     }
806 }
807
808 inline bool WebResourceLoadStatisticsStore::shouldPartitionCookies(const ResourceLoadStatistics& statistic) const
809 {
810     return statistic.isPrevalentResource && (!statistic.hadUserInteraction || WallTime::now() > statistic.mostRecentUserInteractionTime + m_parameters.timeToLiveCookiePartitionFree);
811 }
812
813 void WebResourceLoadStatisticsStore::updateCookiePartitioning()
814 {
815     ASSERT(!RunLoop::isMain());
816
817     Vector<String> domainsToRemove;
818     Vector<String> domainsToAdd;
819     for (auto& resourceStatistic : m_resourceStatisticsMap.values()) {
820         bool shouldPartition = shouldPartitionCookies(resourceStatistic);
821         if (resourceStatistic.isMarkedForCookiePartitioning && !shouldPartition) {
822             resourceStatistic.isMarkedForCookiePartitioning = false;
823             domainsToRemove.append(resourceStatistic.highLevelDomain);
824         } else if (!resourceStatistic.isMarkedForCookiePartitioning && shouldPartition) {
825             resourceStatistic.isMarkedForCookiePartitioning = true;
826             domainsToAdd.append(resourceStatistic.highLevelDomain);
827         }
828     }
829
830     if (domainsToRemove.isEmpty() && domainsToAdd.isEmpty())
831         return;
832
833     RunLoop::main().dispatch([this, protectedThis = makeRef(*this), domainsToRemove = CrossThreadCopier<Vector<String>>::copy(domainsToRemove), domainsToAdd = CrossThreadCopier<Vector<String>>::copy(domainsToAdd)] () {
834         m_updateCookiePartitioningForDomainsHandler(domainsToRemove, domainsToAdd, ShouldClearFirst::No);
835     });
836 }
837
838 void WebResourceLoadStatisticsStore::updateCookiePartitioningForDomains(const Vector<String>& domainsToRemove, const Vector<String>& domainsToAdd, ShouldClearFirst shouldClearFirst)
839 {
840     ASSERT(!RunLoop::isMain());
841     if (domainsToRemove.isEmpty() && domainsToAdd.isEmpty())
842         return;
843
844     RunLoop::main().dispatch([this, shouldClearFirst, protectedThis = makeRef(*this), domainsToRemove = CrossThreadCopier<Vector<String>>::copy(domainsToRemove), domainsToAdd = CrossThreadCopier<Vector<String>>::copy(domainsToAdd)] () {
845         m_updateCookiePartitioningForDomainsHandler(domainsToRemove, domainsToAdd, shouldClearFirst);
846     });
847
848     if (shouldClearFirst == ShouldClearFirst::Yes)
849         resetCookiePartitioningState();
850     else {
851         for (auto& domain : domainsToRemove)
852             ensureResourceStatisticsForPrimaryDomain(domain).isMarkedForCookiePartitioning = false;
853     }
854
855     for (auto& domain : domainsToAdd)
856         ensureResourceStatisticsForPrimaryDomain(domain).isMarkedForCookiePartitioning = true;
857 }
858
859 void WebResourceLoadStatisticsStore::resetCookiePartitioningState()
860 {
861     ASSERT(!RunLoop::isMain());
862     for (auto& resourceStatistic : m_resourceStatisticsMap.values())
863         resourceStatistic.isMarkedForCookiePartitioning = false;
864 }
865
866 void WebResourceLoadStatisticsStore::processStatistics(const WTF::Function<void (const ResourceLoadStatistics&)>& processFunction) const
867 {
868     ASSERT(!RunLoop::isMain());
869     for (auto& resourceStatistic : m_resourceStatisticsMap.values())
870         processFunction(resourceStatistic);
871 }
872
873 bool WebResourceLoadStatisticsStore::hasHadUnexpiredRecentUserInteraction(ResourceLoadStatistics& resourceStatistic) const
874 {
875     if (resourceStatistic.hadUserInteraction && hasStatisticsExpired(resourceStatistic)) {
876         // Drop privacy sensitive data because we no longer need it.
877         // Set timestamp to 0 so that statistics merge will know
878         // it has been reset as opposed to its default -1.
879         resourceStatistic.mostRecentUserInteractionTime = { };
880         resourceStatistic.hadUserInteraction = false;
881     }
882
883     return resourceStatistic.hadUserInteraction;
884 }
885
886 Vector<String> WebResourceLoadStatisticsStore::topPrivatelyControlledDomainsToRemoveWebsiteDataFor()
887 {
888     ASSERT(!RunLoop::isMain());
889
890     bool shouldCheckForGrandfathering = m_endOfGrandfatheringTimestamp > WallTime::now();
891     bool shouldClearGrandfathering = !shouldCheckForGrandfathering && m_endOfGrandfatheringTimestamp;
892
893     if (shouldClearGrandfathering)
894         m_endOfGrandfatheringTimestamp = { };
895
896     Vector<String> prevalentResources;
897     for (auto& statistic : m_resourceStatisticsMap.values()) {
898         if (statistic.isPrevalentResource && !hasHadUnexpiredRecentUserInteraction(statistic) && (!shouldCheckForGrandfathering || !statistic.grandfathered))
899             prevalentResources.append(statistic.highLevelDomain);
900
901         if (shouldClearGrandfathering && statistic.grandfathered)
902             statistic.grandfathered = false;
903     }
904
905     return prevalentResources;
906 }
907
908 void WebResourceLoadStatisticsStore::includeTodayAsOperatingDateIfNecessary()
909 {
910     if (!m_operatingDates.isEmpty() && (WallTime::now() - m_operatingDates.last() < 24_h))
911         return;
912
913     while (m_operatingDates.size() >= operatingDatesWindow)
914         m_operatingDates.removeFirst();
915
916     m_operatingDates.append(WallTime::now());
917 }
918
919 bool WebResourceLoadStatisticsStore::hasStatisticsExpired(const ResourceLoadStatistics& resourceStatistic) const
920 {
921     if (m_operatingDates.size() >= operatingDatesWindow) {
922         if (resourceStatistic.mostRecentUserInteractionTime < m_operatingDates.first())
923             return true;
924     }
925
926     // If we don't meet the real criteria for an expired statistic, check the user setting for a tighter restriction (mainly for testing).
927     if (m_parameters.timeToLiveUserInteraction) {
928         if (WallTime::now() > resourceStatistic.mostRecentUserInteractionTime + m_parameters.timeToLiveUserInteraction.value())
929             return true;
930     }
931
932     return false;
933 }
934     
935 void WebResourceLoadStatisticsStore::setMaxStatisticsEntries(size_t maximumEntryCount)
936 {
937     m_parameters.maxStatisticsEntries = maximumEntryCount;
938 }
939     
940 void WebResourceLoadStatisticsStore::setPruneEntriesDownTo(size_t pruneTargetCount)
941 {
942     m_parameters.pruneEntriesDownTo = pruneTargetCount;
943 }
944     
945 struct StatisticsLastSeen {
946     String topPrivatelyOwnedDomain;
947     WallTime lastSeen;
948 };
949     
950 static void pruneResources(HashMap<String, WebCore::ResourceLoadStatistics>& statisticsMap, Vector<StatisticsLastSeen>& statisticsToPrune, size_t& numberOfEntriesToPrune)
951 {
952     if (statisticsToPrune.size() > numberOfEntriesToPrune) {
953         std::sort(statisticsToPrune.begin(), statisticsToPrune.end(), [](const StatisticsLastSeen& a, const StatisticsLastSeen& b) {
954             return a.lastSeen < b.lastSeen;
955         });
956     }
957
958     for (size_t i = 0, end = std::min(numberOfEntriesToPrune, statisticsToPrune.size()); i != end; ++i, --numberOfEntriesToPrune)
959         statisticsMap.remove(statisticsToPrune[i].topPrivatelyOwnedDomain);
960 }
961     
962 static unsigned computeImportance(const ResourceLoadStatistics& resourceStatistic)
963 {
964     unsigned importance = maxImportance;
965     if (!resourceStatistic.isPrevalentResource)
966         importance -= 1;
967     if (!resourceStatistic.hadUserInteraction)
968         importance -= 2;
969     return importance;
970 }
971     
972 void WebResourceLoadStatisticsStore::pruneStatisticsIfNeeded()
973 {
974     ASSERT(!RunLoop::isMain());
975     if (m_resourceStatisticsMap.size() <= m_parameters.maxStatisticsEntries)
976         return;
977
978     ASSERT(m_parameters.pruneEntriesDownTo <= m_parameters.maxStatisticsEntries);
979
980     size_t numberOfEntriesLeftToPrune = m_resourceStatisticsMap.size() - m_parameters.pruneEntriesDownTo;
981     ASSERT(numberOfEntriesLeftToPrune);
982     
983     Vector<StatisticsLastSeen> resourcesToPrunePerImportance[maxImportance + 1];
984     for (auto& resourceStatistic : m_resourceStatisticsMap.values())
985         resourcesToPrunePerImportance[computeImportance(resourceStatistic)].append({ resourceStatistic.highLevelDomain, resourceStatistic.lastSeen });
986     
987     for (unsigned importance = 0; numberOfEntriesLeftToPrune && importance <= maxImportance; ++importance)
988         pruneResources(m_resourceStatisticsMap, resourcesToPrunePerImportance[importance], numberOfEntriesLeftToPrune);
989
990     ASSERT(!numberOfEntriesLeftToPrune);
991 }
992
993 void WebResourceLoadStatisticsStore::resetParametersToDefaultValues()
994 {
995     m_parameters = { };
996 }
997     
998 } // namespace WebKit