75225cc41565f8688f2506e85803cf609660f4ca
[WebKit-https.git] / Source / WebKit / NetworkProcess / cache / NetworkCacheStorage.cpp
1 /*
2  * Copyright (C) 2014-2017 Apple Inc. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  * 1. Redistributions of source code must retain the above copyright
8  *    notice, this list of conditions and the following disclaimer.
9  * 2. Redistributions in binary form must reproduce the above copyright
10  *    notice, this list of conditions and the following disclaimer in the
11  *    documentation and/or other materials provided with the distribution.
12  *
13  * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS''
14  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
15  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS
17  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
18  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
19  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
20  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
21  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
22  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
23  * THE POSSIBILITY OF SUCH DAMAGE.
24  */
25
26 #include "config.h"
27 #include "NetworkCacheStorage.h"
28
29 #include "Logging.h"
30 #include "NetworkCacheCoders.h"
31 #include "NetworkCacheFileSystem.h"
32 #include "NetworkCacheIOChannel.h"
33 #include <mutex>
34 #include <wtf/Condition.h>
35 #include <wtf/Lock.h>
36 #include <wtf/RandomNumber.h>
37 #include <wtf/RunLoop.h>
38 #include <wtf/text/CString.h>
39
40 namespace WebKit {
41 namespace NetworkCache {
42
43 static const char saltFileName[] = "salt";
44 static const char versionDirectoryPrefix[] = "Version ";
45 static const char recordsDirectoryName[] = "Records";
46 static const char blobsDirectoryName[] = "Blobs";
47 static const char blobSuffix[] = "-blob";
48 constexpr size_t maximumInlineBodySize { 16 * 1024 };
49
50 static double computeRecordWorth(FileTimes);
51
52 struct Storage::ReadOperation {
53     WTF_MAKE_FAST_ALLOCATED;
54 public:
55     ReadOperation(Storage& storage, const Key& key, RetrieveCompletionHandler&& completionHandler)
56         : storage(storage)
57         , key(key)
58         , completionHandler(WTFMove(completionHandler))
59     { }
60
61     void cancel();
62     bool finish();
63
64     Ref<Storage> storage;
65
66     const Key key;
67     const RetrieveCompletionHandler completionHandler;
68     
69     std::unique_ptr<Record> resultRecord;
70     SHA1::Digest expectedBodyHash;
71     BlobStorage::Blob resultBodyBlob;
72     std::atomic<unsigned> activeCount { 0 };
73     bool isCanceled { false };
74 };
75
76 void Storage::ReadOperation::cancel()
77 {
78     ASSERT(RunLoop::isMain());
79
80     if (isCanceled)
81         return;
82     isCanceled = true;
83     completionHandler(nullptr);
84 }
85
86 bool Storage::ReadOperation::finish()
87 {
88     ASSERT(RunLoop::isMain());
89
90     if (isCanceled)
91         return false;
92     if (resultRecord && resultRecord->body.isNull()) {
93         if (resultBodyBlob.hash == expectedBodyHash)
94             resultRecord->body = resultBodyBlob.data;
95         else
96             resultRecord = nullptr;
97     }
98     return completionHandler(WTFMove(resultRecord));
99 }
100
101 struct Storage::WriteOperation {
102     WTF_MAKE_FAST_ALLOCATED;
103 public:
104     WriteOperation(Storage& storage, const Record& record, MappedBodyHandler&& mappedBodyHandler, CompletionHandler<void()>&& completionHandler)
105         : storage(storage)
106         , record(record)
107         , mappedBodyHandler(WTFMove(mappedBodyHandler))
108         , completionHandler(WTFMove(completionHandler))
109     { }
110     Ref<Storage> storage;
111
112     const Record record;
113     const MappedBodyHandler mappedBodyHandler;
114     CompletionHandler<void()> completionHandler;
115
116     std::atomic<unsigned> activeCount { 0 };
117 };
118
119 struct Storage::TraverseOperation {
120     WTF_MAKE_FAST_ALLOCATED;
121 public:
122     TraverseOperation(Storage& storage, const String& type, TraverseFlags flags, TraverseHandler&& handler)
123         : storage(storage)
124         , type(type)
125         , flags(flags)
126         , handler(WTFMove(handler))
127     { }
128     Ref<Storage> storage;
129
130     const String type;
131     const TraverseFlags flags;
132     const TraverseHandler handler;
133
134     Lock activeMutex;
135     Condition activeCondition;
136     unsigned activeCount { 0 };
137 };
138
139 static String makeVersionedDirectoryPath(const String& baseDirectoryPath)
140 {
141     String versionSubdirectory = versionDirectoryPrefix + String::number(Storage::version);
142     return WebCore::FileSystem::pathByAppendingComponent(baseDirectoryPath, versionSubdirectory);
143 }
144
145 static String makeRecordsDirectoryPath(const String& baseDirectoryPath)
146 {
147     return WebCore::FileSystem::pathByAppendingComponent(makeVersionedDirectoryPath(baseDirectoryPath), recordsDirectoryName);
148 }
149
150 static String makeBlobDirectoryPath(const String& baseDirectoryPath)
151 {
152     return WebCore::FileSystem::pathByAppendingComponent(makeVersionedDirectoryPath(baseDirectoryPath), blobsDirectoryName);
153 }
154
155 static String makeSaltFilePath(const String& baseDirectoryPath)
156 {
157     return WebCore::FileSystem::pathByAppendingComponent(makeVersionedDirectoryPath(baseDirectoryPath), saltFileName);
158 }
159
160 RefPtr<Storage> Storage::open(const String& cachePath, Mode mode)
161 {
162     ASSERT(RunLoop::isMain());
163
164     if (!WebCore::FileSystem::makeAllDirectories(makeVersionedDirectoryPath(cachePath)))
165         return nullptr;
166     auto salt = readOrMakeSalt(makeSaltFilePath(cachePath));
167     if (!salt)
168         return nullptr;
169     return adoptRef(new Storage(cachePath, mode, *salt));
170 }
171
172 void traverseRecordsFiles(const String& recordsPath, const String& expectedType, const RecordFileTraverseFunction& function)
173 {
174     traverseDirectory(recordsPath, [&](const String& partitionName, DirectoryEntryType entryType) {
175         if (entryType != DirectoryEntryType::Directory)
176             return;
177         String partitionPath = WebCore::FileSystem::pathByAppendingComponent(recordsPath, partitionName);
178         traverseDirectory(partitionPath, [&](const String& actualType, DirectoryEntryType entryType) {
179             if (entryType != DirectoryEntryType::Directory)
180                 return;
181             if (!expectedType.isEmpty() && expectedType != actualType)
182                 return;
183             String recordDirectoryPath = WebCore::FileSystem::pathByAppendingComponent(partitionPath, actualType);
184             traverseDirectory(recordDirectoryPath, [&function, &recordDirectoryPath, &actualType](const String& fileName, DirectoryEntryType entryType) {
185                 if (entryType != DirectoryEntryType::File || fileName.length() < Key::hashStringLength())
186                     return;
187
188                 String hashString = fileName.substring(0, Key::hashStringLength());
189                 auto isBlob = fileName.length() > Key::hashStringLength() && fileName.endsWith(blobSuffix);
190                 function(fileName, hashString, actualType, isBlob, recordDirectoryPath);
191             });
192         });
193     });
194 }
195
196 static void deleteEmptyRecordsDirectories(const String& recordsPath)
197 {
198     traverseDirectory(recordsPath, [&recordsPath](const String& partitionName, DirectoryEntryType type) {
199         if (type != DirectoryEntryType::Directory)
200             return;
201
202         // Delete [type] sub-folders.
203         String partitionPath = WebCore::FileSystem::pathByAppendingComponent(recordsPath, partitionName);
204         traverseDirectory(partitionPath, [&partitionPath](const String& subdirName, DirectoryEntryType entryType) {
205             if (entryType != DirectoryEntryType::Directory)
206                 return;
207
208             // Let system figure out if it is really empty.
209             WebCore::FileSystem::deleteEmptyDirectory(WebCore::FileSystem::pathByAppendingComponent(partitionPath, subdirName));
210         });
211
212         // Delete [Partition] folders.
213         // Let system figure out if it is really empty.
214         WebCore::FileSystem::deleteEmptyDirectory(WebCore::FileSystem::pathByAppendingComponent(recordsPath, partitionName));
215     });
216 }
217
218 Storage::Storage(const String& baseDirectoryPath, Mode mode, Salt salt)
219     : m_basePath(baseDirectoryPath)
220     , m_recordsPath(makeRecordsDirectoryPath(baseDirectoryPath))
221     , m_mode(mode)
222     , m_salt(salt)
223     , m_canUseBlobsForForBodyData(isSafeToUseMemoryMapForPath(baseDirectoryPath))
224     , m_readOperationTimeoutTimer(*this, &Storage::cancelAllReadOperations)
225     , m_writeOperationDispatchTimer(*this, &Storage::dispatchPendingWriteOperations)
226     , m_ioQueue(WorkQueue::create("com.apple.WebKit.Cache.Storage", WorkQueue::Type::Concurrent))
227     , m_backgroundIOQueue(WorkQueue::create("com.apple.WebKit.Cache.Storage.background", WorkQueue::Type::Concurrent, WorkQueue::QOS::Background))
228     , m_serialBackgroundIOQueue(WorkQueue::create("com.apple.WebKit.Cache.Storage.serialBackground", WorkQueue::Type::Serial, WorkQueue::QOS::Background))
229     , m_blobStorage(makeBlobDirectoryPath(baseDirectoryPath), m_salt)
230 {
231     deleteOldVersions();
232     synchronize();
233 }
234
235 Storage::~Storage()
236 {
237     ASSERT(RunLoop::isMain());
238     ASSERT(m_activeReadOperations.isEmpty());
239     ASSERT(m_activeWriteOperations.isEmpty());
240     ASSERT(m_activeTraverseOperations.isEmpty());
241     ASSERT(!m_synchronizationInProgress);
242     ASSERT(!m_shrinkInProgress);
243 }
244
245 String Storage::basePath() const
246 {
247     return m_basePath.isolatedCopy();
248 }
249
250 String Storage::versionPath() const
251 {
252     return makeVersionedDirectoryPath(basePath());
253 }
254
255 String Storage::recordsPath() const
256 {
257     return m_recordsPath.isolatedCopy();
258 }
259
260 size_t Storage::approximateSize() const
261 {
262     return m_approximateRecordsSize + m_blobStorage.approximateSize();
263 }
264
265 static size_t estimateRecordsSize(unsigned recordCount, unsigned blobCount)
266 {
267     auto inlineBodyCount = recordCount - std::min(blobCount, recordCount);
268     auto headerSizes = recordCount * 4096;
269     auto inlineBodySizes = (maximumInlineBodySize / 2) * inlineBodyCount;
270     return headerSizes + inlineBodySizes;
271 }
272
273 void Storage::synchronize()
274 {
275     ASSERT(RunLoop::isMain());
276
277     if (m_synchronizationInProgress || m_shrinkInProgress)
278         return;
279     m_synchronizationInProgress = true;
280
281     LOG(NetworkCacheStorage, "(NetworkProcess) synchronizing cache");
282
283     backgroundIOQueue().dispatch([this, protectedThis = makeRef(*this)] () mutable {
284         auto recordFilter = std::make_unique<ContentsFilter>();
285         auto blobFilter = std::make_unique<ContentsFilter>();
286
287         // Most of the disk space usage is in blobs if we are using them. Approximate records file sizes to avoid expensive stat() calls.
288         bool shouldComputeExactRecordsSize = !m_canUseBlobsForForBodyData;
289         size_t recordsSize = 0;
290         unsigned recordCount = 0;
291         unsigned blobCount = 0;
292
293         String anyType;
294         traverseRecordsFiles(recordsPath(), anyType, [&](const String& fileName, const String& hashString, const String& type, bool isBlob, const String& recordDirectoryPath) {
295             auto filePath = WebCore::FileSystem::pathByAppendingComponent(recordDirectoryPath, fileName);
296
297             Key::HashType hash;
298             if (!Key::stringToHash(hashString, hash)) {
299                 WebCore::FileSystem::deleteFile(filePath);
300                 return;
301             }
302
303             if (isBlob) {
304                 ++blobCount;
305                 blobFilter->add(hash);
306                 return;
307             }
308
309             ++recordCount;
310
311             if (shouldComputeExactRecordsSize) {
312                 long long fileSize = 0;
313                 WebCore::FileSystem::getFileSize(filePath, fileSize);
314                 recordsSize += fileSize;
315             }
316
317             recordFilter->add(hash);
318         });
319
320         if (!shouldComputeExactRecordsSize)
321             recordsSize = estimateRecordsSize(recordCount, blobCount);
322
323         RunLoop::main().dispatch([this, recordFilter = WTFMove(recordFilter), blobFilter = WTFMove(blobFilter), recordsSize]() mutable {
324             for (auto& recordFilterKey : m_recordFilterHashesAddedDuringSynchronization)
325                 recordFilter->add(recordFilterKey);
326             m_recordFilterHashesAddedDuringSynchronization.clear();
327
328             for (auto& hash : m_blobFilterHashesAddedDuringSynchronization)
329                 blobFilter->add(hash);
330             m_blobFilterHashesAddedDuringSynchronization.clear();
331
332             m_recordFilter = WTFMove(recordFilter);
333             m_blobFilter = WTFMove(blobFilter);
334             m_approximateRecordsSize = recordsSize;
335             m_synchronizationInProgress = false;
336         });
337
338         m_blobStorage.synchronize();
339
340         deleteEmptyRecordsDirectories(recordsPath());
341
342         LOG(NetworkCacheStorage, "(NetworkProcess) cache synchronization completed size=%zu recordCount=%u", recordsSize, recordCount);
343
344         RunLoop::main().dispatch([protectedThis = WTFMove(protectedThis)] { });
345     });
346 }
347
348 void Storage::addToRecordFilter(const Key& key)
349 {
350     ASSERT(RunLoop::isMain());
351
352     if (m_recordFilter)
353         m_recordFilter->add(key.hash());
354
355     // If we get new entries during filter synchronization take care to add them to the new filter as well.
356     if (m_synchronizationInProgress)
357         m_recordFilterHashesAddedDuringSynchronization.append(key.hash());
358 }
359
360 bool Storage::mayContain(const Key& key) const
361 {
362     ASSERT(RunLoop::isMain());
363     return !m_recordFilter || m_recordFilter->mayContain(key.hash());
364 }
365
366 bool Storage::mayContainBlob(const Key& key) const
367 {
368     ASSERT(RunLoop::isMain());
369     if (!m_canUseBlobsForForBodyData)
370         return false;
371     return !m_blobFilter || m_blobFilter->mayContain(key.hash());
372 }
373
374 String Storage::recordDirectoryPathForKey(const Key& key) const
375 {
376     ASSERT(!key.type().isEmpty());
377     return WebCore::FileSystem::pathByAppendingComponent(WebCore::FileSystem::pathByAppendingComponent(recordsPath(), key.partitionHashAsString()), key.type());
378 }
379
380 String Storage::recordPathForKey(const Key& key) const
381 {
382     return WebCore::FileSystem::pathByAppendingComponent(recordDirectoryPathForKey(key), key.hashAsString());
383 }
384
385 static String blobPathForRecordPath(const String& recordPath)
386 {
387     return recordPath + blobSuffix;
388 }
389
390 String Storage::blobPathForKey(const Key& key) const
391 {
392     return blobPathForRecordPath(recordPathForKey(key));
393 }
394
395 struct RecordMetaData {
396     RecordMetaData() { }
397     explicit RecordMetaData(const Key& key)
398         : cacheStorageVersion(Storage::version)
399         , key(key)
400     { }
401
402     unsigned cacheStorageVersion;
403     Key key;
404     WallTime timeStamp;
405     SHA1::Digest headerHash;
406     uint64_t headerSize { 0 };
407     SHA1::Digest bodyHash;
408     uint64_t bodySize { 0 };
409     bool isBodyInline { false };
410
411     // Not encoded as a field. Header starts immediately after meta data.
412     uint64_t headerOffset { 0 };
413 };
414
415 static bool decodeRecordMetaData(RecordMetaData& metaData, const Data& fileData)
416 {
417     bool success = false;
418     fileData.apply([&metaData, &success](const uint8_t* data, size_t size) {
419         WTF::Persistence::Decoder decoder(data, size);
420         if (!decoder.decode(metaData.cacheStorageVersion))
421             return false;
422         if (!decoder.decode(metaData.key))
423             return false;
424         if (!decoder.decode(metaData.timeStamp))
425             return false;
426         if (!decoder.decode(metaData.headerHash))
427             return false;
428         if (!decoder.decode(metaData.headerSize))
429             return false;
430         if (!decoder.decode(metaData.bodyHash))
431             return false;
432         if (!decoder.decode(metaData.bodySize))
433             return false;
434         if (!decoder.decode(metaData.isBodyInline))
435             return false;
436         if (!decoder.verifyChecksum())
437             return false;
438         metaData.headerOffset = decoder.currentOffset();
439         success = true;
440         return false;
441     });
442     return success;
443 }
444
445 static bool decodeRecordHeader(const Data& fileData, RecordMetaData& metaData, Data& headerData, const Salt& salt)
446 {
447     if (!decodeRecordMetaData(metaData, fileData)) {
448         LOG(NetworkCacheStorage, "(NetworkProcess) meta data decode failure");
449         return false;
450     }
451
452     if (metaData.cacheStorageVersion != Storage::version) {
453         LOG(NetworkCacheStorage, "(NetworkProcess) version mismatch");
454         return false;
455     }
456
457     headerData = fileData.subrange(metaData.headerOffset, metaData.headerSize);
458     if (metaData.headerHash != computeSHA1(headerData, salt)) {
459         LOG(NetworkCacheStorage, "(NetworkProcess) header checksum mismatch");
460         return false;
461     }
462     return true;
463 }
464
465 void Storage::readRecord(ReadOperation& readOperation, const Data& recordData)
466 {
467     ASSERT(!RunLoop::isMain());
468
469     RecordMetaData metaData;
470     Data headerData;
471     if (!decodeRecordHeader(recordData, metaData, headerData, m_salt))
472         return;
473
474     if (metaData.key != readOperation.key)
475         return;
476
477     // Sanity check against time stamps in future.
478     if (metaData.timeStamp > WallTime::now())
479         return;
480
481     Data bodyData;
482     if (metaData.isBodyInline) {
483         size_t bodyOffset = metaData.headerOffset + headerData.size();
484         if (bodyOffset + metaData.bodySize != recordData.size())
485             return;
486         bodyData = recordData.subrange(bodyOffset, metaData.bodySize);
487         if (metaData.bodyHash != computeSHA1(bodyData, m_salt))
488             return;
489     }
490
491     readOperation.expectedBodyHash = metaData.bodyHash;
492     readOperation.resultRecord = std::make_unique<Storage::Record>(Storage::Record {
493         metaData.key,
494         metaData.timeStamp,
495         headerData,
496         bodyData,
497         metaData.bodyHash
498     });
499 }
500
501 static Data encodeRecordMetaData(const RecordMetaData& metaData)
502 {
503     WTF::Persistence::Encoder encoder;
504
505     encoder << metaData.cacheStorageVersion;
506     encoder << metaData.key;
507     encoder << metaData.timeStamp;
508     encoder << metaData.headerHash;
509     encoder << metaData.headerSize;
510     encoder << metaData.bodyHash;
511     encoder << metaData.bodySize;
512     encoder << metaData.isBodyInline;
513
514     encoder.encodeChecksum();
515
516     return Data(encoder.buffer(), encoder.bufferSize());
517 }
518
519 std::optional<BlobStorage::Blob> Storage::storeBodyAsBlob(WriteOperation& writeOperation)
520 {
521     auto blobPath = blobPathForKey(writeOperation.record.key);
522
523     // Store the body.
524     auto blob = m_blobStorage.add(blobPath, writeOperation.record.body);
525     if (blob.data.isNull())
526         return { };
527
528     ++writeOperation.activeCount;
529
530     RunLoop::main().dispatch([this, blob, &writeOperation] {
531         if (m_blobFilter)
532             m_blobFilter->add(writeOperation.record.key.hash());
533         if (m_synchronizationInProgress)
534             m_blobFilterHashesAddedDuringSynchronization.append(writeOperation.record.key.hash());
535
536         if (writeOperation.mappedBodyHandler)
537             writeOperation.mappedBodyHandler(blob.data);
538
539         finishWriteOperation(writeOperation);
540     });
541     return blob;
542 }
543
544 Data Storage::encodeRecord(const Record& record, std::optional<BlobStorage::Blob> blob)
545 {
546     ASSERT(!blob || bytesEqual(blob.value().data, record.body));
547
548     RecordMetaData metaData(record.key);
549     metaData.timeStamp = record.timeStamp;
550     metaData.headerHash = computeSHA1(record.header, m_salt);
551     metaData.headerSize = record.header.size();
552     metaData.bodyHash = blob ? blob.value().hash : computeSHA1(record.body, m_salt);
553     metaData.bodySize = record.body.size();
554     metaData.isBodyInline = !blob;
555
556     auto encodedMetaData = encodeRecordMetaData(metaData);
557     auto headerData = concatenate(encodedMetaData, record.header);
558
559     if (metaData.isBodyInline)
560         return concatenate(headerData, record.body);
561
562     return { headerData };
563 }
564
565 void Storage::removeFromPendingWriteOperations(const Key& key)
566 {
567     while (true) {
568         auto found = m_pendingWriteOperations.findIf([&key](auto& operation) {
569             return operation->record.key == key;
570         });
571
572         if (found == m_pendingWriteOperations.end())
573             break;
574
575         m_pendingWriteOperations.remove(found);
576     }
577 }
578
579 void Storage::remove(const Key& key)
580 {
581     ASSERT(RunLoop::isMain());
582
583     if (!mayContain(key))
584         return;
585
586     auto protectedThis = makeRef(*this);
587
588     // We can't remove the key from the Bloom filter (but some false positives are expected anyway).
589     // For simplicity we also don't reduce m_approximateSize on removals.
590     // The next synchronization will update everything.
591
592     removeFromPendingWriteOperations(key);
593
594     serialBackgroundIOQueue().dispatch([this, protectedThis = WTFMove(protectedThis), key] () mutable {
595         deleteFiles(key);
596         RunLoop::main().dispatch([protectedThis = WTFMove(protectedThis)] { });
597     });
598 }
599
600 void Storage::remove(const Vector<Key>& keys, Function<void ()>&& completionHandler)
601 {
602     ASSERT(RunLoop::isMain());
603
604     Vector<Key> keysToRemove;
605     keysToRemove.reserveInitialCapacity(keys.size());
606
607     for (auto& key : keys) {
608         if (!mayContain(key))
609             continue;
610         removeFromPendingWriteOperations(key);
611         keysToRemove.uncheckedAppend(key);
612     }
613
614     serialBackgroundIOQueue().dispatch([this, protectedThis = makeRef(*this), keysToRemove = WTFMove(keysToRemove), completionHandler = WTFMove(completionHandler)] () mutable {
615         for (auto& key : keysToRemove)
616             deleteFiles(key);
617
618         RunLoop::main().dispatch([protectedThis = WTFMove(protectedThis), completionHandler = WTFMove(completionHandler)] {
619             if (completionHandler)
620                 completionHandler();
621         });
622     });
623 }
624
625 void Storage::deleteFiles(const Key& key)
626 {
627     ASSERT(!RunLoop::isMain());
628
629     WebCore::FileSystem::deleteFile(recordPathForKey(key));
630     m_blobStorage.remove(blobPathForKey(key));
631 }
632
633 void Storage::updateFileModificationTime(const String& path)
634 {
635     serialBackgroundIOQueue().dispatch([path = path.isolatedCopy()] {
636         updateFileModificationTimeIfNeeded(path);
637     });
638 }
639
640 void Storage::dispatchReadOperation(std::unique_ptr<ReadOperation> readOperationPtr)
641 {
642     ASSERT(RunLoop::isMain());
643
644     auto& readOperation = *readOperationPtr;
645     m_activeReadOperations.add(WTFMove(readOperationPtr));
646
647     // Avoid randomness during testing.
648     if (m_mode != Mode::Testing) {
649         // I/O pressure may make disk operations slow. If they start taking very long time we rather go to network.
650         const Seconds readTimeout = 1500_ms;
651         m_readOperationTimeoutTimer.startOneShot(readTimeout);
652     }
653
654     bool shouldGetBodyBlob = mayContainBlob(readOperation.key);
655
656     ioQueue().dispatch([this, &readOperation, shouldGetBodyBlob] {
657         auto recordPath = recordPathForKey(readOperation.key);
658
659         ++readOperation.activeCount;
660         if (shouldGetBodyBlob)
661             ++readOperation.activeCount;
662
663         auto channel = IOChannel::open(recordPath, IOChannel::Type::Read);
664         channel->read(0, std::numeric_limits<size_t>::max(), &ioQueue(), [this, &readOperation](const Data& fileData, int error) {
665             if (!error)
666                 readRecord(readOperation, fileData);
667             finishReadOperation(readOperation);
668         });
669
670         if (shouldGetBodyBlob) {
671             // Read the blob in parallel with the record read.
672             auto blobPath = blobPathForKey(readOperation.key);
673             readOperation.resultBodyBlob = m_blobStorage.get(blobPath);
674             finishReadOperation(readOperation);
675         }
676     });
677 }
678
679 void Storage::finishReadOperation(ReadOperation& readOperation)
680 {
681     ASSERT(readOperation.activeCount);
682     // Record and blob reads must finish.
683     if (--readOperation.activeCount)
684         return;
685
686     RunLoop::main().dispatch([this, &readOperation] {
687         bool success = readOperation.finish();
688         if (success)
689             updateFileModificationTime(recordPathForKey(readOperation.key));
690         else if (!readOperation.isCanceled)
691             remove(readOperation.key);
692
693         auto protectedThis = makeRef(*this);
694
695         ASSERT(m_activeReadOperations.contains(&readOperation));
696         m_activeReadOperations.remove(&readOperation);
697
698         if (m_activeReadOperations.isEmpty())
699             m_readOperationTimeoutTimer.stop();
700         
701         dispatchPendingReadOperations();
702
703         LOG(NetworkCacheStorage, "(NetworkProcess) read complete success=%d", success);
704     });
705 }
706
707 void Storage::cancelAllReadOperations()
708 {
709     ASSERT(RunLoop::isMain());
710
711     for (auto& readOperation : m_activeReadOperations)
712         readOperation->cancel();
713
714     size_t pendingCount = 0;
715     for (int priority = maximumRetrievePriority; priority >= 0; --priority) {
716         auto& pendingRetrieveQueue = m_pendingReadOperationsByPriority[priority];
717         pendingCount += pendingRetrieveQueue.size();
718         for (auto it = pendingRetrieveQueue.rbegin(), end = pendingRetrieveQueue.rend(); it != end; ++it)
719             (*it)->cancel();
720         pendingRetrieveQueue.clear();
721     }
722
723     LOG(NetworkCacheStorage, "(NetworkProcess) retrieve timeout, canceled %u active and %zu pending", m_activeReadOperations.size(), pendingCount);
724 }
725
726 void Storage::dispatchPendingReadOperations()
727 {
728     ASSERT(RunLoop::isMain());
729
730     const int maximumActiveReadOperationCount = 5;
731
732     for (int priority = maximumRetrievePriority; priority >= 0; --priority) {
733         if (m_activeReadOperations.size() > maximumActiveReadOperationCount) {
734             LOG(NetworkCacheStorage, "(NetworkProcess) limiting parallel retrieves");
735             return;
736         }
737         auto& pendingRetrieveQueue = m_pendingReadOperationsByPriority[priority];
738         if (pendingRetrieveQueue.isEmpty())
739             continue;
740         dispatchReadOperation(pendingRetrieveQueue.takeLast());
741     }
742 }
743
744 template <class T> bool retrieveFromMemory(const T& operations, const Key& key, Storage::RetrieveCompletionHandler& completionHandler)
745 {
746     for (auto& operation : operations) {
747         if (operation->record.key == key) {
748             LOG(NetworkCacheStorage, "(NetworkProcess) found write operation in progress");
749             RunLoop::main().dispatch([record = operation->record, completionHandler = WTFMove(completionHandler)] {
750                 completionHandler(std::make_unique<Storage::Record>(record));
751             });
752             return true;
753         }
754     }
755     return false;
756 }
757
758 void Storage::dispatchPendingWriteOperations()
759 {
760     ASSERT(RunLoop::isMain());
761
762     const int maximumActiveWriteOperationCount { 1 };
763
764     while (!m_pendingWriteOperations.isEmpty()) {
765         if (m_activeWriteOperations.size() >= maximumActiveWriteOperationCount) {
766             LOG(NetworkCacheStorage, "(NetworkProcess) limiting parallel writes");
767             return;
768         }
769         dispatchWriteOperation(m_pendingWriteOperations.takeLast());
770     }
771 }
772
773 bool Storage::shouldStoreBodyAsBlob(const Data& bodyData)
774 {
775     if (!m_canUseBlobsForForBodyData)
776         return false;
777     return bodyData.size() > maximumInlineBodySize;
778 }
779
780 void Storage::dispatchWriteOperation(std::unique_ptr<WriteOperation> writeOperationPtr)
781 {
782     ASSERT(RunLoop::isMain());
783
784     auto& writeOperation = *writeOperationPtr;
785     m_activeWriteOperations.add(WTFMove(writeOperationPtr));
786
787     // This was added already when starting the store but filter might have been wiped.
788     addToRecordFilter(writeOperation.record.key);
789
790     backgroundIOQueue().dispatch([this, &writeOperation] {
791         auto recordDirectorPath = recordDirectoryPathForKey(writeOperation.record.key);
792         auto recordPath = recordPathForKey(writeOperation.record.key);
793
794         WebCore::FileSystem::makeAllDirectories(recordDirectorPath);
795
796         ++writeOperation.activeCount;
797
798         bool shouldStoreAsBlob = shouldStoreBodyAsBlob(writeOperation.record.body);
799         auto blob = shouldStoreAsBlob ? storeBodyAsBlob(writeOperation) : std::nullopt;
800
801         auto recordData = encodeRecord(writeOperation.record, blob);
802
803         auto channel = IOChannel::open(recordPath, IOChannel::Type::Create);
804         size_t recordSize = recordData.size();
805         channel->write(0, recordData, nullptr, [this, &writeOperation, recordSize](int error) {
806             // On error the entry still stays in the contents filter until next synchronization.
807             m_approximateRecordsSize += recordSize;
808             finishWriteOperation(writeOperation);
809
810             LOG(NetworkCacheStorage, "(NetworkProcess) write complete error=%d", error);
811         });
812     });
813 }
814
815 void Storage::finishWriteOperation(WriteOperation& writeOperation)
816 {
817     ASSERT(RunLoop::isMain());
818     ASSERT(writeOperation.activeCount);
819     ASSERT(m_activeWriteOperations.contains(&writeOperation));
820
821     if (--writeOperation.activeCount)
822         return;
823
824     auto protectedThis = makeRef(*this);
825
826     if (writeOperation.completionHandler)
827         writeOperation.completionHandler();
828
829     m_activeWriteOperations.remove(&writeOperation);
830     dispatchPendingWriteOperations();
831
832     shrinkIfNeeded();
833 }
834
835 void Storage::retrieve(const Key& key, unsigned priority, RetrieveCompletionHandler&& completionHandler)
836 {
837     ASSERT(RunLoop::isMain());
838     ASSERT(priority <= maximumRetrievePriority);
839     ASSERT(!key.isNull());
840
841     if (!m_capacity) {
842         completionHandler(nullptr);
843         return;
844     }
845
846     if (!mayContain(key)) {
847         completionHandler(nullptr);
848         return;
849     }
850
851     if (retrieveFromMemory(m_pendingWriteOperations, key, completionHandler))
852         return;
853     if (retrieveFromMemory(m_activeWriteOperations, key, completionHandler))
854         return;
855
856     auto readOperation = std::make_unique<ReadOperation>(*this, key, WTFMove(completionHandler));
857     m_pendingReadOperationsByPriority[priority].prepend(WTFMove(readOperation));
858     dispatchPendingReadOperations();
859 }
860
861 void Storage::store(const Record& record, MappedBodyHandler&& mappedBodyHandler, CompletionHandler<void()>&& completionHandler)
862 {
863     ASSERT(RunLoop::isMain());
864     ASSERT(!record.key.isNull());
865
866     if (!m_capacity)
867         return;
868
869     auto writeOperation = std::make_unique<WriteOperation>(*this, record, WTFMove(mappedBodyHandler), WTFMove(completionHandler));
870     m_pendingWriteOperations.prepend(WTFMove(writeOperation));
871
872     // Add key to the filter already here as we do lookups from the pending operations too.
873     addToRecordFilter(record.key);
874
875     bool isInitialWrite = m_pendingWriteOperations.size() == 1;
876     if (!isInitialWrite)
877         return;
878
879     m_writeOperationDispatchTimer.startOneShot(m_initialWriteDelay);
880 }
881
882 void Storage::traverse(const String& type, TraverseFlags flags, TraverseHandler&& traverseHandler)
883 {
884     ASSERT(RunLoop::isMain());
885     ASSERT(traverseHandler);
886     // Avoid non-thread safe Function copies.
887
888     auto traverseOperationPtr = std::make_unique<TraverseOperation>(*this, type, flags, WTFMove(traverseHandler));
889     auto& traverseOperation = *traverseOperationPtr;
890     m_activeTraverseOperations.add(WTFMove(traverseOperationPtr));
891
892     ioQueue().dispatch([this, &traverseOperation] {
893         traverseRecordsFiles(recordsPath(), traverseOperation.type, [this, &traverseOperation](const String& fileName, const String& hashString, const String& type, bool isBlob, const String& recordDirectoryPath) {
894             ASSERT(type == traverseOperation.type || traverseOperation.type.isEmpty());
895             if (isBlob)
896                 return;
897
898             auto recordPath = WebCore::FileSystem::pathByAppendingComponent(recordDirectoryPath, fileName);
899
900             double worth = -1;
901             if (traverseOperation.flags & TraverseFlag::ComputeWorth)
902                 worth = computeRecordWorth(fileTimes(recordPath));
903             unsigned bodyShareCount = 0;
904             if (traverseOperation.flags & TraverseFlag::ShareCount)
905                 bodyShareCount = m_blobStorage.shareCount(blobPathForRecordPath(recordPath));
906
907             std::unique_lock<Lock> lock(traverseOperation.activeMutex);
908             ++traverseOperation.activeCount;
909
910             auto channel = IOChannel::open(recordPath, IOChannel::Type::Read);
911             channel->read(0, std::numeric_limits<size_t>::max(), nullptr, [this, &traverseOperation, worth, bodyShareCount](Data& fileData, int) {
912                 RecordMetaData metaData;
913                 Data headerData;
914                 if (decodeRecordHeader(fileData, metaData, headerData, m_salt)) {
915                     Record record {
916                         metaData.key,
917                         metaData.timeStamp,
918                         headerData,
919                         { },
920                         metaData.bodyHash
921                     };
922                     RecordInfo info {
923                         static_cast<size_t>(metaData.bodySize),
924                         worth,
925                         bodyShareCount,
926                         String::fromUTF8(SHA1::hexDigest(metaData.bodyHash))
927                     };
928                     traverseOperation.handler(&record, info);
929                 }
930
931                 std::lock_guard<Lock> lock(traverseOperation.activeMutex);
932                 --traverseOperation.activeCount;
933                 traverseOperation.activeCondition.notifyOne();
934             });
935
936             static const unsigned maximumParallelReadCount = 5;
937             traverseOperation.activeCondition.wait(lock, [&traverseOperation] {
938                 return traverseOperation.activeCount <= maximumParallelReadCount;
939             });
940         });
941         {
942             // Wait for all reads to finish.
943             std::unique_lock<Lock> lock(traverseOperation.activeMutex);
944             traverseOperation.activeCondition.wait(lock, [&traverseOperation] {
945                 return !traverseOperation.activeCount;
946             });
947         }
948         RunLoop::main().dispatch([this, &traverseOperation] {
949             traverseOperation.handler(nullptr, { });
950
951             auto protectedThis = makeRef(*this);
952
953             m_activeTraverseOperations.remove(&traverseOperation);
954         });
955     });
956 }
957
958 void Storage::setCapacity(size_t capacity)
959 {
960     ASSERT(RunLoop::isMain());
961
962 #if !ASSERT_DISABLED
963     const size_t assumedAverageRecordSize = 50 << 10;
964     size_t maximumRecordCount = capacity / assumedAverageRecordSize;
965     // ~10 bits per element are required for <1% false positive rate.
966     size_t effectiveBloomFilterCapacity = ContentsFilter::tableSize / 10;
967     // If this gets hit it might be time to increase the filter size.
968     ASSERT(maximumRecordCount < effectiveBloomFilterCapacity);
969 #endif
970
971     m_capacity = capacity;
972
973     shrinkIfNeeded();
974 }
975
976 void Storage::clear(const String& type, WallTime modifiedSinceTime, Function<void ()>&& completionHandler)
977 {
978     ASSERT(RunLoop::isMain());
979     LOG(NetworkCacheStorage, "(NetworkProcess) clearing cache");
980
981     if (m_recordFilter)
982         m_recordFilter->clear();
983     if (m_blobFilter)
984         m_blobFilter->clear();
985     m_approximateRecordsSize = 0;
986
987     ioQueue().dispatch([this, protectedThis = makeRef(*this), modifiedSinceTime, completionHandler = WTFMove(completionHandler), type = type.isolatedCopy()] () mutable {
988         auto recordsPath = this->recordsPath();
989         traverseRecordsFiles(recordsPath, type, [modifiedSinceTime](const String& fileName, const String& hashString, const String& type, bool isBlob, const String& recordDirectoryPath) {
990             auto filePath = WebCore::FileSystem::pathByAppendingComponent(recordDirectoryPath, fileName);
991             if (modifiedSinceTime > -WallTime::infinity()) {
992                 auto times = fileTimes(filePath);
993                 if (times.modification < modifiedSinceTime)
994                     return;
995             }
996             WebCore::FileSystem::deleteFile(filePath);
997         });
998
999         deleteEmptyRecordsDirectories(recordsPath);
1000
1001         // This cleans unreferenced blobs.
1002         m_blobStorage.synchronize();
1003
1004         RunLoop::main().dispatch([protectedThis = WTFMove(protectedThis), completionHandler = WTFMove(completionHandler)] {
1005             if (completionHandler)
1006                 completionHandler();
1007         });
1008     });
1009 }
1010
1011 static double computeRecordWorth(FileTimes times)
1012 {
1013     auto age = WallTime::now() - times.creation;
1014     // File modification time is updated manually on cache read. We don't use access time since OS may update it automatically.
1015     auto accessAge = times.modification - times.creation;
1016
1017     // For sanity.
1018     if (age <= 0_s || accessAge < 0_s || accessAge > age)
1019         return 0;
1020
1021     // We like old entries that have been accessed recently.
1022     return accessAge / age;
1023 }
1024
1025 static double deletionProbability(FileTimes times, unsigned bodyShareCount)
1026 {
1027     static const double maximumProbability { 0.33 };
1028     static const unsigned maximumEffectiveShareCount { 5 };
1029
1030     auto worth = computeRecordWorth(times);
1031
1032     // Adjust a bit so the most valuable entries don't get deleted at all.
1033     auto effectiveWorth = std::min(1.1 * worth, 1.);
1034
1035     auto probability =  (1 - effectiveWorth) * maximumProbability;
1036
1037     // It is less useful to remove an entry that shares its body data.
1038     if (bodyShareCount)
1039         probability /= std::min(bodyShareCount, maximumEffectiveShareCount);
1040
1041     return probability;
1042 }
1043
1044 void Storage::shrinkIfNeeded()
1045 {
1046     ASSERT(RunLoop::isMain());
1047
1048     // Avoid randomness caused by cache shrinks.
1049     if (m_mode == Mode::Testing)
1050         return;
1051
1052     if (approximateSize() > m_capacity)
1053         shrink();
1054 }
1055
1056 void Storage::shrink()
1057 {
1058     ASSERT(RunLoop::isMain());
1059
1060     if (m_shrinkInProgress || m_synchronizationInProgress)
1061         return;
1062     m_shrinkInProgress = true;
1063
1064     LOG(NetworkCacheStorage, "(NetworkProcess) shrinking cache approximateSize=%zu capacity=%zu", approximateSize(), m_capacity);
1065
1066     backgroundIOQueue().dispatch([this, protectedThis = makeRef(*this)] () mutable {
1067         auto recordsPath = this->recordsPath();
1068         String anyType;
1069         traverseRecordsFiles(recordsPath, anyType, [this](const String& fileName, const String& hashString, const String& type, bool isBlob, const String& recordDirectoryPath) {
1070             if (isBlob)
1071                 return;
1072
1073             auto recordPath = WebCore::FileSystem::pathByAppendingComponent(recordDirectoryPath, fileName);
1074             auto blobPath = blobPathForRecordPath(recordPath);
1075
1076             auto times = fileTimes(recordPath);
1077             unsigned bodyShareCount = m_blobStorage.shareCount(blobPath);
1078             auto probability = deletionProbability(times, bodyShareCount);
1079
1080             bool shouldDelete = randomNumber() < probability;
1081
1082             LOG(NetworkCacheStorage, "Deletion probability=%f bodyLinkCount=%d shouldDelete=%d", probability, bodyShareCount, shouldDelete);
1083
1084             if (shouldDelete) {
1085                 WebCore::FileSystem::deleteFile(recordPath);
1086                 m_blobStorage.remove(blobPath);
1087             }
1088         });
1089
1090         RunLoop::main().dispatch([this, protectedThis = WTFMove(protectedThis)] {
1091             m_shrinkInProgress = false;
1092             // We could synchronize during the shrink traversal. However this is fast and it is better to have just one code path.
1093             synchronize();
1094         });
1095
1096         LOG(NetworkCacheStorage, "(NetworkProcess) cache shrink completed");
1097     });
1098 }
1099
1100 void Storage::deleteOldVersions()
1101 {
1102     backgroundIOQueue().dispatch([this, protectedThis = makeRef(*this)] () mutable {
1103         auto cachePath = basePath();
1104         traverseDirectory(cachePath, [&cachePath](const String& subdirName, DirectoryEntryType type) {
1105             if (type != DirectoryEntryType::Directory)
1106                 return;
1107             if (!subdirName.startsWith(versionDirectoryPrefix))
1108                 return;
1109             auto versionString = subdirName.substring(strlen(versionDirectoryPrefix));
1110             bool success;
1111             unsigned directoryVersion = versionString.toUIntStrict(&success);
1112             if (!success)
1113                 return;
1114             if (directoryVersion >= version)
1115                 return;
1116 #if PLATFORM(MAC)
1117             if (directoryVersion == lastStableVersion)
1118                 return;
1119 #endif
1120
1121             auto oldVersionPath = WebCore::FileSystem::pathByAppendingComponent(cachePath, subdirName);
1122             LOG(NetworkCacheStorage, "(NetworkProcess) deleting old cache version, path %s", oldVersionPath.utf8().data());
1123
1124             deleteDirectoryRecursively(oldVersionPath);
1125         });
1126
1127         RunLoop::main().dispatch([protectedThis = WTFMove(protectedThis)] { });
1128     });
1129 }
1130
1131 }
1132 }