Make NetworkCache::traverse faster
[WebKit-https.git] / Source/WebKit2/NetworkProcess/cache/NetworkCacheStorage.cpp
/*
 * Copyright (C) 2014-2015 Apple Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS''
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
 * THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "config.h"
#include "NetworkCacheStorage.h"

#if ENABLE(NETWORK_CACHE)

#include "Logging.h"
#include "NetworkCacheCoders.h"
#include "NetworkCacheFileSystem.h"
#include "NetworkCacheIOChannel.h"
#include <condition_variable>
#include <wtf/RandomNumber.h>
#include <wtf/RunLoop.h>
#include <wtf/text/CString.h>
#include <wtf/text/StringBuilder.h>

namespace WebKit {
namespace NetworkCache {

static const char versionDirectoryPrefix[] = "Version ";
static const char recordsDirectoryName[] = "Records";
static const char blobsDirectoryName[] = "Blobs";
static const char bodyPostfix[] = "-body";

static double computeRecordWorth(FileTimes);

struct Storage::ReadOperation {
    ReadOperation(const Key& key, const RetrieveCompletionHandler& completionHandler)
        : key(key)
        , completionHandler(completionHandler)
    { }

    const Key key;
    const RetrieveCompletionHandler completionHandler;

    std::unique_ptr<Record> resultRecord;
    SHA1::Digest expectedBodyHash;
    BlobStorage::Blob resultBodyBlob;
    std::atomic<unsigned> activeCount { 0 };
};

struct Storage::WriteOperation {
    WriteOperation(const Record& record, const MappedBodyHandler& mappedBodyHandler)
        : record(record)
        , mappedBodyHandler(mappedBodyHandler)
    { }

    const Record record;
    const MappedBodyHandler mappedBodyHandler;

    std::atomic<unsigned> activeCount { 0 };
};

struct Storage::TraverseOperation {
    TraverseOperation(TraverseFlags flags, const TraverseHandler& handler)
        : flags(flags)
        , handler(handler)
    { }

    const TraverseFlags flags;
    const TraverseHandler handler;

    std::mutex activeMutex;
    std::condition_variable activeCondition;
    unsigned activeCount { 0 };
};

std::unique_ptr<Storage> Storage::open(const String& cachePath)
{
    ASSERT(RunLoop::isMain());

    if (!WebCore::makeAllDirectories(cachePath))
        return nullptr;
    return std::unique_ptr<Storage>(new Storage(cachePath));
}

static String makeVersionedDirectoryPath(const String& baseDirectoryPath)
{
    String versionSubdirectory = versionDirectoryPrefix + String::number(Storage::version);
    return WebCore::pathByAppendingComponent(baseDirectoryPath, versionSubdirectory);
}

static String makeRecordsDirectoryPath(const String& baseDirectoryPath)
{
    return WebCore::pathByAppendingComponent(makeVersionedDirectoryPath(baseDirectoryPath), recordsDirectoryName);
}

static String makeBlobDirectoryPath(const String& baseDirectoryPath)
{
    return WebCore::pathByAppendingComponent(makeVersionedDirectoryPath(baseDirectoryPath), blobsDirectoryName);
}

void traverseRecordsFiles(const String& recordsPath, const std::function<void (const String&, const String&)>& function)
{
    traverseDirectory(recordsPath, [&recordsPath, &function](const String& subdirName, DirectoryEntryType type) {
        if (type != DirectoryEntryType::Directory)
            return;
        String partitionPath = WebCore::pathByAppendingComponent(recordsPath, subdirName);
        traverseDirectory(partitionPath, [&function, &partitionPath](const String& fileName, DirectoryEntryType type) {
            if (type != DirectoryEntryType::File)
                return;
            function(fileName, partitionPath);
        });
    });
}

static void deleteEmptyRecordsDirectories(const String& recordsPath)
{
    traverseDirectory(recordsPath, [&recordsPath](const String& subdirName, DirectoryEntryType type) {
        if (type != DirectoryEntryType::Directory)
            return;
        // Let the system figure out whether the directory is really empty.
        WebCore::deleteEmptyDirectory(WebCore::pathByAppendingComponent(recordsPath, subdirName));
    });
}

Storage::Storage(const String& baseDirectoryPath)
    : m_basePath(baseDirectoryPath)
    , m_recordsPath(makeRecordsDirectoryPath(baseDirectoryPath))
    , m_writeOperationDispatchTimer(*this, &Storage::dispatchPendingWriteOperations)
    , m_ioQueue(WorkQueue::create("com.apple.WebKit.Cache.Storage", WorkQueue::Type::Concurrent))
    , m_backgroundIOQueue(WorkQueue::create("com.apple.WebKit.Cache.Storage.background", WorkQueue::Type::Concurrent, WorkQueue::QOS::Background))
    , m_serialBackgroundIOQueue(WorkQueue::create("com.apple.WebKit.Cache.Storage.serialBackground", WorkQueue::Type::Serial, WorkQueue::QOS::Background))
    , m_blobStorage(makeBlobDirectoryPath(baseDirectoryPath))
{
    deleteOldVersions();
    synchronize();
}

Storage::~Storage()
{
}

String Storage::basePath() const
{
    return m_basePath.isolatedCopy();
}

String Storage::versionPath() const
{
    return makeVersionedDirectoryPath(basePath());
}

String Storage::recordsPath() const
{
    return m_recordsPath.isolatedCopy();
}

size_t Storage::approximateSize() const
{
    return m_approximateRecordsSize + m_blobStorage.approximateSize();
}

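// Rebuilds the record and body contents filters and the approximate records size by scanning
// the records directory on the background queue. Malformed and zero-sized files are deleted
// along the way; keys added while the scan is in progress are merged into the new filters.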
void Storage::synchronize()
{
    ASSERT(RunLoop::isMain());

    if (m_synchronizationInProgress || m_shrinkInProgress)
        return;
    m_synchronizationInProgress = true;

    LOG(NetworkCacheStorage, "(NetworkProcess) synchronizing cache");

    backgroundIOQueue().dispatch([this] {
        auto recordFilter = std::make_unique<ContentsFilter>();
        auto bodyFilter = std::make_unique<ContentsFilter>();
        size_t recordsSize = 0;
        unsigned count = 0;
        traverseRecordsFiles(recordsPath(), [&recordFilter, &bodyFilter, &recordsSize, &count](const String& fileName, const String& partitionPath) {
            auto filePath = WebCore::pathByAppendingComponent(partitionPath, fileName);

            bool isBody = fileName.endsWith(bodyPostfix);
            String hashString = isBody ? fileName.substring(0, Key::hashStringLength()) : fileName;
            Key::HashType hash;
            if (!Key::stringToHash(hashString, hash)) {
                WebCore::deleteFile(filePath);
                return;
            }
            long long fileSize = 0;
            WebCore::getFileSize(filePath, fileSize);
            if (!fileSize) {
                WebCore::deleteFile(filePath);
                return;
            }
            if (isBody) {
                bodyFilter->add(hash);
                return;
            }
            recordFilter->add(hash);
            recordsSize += fileSize;
            ++count;
        });

        auto* recordFilterPtr = recordFilter.release();
        auto* bodyFilterPtr = bodyFilter.release();
        RunLoop::main().dispatch([this, recordFilterPtr, bodyFilterPtr, recordsSize] {
            auto recordFilter = std::unique_ptr<ContentsFilter>(recordFilterPtr);
            auto bodyFilter = std::unique_ptr<ContentsFilter>(bodyFilterPtr);

            for (auto hash : m_recordFilterHashesAddedDuringSynchronization)
                recordFilter->add(hash);
            m_recordFilterHashesAddedDuringSynchronization.clear();

            for (auto hash : m_bodyFilterHashesAddedDuringSynchronization)
                bodyFilter->add(hash);
            m_bodyFilterHashesAddedDuringSynchronization.clear();

            m_recordFilter = WTF::move(recordFilter);
            m_bodyFilter = WTF::move(bodyFilter);
            m_approximateRecordsSize = recordsSize;
            m_synchronizationInProgress = false;
        });

        m_blobStorage.synchronize();

        deleteEmptyRecordsDirectories(recordsPath());

        LOG(NetworkCacheStorage, "(NetworkProcess) cache synchronization completed size=%zu count=%d", recordsSize, count);
    });
}

void Storage::addToRecordFilter(const Key& key)
{
    ASSERT(RunLoop::isMain());

    if (m_recordFilter)
        m_recordFilter->add(key.hash());

    // If we get new entries during filter synchronization, take care to add them to the new filter as well.
    if (m_synchronizationInProgress)
        m_recordFilterHashesAddedDuringSynchronization.append(key.hash());
}

bool Storage::mayContain(const Key& key) const
{
    ASSERT(RunLoop::isMain());
    return !m_recordFilter || m_recordFilter->mayContain(key.hash());
}

String Storage::partitionPathForKey(const Key& key) const
{
    ASSERT(!key.partition().isEmpty());
    return WebCore::pathByAppendingComponent(recordsPath(), key.partition());
}

static String fileNameForKey(const Key& key)
{
    return key.hashAsString();
}

String Storage::recordPathForKey(const Key& key) const
{
    return WebCore::pathByAppendingComponent(partitionPathForKey(key), fileNameForKey(key));
}

static String bodyPathForRecordPath(const String& recordPath)
{
    return recordPath + bodyPostfix;
}

String Storage::bodyPathForKey(const Key& key) const
{
    return bodyPathForRecordPath(recordPathForKey(key));
}

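// On-disk record layout: the encoded RecordMetaData, immediately followed by the header data
// and, when the body is stored inline, the body data.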
struct RecordMetaData {
    RecordMetaData() { }
    explicit RecordMetaData(const Key& key)
        : cacheStorageVersion(Storage::version)
        , key(key)
    { }

    unsigned cacheStorageVersion;
    Key key;
    // FIXME: Add encoder/decoder for time_point.
    std::chrono::milliseconds epochRelativeTimeStamp;
    SHA1::Digest headerHash;
    uint64_t headerSize;
    SHA1::Digest bodyHash;
    uint64_t bodySize;
    bool isBodyInline;

    // Not encoded as a field. Header starts immediately after meta data.
    uint64_t headerOffset;
};

static bool decodeRecordMetaData(RecordMetaData& metaData, const Data& fileData)
{
    bool success = false;
    fileData.apply([&metaData, &success](const uint8_t* data, size_t size) {
        Decoder decoder(data, size);
        if (!decoder.decode(metaData.cacheStorageVersion))
            return false;
        if (!decoder.decode(metaData.key))
            return false;
        if (!decoder.decode(metaData.epochRelativeTimeStamp))
            return false;
        if (!decoder.decode(metaData.headerHash))
            return false;
        if (!decoder.decode(metaData.headerSize))
            return false;
        if (!decoder.decode(metaData.bodyHash))
            return false;
        if (!decoder.decode(metaData.bodySize))
            return false;
        if (!decoder.decode(metaData.isBodyInline))
            return false;
        if (!decoder.verifyChecksum())
            return false;
        metaData.headerOffset = decoder.currentOffset();
        success = true;
        return false;
    });
    return success;
}

static bool decodeRecordHeader(const Data& fileData, RecordMetaData& metaData, Data& headerData)
{
    if (!decodeRecordMetaData(metaData, fileData)) {
        LOG(NetworkCacheStorage, "(NetworkProcess) meta data decode failure");
        return false;
    }

    if (metaData.cacheStorageVersion != Storage::version) {
        LOG(NetworkCacheStorage, "(NetworkProcess) version mismatch");
        return false;
    }

    headerData = fileData.subrange(metaData.headerOffset, metaData.headerSize);
    if (metaData.headerHash != computeSHA1(headerData)) {
        LOG(NetworkCacheStorage, "(NetworkProcess) header checksum mismatch");
        return false;
    }
    return true;
}

void Storage::readRecord(ReadOperation& readOperation, const Data& recordData)
{
    ASSERT(!RunLoop::isMain());

    RecordMetaData metaData;
    Data headerData;
    if (!decodeRecordHeader(recordData, metaData, headerData))
        return;

    if (metaData.key != readOperation.key)
        return;

    // Sanity check against time stamps in the future.
    auto timeStamp = std::chrono::system_clock::time_point(metaData.epochRelativeTimeStamp);
    if (timeStamp > std::chrono::system_clock::now())
        return;

    Data bodyData;
    if (metaData.isBodyInline) {
        size_t bodyOffset = metaData.headerOffset + headerData.size();
        if (bodyOffset + metaData.bodySize != recordData.size())
            return;
        bodyData = recordData.subrange(bodyOffset, metaData.bodySize);
        if (metaData.bodyHash != computeSHA1(bodyData))
            return;
    }

    readOperation.expectedBodyHash = metaData.bodyHash;
    readOperation.resultRecord = std::make_unique<Storage::Record>(Storage::Record {
        metaData.key,
        timeStamp,
        headerData,
        bodyData
    });
}

static Data encodeRecordMetaData(const RecordMetaData& metaData)
{
    Encoder encoder;

    encoder << metaData.cacheStorageVersion;
    encoder << metaData.key;
    encoder << metaData.epochRelativeTimeStamp;
    encoder << metaData.headerHash;
    encoder << metaData.headerSize;
    encoder << metaData.bodyHash;
    encoder << metaData.bodySize;
    encoder << metaData.isBodyInline;

    encoder.encodeChecksum();

    return Data(encoder.buffer(), encoder.bufferSize());
}

Optional<BlobStorage::Blob> Storage::storeBodyAsBlob(WriteOperation& writeOperation)
{
    auto bodyPath = bodyPathForKey(writeOperation.record.key);

    // Store the body.
    auto blob = m_blobStorage.add(bodyPath, writeOperation.record.body);
    if (blob.data.isNull())
        return { };

    ++writeOperation.activeCount;

    RunLoop::main().dispatch([this, blob, &writeOperation] {
        if (m_bodyFilter)
            m_bodyFilter->add(writeOperation.record.key.hash());
        if (m_synchronizationInProgress)
            m_bodyFilterHashesAddedDuringSynchronization.append(writeOperation.record.key.hash());

        if (writeOperation.mappedBodyHandler)
            writeOperation.mappedBodyHandler(blob.data);

        finishWriteOperation(writeOperation);
    });
    return blob;
}

Data Storage::encodeRecord(const Record& record, Optional<BlobStorage::Blob> blob)
{
    ASSERT(!blob || bytesEqual(blob.value().data, record.body));

    RecordMetaData metaData(record.key);
    metaData.epochRelativeTimeStamp = std::chrono::duration_cast<std::chrono::milliseconds>(record.timeStamp.time_since_epoch());
    metaData.headerHash = computeSHA1(record.header);
    metaData.headerSize = record.header.size();
    metaData.bodyHash = blob ? blob.value().hash : computeSHA1(record.body);
    metaData.bodySize = record.body.size();
    metaData.isBodyInline = !blob;

    auto encodedMetaData = encodeRecordMetaData(metaData);
    auto headerData = concatenate(encodedMetaData, record.header);

    if (metaData.isBodyInline)
        return concatenate(headerData, record.body);

    return { headerData };
}

void Storage::remove(const Key& key)
{
    ASSERT(RunLoop::isMain());

    // We can't remove the key from the Bloom filter (but some false positives are expected anyway).
    // For simplicity we also don't reduce m_approximateRecordsSize on removals.
    // The next synchronization will update everything.

    serialBackgroundIOQueue().dispatch([this, key] {
        WebCore::deleteFile(recordPathForKey(key));
        m_blobStorage.remove(bodyPathForKey(key));
    });
}

void Storage::updateFileModificationTime(const String& path)
{
    StringCapture filePathCapture(path);
    serialBackgroundIOQueue().dispatch([filePathCapture] {
        updateFileModificationTimeIfNeeded(filePathCapture.string());
    });
}

void Storage::dispatchReadOperation(ReadOperation& readOperation)
{
    ASSERT(RunLoop::isMain());
    ASSERT(m_activeReadOperations.contains(&readOperation));

    auto recordPath = recordPathForKey(readOperation.key);

    ++readOperation.activeCount;

    bool shouldGetBodyBlob = !m_bodyFilter || m_bodyFilter->mayContain(readOperation.key.hash());
    if (shouldGetBodyBlob)
        ++readOperation.activeCount;

    RefPtr<IOChannel> channel = IOChannel::open(recordPath, IOChannel::Type::Read);
    channel->read(0, std::numeric_limits<size_t>::max(), &ioQueue(), [this, &readOperation](const Data& fileData, int error) {
        if (!error)
            readRecord(readOperation, fileData);
        finishReadOperation(readOperation);
    });

    if (!shouldGetBodyBlob)
        return;

    // Read the body blob in parallel with the record read.
    ioQueue().dispatch([this, &readOperation] {
        auto bodyPath = bodyPathForKey(readOperation.key);
        readOperation.resultBodyBlob = m_blobStorage.get(bodyPath);
        finishReadOperation(readOperation);
    });
}

void Storage::finishReadOperation(ReadOperation& readOperation)
{
    ASSERT(readOperation.activeCount);
    // Record and body blob reads must finish.
    if (--readOperation.activeCount)
        return;

    RunLoop::main().dispatch([this, &readOperation] {
        if (readOperation.resultRecord && readOperation.resultRecord->body.isNull()) {
            if (readOperation.resultBodyBlob.hash == readOperation.expectedBodyHash)
                readOperation.resultRecord->body = readOperation.resultBodyBlob.data;
            else
                readOperation.resultRecord = nullptr;
        }

        bool success = readOperation.completionHandler(WTF::move(readOperation.resultRecord));
        if (success)
            updateFileModificationTime(recordPathForKey(readOperation.key));
        else
            remove(readOperation.key);
        ASSERT(m_activeReadOperations.contains(&readOperation));
        m_activeReadOperations.remove(&readOperation);
        dispatchPendingReadOperations();

        LOG(NetworkCacheStorage, "(NetworkProcess) read complete success=%d", success);
    });
}

void Storage::dispatchPendingReadOperations()
{
    ASSERT(RunLoop::isMain());

    const int maximumActiveReadOperationCount = 5;

    for (int priority = maximumRetrievePriority; priority >= 0; --priority) {
        if (m_activeReadOperations.size() > maximumActiveReadOperationCount) {
            LOG(NetworkCacheStorage, "(NetworkProcess) limiting parallel retrieves");
            return;
        }
        auto& pendingRetrieveQueue = m_pendingReadOperationsByPriority[priority];
        if (pendingRetrieveQueue.isEmpty())
            continue;
        auto readOperation = pendingRetrieveQueue.takeLast();
        auto& read = *readOperation;
        m_activeReadOperations.add(WTF::move(readOperation));
        dispatchReadOperation(read);
    }
}

template <class T> bool retrieveFromMemory(const T& operations, const Key& key, Storage::RetrieveCompletionHandler& completionHandler)
{
    for (auto& operation : operations) {
        if (operation->record.key == key) {
            LOG(NetworkCacheStorage, "(NetworkProcess) found write operation in progress");
            auto record = operation->record;
            RunLoop::main().dispatch([record, completionHandler] {
                completionHandler(std::make_unique<Storage::Record>(record));
            });
            return true;
        }
    }
    return false;
}

void Storage::dispatchPendingWriteOperations()
{
    ASSERT(RunLoop::isMain());

    const int maximumActiveWriteOperationCount { 1 };

    while (!m_pendingWriteOperations.isEmpty()) {
        if (m_activeWriteOperations.size() >= maximumActiveWriteOperationCount) {
            LOG(NetworkCacheStorage, "(NetworkProcess) limiting parallel writes");
            return;
        }
        auto writeOperation = m_pendingWriteOperations.takeLast();
        auto& write = *writeOperation;
        m_activeWriteOperations.add(WTF::move(writeOperation));

        dispatchWriteOperation(write);
    }
}

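// Small bodies are stored inline in the record file. Bodies above maximumInlineBodySize are
// stored as separate blobs via BlobStorage and handed back to the client through the
// write operation's mappedBodyHandler.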
static bool shouldStoreBodyAsBlob(const Data& bodyData)
{
    const size_t maximumInlineBodySize { 16 * 1024 };
    return bodyData.size() > maximumInlineBodySize;
}

void Storage::dispatchWriteOperation(WriteOperation& writeOperation)
{
    ASSERT(RunLoop::isMain());
    ASSERT(m_activeWriteOperations.contains(&writeOperation));

    // The key was already added to the record filter when the store started, but the filter might have been wiped since.
    addToRecordFilter(writeOperation.record.key);

    backgroundIOQueue().dispatch([this, &writeOperation] {
        auto partitionPath = partitionPathForKey(writeOperation.record.key);
        auto recordPath = recordPathForKey(writeOperation.record.key);

        WebCore::makeAllDirectories(partitionPath);

        ++writeOperation.activeCount;

        bool shouldStoreAsBlob = shouldStoreBodyAsBlob(writeOperation.record.body);
        auto bodyBlob = shouldStoreAsBlob ? storeBodyAsBlob(writeOperation) : Nullopt;

        auto recordData = encodeRecord(writeOperation.record, bodyBlob);

        auto channel = IOChannel::open(recordPath, IOChannel::Type::Create);
        size_t recordSize = recordData.size();
        channel->write(0, recordData, nullptr, [this, &writeOperation, recordSize](int error) {
            // On error the entry still stays in the contents filter until the next synchronization.
            m_approximateRecordsSize += recordSize;
            finishWriteOperation(writeOperation);

            LOG(NetworkCacheStorage, "(NetworkProcess) write complete error=%d", error);
        });
    });
}

void Storage::finishWriteOperation(WriteOperation& writeOperation)
{
    ASSERT(RunLoop::isMain());
    ASSERT(writeOperation.activeCount);
    ASSERT(m_activeWriteOperations.contains(&writeOperation));

    if (--writeOperation.activeCount)
        return;

    m_activeWriteOperations.remove(&writeOperation);
    dispatchPendingWriteOperations();

    shrinkIfNeeded();
}

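// Retrieval consults the record filter first, then any pending or active write operations,
// and finally dispatches a disk read at the given priority.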
void Storage::retrieve(const Key& key, unsigned priority, RetrieveCompletionHandler&& completionHandler)
{
    ASSERT(RunLoop::isMain());
    ASSERT(priority <= maximumRetrievePriority);
    ASSERT(!key.isNull());

    if (!m_capacity) {
        completionHandler(nullptr);
        return;
    }

    if (!mayContain(key)) {
        completionHandler(nullptr);
        return;
    }

    if (retrieveFromMemory(m_pendingWriteOperations, key, completionHandler))
        return;
    if (retrieveFromMemory(m_activeWriteOperations, key, completionHandler))
        return;

    auto readOperation = std::make_unique<ReadOperation>(key, WTF::move(completionHandler));
    m_pendingReadOperationsByPriority[priority].prepend(WTF::move(readOperation));
    dispatchPendingReadOperations();
}

void Storage::store(const Record& record, MappedBodyHandler&& mappedBodyHandler)
{
    ASSERT(RunLoop::isMain());
    ASSERT(!record.key.isNull());

    if (!m_capacity)
        return;

    auto writeOperation = std::make_unique<WriteOperation>(record, WTF::move(mappedBodyHandler));
    m_pendingWriteOperations.prepend(WTF::move(writeOperation));

    // Add the key to the filter already here, as we do lookups from the pending operations too.
    addToRecordFilter(record.key);

    bool isInitialWrite = m_pendingWriteOperations.size() == 1;
    if (!isInitialWrite)
        return;

    // Delay the start of writes a bit to avoid affecting early page load.
    // Completing writes will dispatch more writes without delay.
    static const auto initialWriteDelay = 1_s;
    m_writeOperationDispatchTimer.startOneShot(initialWriteDelay);
}

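// Traverses every record in the cache. Record files are read asynchronously with at most
// maximumParallelReadCount reads in flight at a time. Only record headers are decoded and the
// bodies passed to the handler are left empty; the handler is called for each record and once
// more with a null record on the main run loop when the traversal completes.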
void Storage::traverse(TraverseFlags flags, TraverseHandler&& traverseHandler)
{
    ASSERT(RunLoop::isMain());
    ASSERT(traverseHandler);
    // Avoid non-thread-safe std::function copies.

    auto traverseOperationPtr = std::make_unique<TraverseOperation>(flags, WTF::move(traverseHandler));
    auto& traverseOperation = *traverseOperationPtr;
    m_activeTraverseOperations.add(WTF::move(traverseOperationPtr));

    ioQueue().dispatch([this, &traverseOperation] {
        traverseRecordsFiles(recordsPath(), [this, &traverseOperation](const String& fileName, const String& partitionPath) {
            if (fileName.length() != Key::hashStringLength())
                return;
            auto recordPath = WebCore::pathByAppendingComponent(partitionPath, fileName);

            double worth = -1;
            if (traverseOperation.flags & TraverseFlag::ComputeWorth)
                worth = computeRecordWorth(fileTimes(recordPath));
            unsigned bodyShareCount = 0;
            if (traverseOperation.flags & TraverseFlag::ShareCount)
                bodyShareCount = m_blobStorage.shareCount(bodyPathForRecordPath(recordPath));

            std::unique_lock<std::mutex> lock(traverseOperation.activeMutex);
            ++traverseOperation.activeCount;

            auto channel = IOChannel::open(recordPath, IOChannel::Type::Read);
            channel->read(0, std::numeric_limits<size_t>::max(), nullptr, [this, &traverseOperation, worth, bodyShareCount](Data& fileData, int) {
                RecordMetaData metaData;
                Data headerData;
                if (decodeRecordHeader(fileData, metaData, headerData)) {
                    Record record {
                        metaData.key,
                        std::chrono::system_clock::time_point(metaData.epochRelativeTimeStamp),
                        headerData,
                        { }
                    };
                    RecordInfo info {
                        static_cast<size_t>(metaData.bodySize),
                        worth,
                        bodyShareCount,
                        String::fromUTF8(SHA1::hexDigest(metaData.bodyHash))
                    };
                    traverseOperation.handler(&record, info);
                }

                std::lock_guard<std::mutex> lock(traverseOperation.activeMutex);
                --traverseOperation.activeCount;
                traverseOperation.activeCondition.notify_one();
            });

            // Limit the number of in-flight reads.
            const unsigned maximumParallelReadCount = 5;
            traverseOperation.activeCondition.wait(lock, [&traverseOperation] {
                return traverseOperation.activeCount <= maximumParallelReadCount;
            });
        });
        // Wait for all reads to finish.
        std::unique_lock<std::mutex> lock(traverseOperation.activeMutex);
        traverseOperation.activeCondition.wait(lock, [&traverseOperation] {
            return !traverseOperation.activeCount;
        });
        RunLoop::main().dispatch([this, &traverseOperation] {
            traverseOperation.handler(nullptr, { });
            m_activeTraverseOperations.remove(&traverseOperation);
        });
    });
}

void Storage::setCapacity(size_t capacity)
{
    ASSERT(RunLoop::isMain());

#if !ASSERT_DISABLED
    const size_t assumedAverageRecordSize = 50 << 10;
    size_t maximumRecordCount = capacity / assumedAverageRecordSize;
    // ~10 bits per element are required for a <1% false positive rate.
    size_t effectiveBloomFilterCapacity = ContentsFilter::tableSize / 10;
    // If this gets hit, it might be time to increase the filter size.
    ASSERT(maximumRecordCount < effectiveBloomFilterCapacity);
#endif

    m_capacity = capacity;

    shrinkIfNeeded();
}

void Storage::clear(std::chrono::system_clock::time_point modifiedSinceTime, std::function<void ()>&& completionHandler)
{
    ASSERT(RunLoop::isMain());
    LOG(NetworkCacheStorage, "(NetworkProcess) clearing cache");

    if (m_recordFilter)
        m_recordFilter->clear();
    if (m_bodyFilter)
        m_bodyFilter->clear();
    m_approximateRecordsSize = 0;

    // Avoid non-thread-safe std::function copies.
    auto* completionHandlerPtr = completionHandler ? new std::function<void ()>(WTF::move(completionHandler)) : nullptr;

    ioQueue().dispatch([this, modifiedSinceTime, completionHandlerPtr] {
        auto recordsPath = this->recordsPath();
        traverseRecordsFiles(recordsPath, [modifiedSinceTime](const String& fileName, const String& partitionPath) {
            auto filePath = WebCore::pathByAppendingComponent(partitionPath, fileName);
            if (modifiedSinceTime > std::chrono::system_clock::time_point::min()) {
                auto times = fileTimes(filePath);
                if (times.modification < modifiedSinceTime)
                    return;
            }
            WebCore::deleteFile(filePath);
        });

        deleteEmptyRecordsDirectories(recordsPath);

        // This cleans up unreferenced blobs.
        m_blobStorage.synchronize();

        if (completionHandlerPtr) {
            RunLoop::main().dispatch([completionHandlerPtr] {
                (*completionHandlerPtr)();
                delete completionHandlerPtr;
            });
        }
    });
}

static double computeRecordWorth(FileTimes times)
{
    using namespace std::chrono;
    auto age = system_clock::now() - times.creation;
    // File modification time is updated manually on cache read. We don't use access time since the OS may update it automatically.
    auto accessAge = times.modification - times.creation;

    // For sanity.
    if (age <= 0_s || accessAge < 0_s || accessAge > age)
        return 0;

    // We like old entries that have been accessed recently.
    return duration<double>(accessAge) / age;
}

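// Probability of deleting a record during a shrink pass. Higher-worth records and records
// whose body blob is shared with other entries are less likely to be deleted.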
static double deletionProbability(FileTimes times, unsigned bodyShareCount)
{
    static const double maximumProbability { 0.33 };
    static const unsigned maximumEffectiveShareCount { 5 };

    auto worth = computeRecordWorth(times);

    // Adjust a bit so the most valuable entries don't get deleted at all.
    auto effectiveWorth = std::min(1.1 * worth, 1.);

    auto probability = (1 - effectiveWorth) * maximumProbability;

    // It is less useful to remove an entry that shares its body data.
    if (bodyShareCount)
        probability /= std::min(bodyShareCount, maximumEffectiveShareCount);

    return probability;
}

void Storage::shrinkIfNeeded()
{
    ASSERT(RunLoop::isMain());

    if (approximateSize() > m_capacity)
        shrink();
}

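// Shrinking walks all record files on the background queue and deletes each one with a
// probability based on its worth and body share count. The synchronize() that follows
// rebuilds the filters and the size estimate.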
void Storage::shrink()
{
    ASSERT(RunLoop::isMain());

    if (m_shrinkInProgress || m_synchronizationInProgress)
        return;
    m_shrinkInProgress = true;

    LOG(NetworkCacheStorage, "(NetworkProcess) shrinking cache approximateSize=%zu capacity=%zu", approximateSize(), m_capacity);

    backgroundIOQueue().dispatch([this] {
        auto recordsPath = this->recordsPath();
        traverseRecordsFiles(recordsPath, [this](const String& fileName, const String& partitionPath) {
            if (fileName.length() != Key::hashStringLength())
                return;
            auto recordPath = WebCore::pathByAppendingComponent(partitionPath, fileName);
            auto bodyPath = bodyPathForRecordPath(recordPath);

            auto times = fileTimes(recordPath);
            unsigned bodyShareCount = m_blobStorage.shareCount(bodyPath);
            auto probability = deletionProbability(times, bodyShareCount);

            bool shouldDelete = randomNumber() < probability;

            LOG(NetworkCacheStorage, "Deletion probability=%f bodyLinkCount=%d shouldDelete=%d", probability, bodyShareCount, shouldDelete);

            if (shouldDelete) {
                WebCore::deleteFile(recordPath);
                m_blobStorage.remove(bodyPath);
            }
        });

        RunLoop::main().dispatch([this] {
            m_shrinkInProgress = false;
            // We could synchronize during the shrink traversal. However, this is fast and it is better to have just one code path.
            synchronize();
        });

        LOG(NetworkCacheStorage, "(NetworkProcess) cache shrink completed");
    });
}

void Storage::clearWriteQueue()
{
    LOG(NetworkCacheStorage, "(NetworkProcess) clearing write queue");

    m_pendingWriteOperations.clear();
}

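// Deletes "Version N" directories left behind by older cache versions.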
void Storage::deleteOldVersions()
{
    backgroundIOQueue().dispatch([this] {
        auto cachePath = basePath();
        traverseDirectory(cachePath, [&cachePath](const String& subdirName, DirectoryEntryType type) {
            if (type != DirectoryEntryType::Directory)
                return;
            if (!subdirName.startsWith(versionDirectoryPrefix))
                return;
            auto versionString = subdirName.substring(strlen(versionDirectoryPrefix));
            bool success;
            unsigned directoryVersion = versionString.toUIntStrict(&success);
            if (!success)
                return;
            if (directoryVersion >= version)
                return;

            auto oldVersionPath = WebCore::pathByAppendingComponent(cachePath, subdirName);
            LOG(NetworkCacheStorage, "(NetworkProcess) deleting old cache version, path %s", oldVersionPath.utf8().data());

            deleteDirectoryRecursively(oldVersionPath);
        });
    });
}

}
}

#endif