Source/WebKit/NetworkProcess/cache/NetworkCacheStorage.cpp
/*
 * Copyright (C) 2014-2017 Apple Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS''
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
 * THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "config.h"
#include "NetworkCacheStorage.h"

#include "AuxiliaryProcess.h"
#include "Logging.h"
#include "NetworkCacheCoders.h"
#include "NetworkCacheFileSystem.h"
#include "NetworkCacheIOChannel.h"
#include <mutex>
#include <wtf/Condition.h>
#include <wtf/Lock.h>
#include <wtf/RandomNumber.h>
#include <wtf/RunLoop.h>
#include <wtf/text/CString.h>
#include <wtf/text/StringConcatenateNumbers.h>

namespace WebKit {
namespace NetworkCache {

static const char saltFileName[] = "salt";
static const char versionDirectoryPrefix[] = "Version ";
static const char recordsDirectoryName[] = "Records";
static const char blobsDirectoryName[] = "Blobs";
static const char blobSuffix[] = "-blob";
constexpr size_t maximumInlineBodySize { 16 * 1024 };

static double computeRecordWorth(FileTimes);

struct Storage::ReadOperation {
    WTF_MAKE_FAST_ALLOCATED;
public:
    ReadOperation(Storage& storage, const Key& key, RetrieveCompletionHandler&& completionHandler)
        : storage(storage)
        , key(key)
        , completionHandler(WTFMove(completionHandler))
    { }

    void cancel();
    bool finish();

    Ref<Storage> storage;

    const Key key;
    RetrieveCompletionHandler completionHandler;

    std::unique_ptr<Record> resultRecord;
    SHA1::Digest expectedBodyHash;
    BlobStorage::Blob resultBodyBlob;
    std::atomic<unsigned> activeCount { 0 };
    bool isCanceled { false };
    Timings timings;
};

void Storage::ReadOperation::cancel()
{
    ASSERT(RunLoop::isMain());

    if (isCanceled)
        return;
    timings.completionTime = MonotonicTime::now();
    timings.wasCanceled = true;
    isCanceled = true;
    completionHandler(nullptr, timings);
}

bool Storage::ReadOperation::finish()
{
    ASSERT(RunLoop::isMain());

    if (isCanceled)
        return false;
    if (resultRecord && resultRecord->body.isNull()) {
        if (resultBodyBlob.hash == expectedBodyHash)
            resultRecord->body = resultBodyBlob.data;
        else
            resultRecord = nullptr;
    }
    timings.completionTime = MonotonicTime::now();
    return completionHandler(WTFMove(resultRecord), timings);
}

struct Storage::WriteOperation {
    WTF_MAKE_FAST_ALLOCATED;
public:
    WriteOperation(Storage& storage, const Record& record, MappedBodyHandler&& mappedBodyHandler, CompletionHandler<void(int)>&& completionHandler)
        : storage(storage)
        , record(record)
        , mappedBodyHandler(WTFMove(mappedBodyHandler))
        , completionHandler(WTFMove(completionHandler))
    { }

    Ref<Storage> storage;

    const Record record;
    const MappedBodyHandler mappedBodyHandler;
    CompletionHandler<void(int)> completionHandler;

    std::atomic<unsigned> activeCount { 0 };
};

struct Storage::TraverseOperation {
    WTF_MAKE_FAST_ALLOCATED;
public:
    TraverseOperation(Ref<Storage>&& storage, const String& type, OptionSet<TraverseFlag> flags, TraverseHandler&& handler)
        : storage(WTFMove(storage))
        , type(type)
        , flags(flags)
        , handler(WTFMove(handler))
    { }
    Ref<Storage> storage;

    const String type;
    const OptionSet<TraverseFlag> flags;
    const TraverseHandler handler;

    Lock activeMutex;
    Condition activeCondition;
    unsigned activeCount { 0 };
};

static String makeVersionedDirectoryPath(const String& baseDirectoryPath)
{
    String versionSubdirectory = makeString(versionDirectoryPrefix, Storage::version);
    return FileSystem::pathByAppendingComponent(baseDirectoryPath, versionSubdirectory);
}

static String makeRecordsDirectoryPath(const String& baseDirectoryPath)
{
    return FileSystem::pathByAppendingComponent(makeVersionedDirectoryPath(baseDirectoryPath), recordsDirectoryName);
}

static String makeBlobDirectoryPath(const String& baseDirectoryPath)
{
    return FileSystem::pathByAppendingComponent(makeVersionedDirectoryPath(baseDirectoryPath), blobsDirectoryName);
}

static String makeSaltFilePath(const String& baseDirectoryPath)
{
    return FileSystem::pathByAppendingComponent(makeVersionedDirectoryPath(baseDirectoryPath), saltFileName);
}
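
// Illustrative sketch (not part of the original file) of the on-disk layout these
// helpers produce, with <N> standing for Storage::version defined in the header:
//
//   <base>/Version <N>/salt
//   <base>/Version <N>/Blobs/...                                  (managed by BlobStorage)
//   <base>/Version <N>/Records/<partitionHash>/<type>/<recordHash>
//   <base>/Version <N>/Records/<partitionHash>/<type>/<recordHash>-blob
//
// The "-blob" sibling exists only for records whose body is stored via BlobStorage
// rather than inline in the record file (see shouldStoreBodyAsBlob() below).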

RefPtr<Storage> Storage::open(const String& cachePath, Mode mode)
{
    ASSERT(RunLoop::isMain());

    if (!FileSystem::makeAllDirectories(makeVersionedDirectoryPath(cachePath)))
        return nullptr;
    auto salt = readOrMakeSalt(makeSaltFilePath(cachePath));
    if (!salt)
        return nullptr;
    return adoptRef(new Storage(cachePath, mode, *salt));
}
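
// A minimal usage sketch with a hypothetical cache path; Mode::Normal is assumed
// to be the non-testing enumerator declared in NetworkCacheStorage.h. open()
// returns nullptr if the version directory cannot be created or the salt file
// cannot be read or created:
//
//   if (auto storage = Storage::open("/path/to/NetworkCache", Storage::Mode::Normal))
//       ... // ready; see store()/retrieve() below.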

using RecordFileTraverseFunction = Function<void (const String& fileName, const String& hashString, const String& type, bool isBlob, const String& recordDirectoryPath)>;
static void traverseRecordsFiles(const String& recordsPath, const String& expectedType, const RecordFileTraverseFunction& function)
{
    traverseDirectory(recordsPath, [&](const String& partitionName, DirectoryEntryType entryType) {
        if (entryType != DirectoryEntryType::Directory)
            return;
        String partitionPath = FileSystem::pathByAppendingComponent(recordsPath, partitionName);
        traverseDirectory(partitionPath, [&](const String& actualType, DirectoryEntryType entryType) {
            if (entryType != DirectoryEntryType::Directory)
                return;
            if (!expectedType.isEmpty() && expectedType != actualType)
                return;
            String recordDirectoryPath = FileSystem::pathByAppendingComponent(partitionPath, actualType);
            traverseDirectory(recordDirectoryPath, [&function, &recordDirectoryPath, &actualType](const String& fileName, DirectoryEntryType entryType) {
                if (entryType != DirectoryEntryType::File || fileName.length() < Key::hashStringLength())
                    return;

                String hashString = fileName.substring(0, Key::hashStringLength());
                auto isBlob = fileName.length() > Key::hashStringLength() && fileName.endsWith(blobSuffix);
                function(fileName, hashString, actualType, isBlob, recordDirectoryPath);
            });
        });
    });
}

static void deleteEmptyRecordsDirectories(const String& recordsPath)
{
    traverseDirectory(recordsPath, [&recordsPath](const String& partitionName, DirectoryEntryType type) {
        if (type != DirectoryEntryType::Directory)
            return;

        // Delete [type] sub-folders.
        String partitionPath = FileSystem::pathByAppendingComponent(recordsPath, partitionName);
        traverseDirectory(partitionPath, [&partitionPath](const String& subdirName, DirectoryEntryType entryType) {
            if (entryType != DirectoryEntryType::Directory)
                return;

            // Let the system figure out whether it is really empty.
            FileSystem::deleteEmptyDirectory(FileSystem::pathByAppendingComponent(partitionPath, subdirName));
        });

        // Delete [Partition] folders.
        // Let the system figure out whether it is really empty.
        FileSystem::deleteEmptyDirectory(FileSystem::pathByAppendingComponent(recordsPath, partitionName));
    });
}

Storage::Storage(const String& baseDirectoryPath, Mode mode, Salt salt)
    : m_basePath(baseDirectoryPath)
    , m_recordsPath(makeRecordsDirectoryPath(baseDirectoryPath))
    , m_mode(mode)
    , m_salt(salt)
    , m_canUseBlobsForForBodyData(isSafeToUseMemoryMapForPath(baseDirectoryPath))
    , m_readOperationTimeoutTimer(*this, &Storage::cancelAllReadOperations)
    , m_writeOperationDispatchTimer(*this, &Storage::dispatchPendingWriteOperations)
    , m_ioQueue(WorkQueue::create("com.apple.WebKit.Cache.Storage", WorkQueue::Type::Concurrent))
    , m_backgroundIOQueue(WorkQueue::create("com.apple.WebKit.Cache.Storage.background", WorkQueue::Type::Concurrent, WorkQueue::QOS::Background))
    , m_serialBackgroundIOQueue(WorkQueue::create("com.apple.WebKit.Cache.Storage.serialBackground", WorkQueue::Type::Serial, WorkQueue::QOS::Background))
    , m_blobStorage(makeBlobDirectoryPath(baseDirectoryPath), m_salt)
{
    ASSERT(RunLoop::isMain());

    deleteOldVersions();
    synchronize();
}

Storage::~Storage()
{
    ASSERT(RunLoop::isMain());
    ASSERT(m_activeReadOperations.isEmpty());
    ASSERT(m_activeWriteOperations.isEmpty());
    ASSERT(m_activeTraverseOperations.isEmpty());
    ASSERT(!m_synchronizationInProgress);
    ASSERT(!m_shrinkInProgress);
}

String Storage::basePath() const
{
    return m_basePath.isolatedCopy();
}

String Storage::versionPath() const
{
    return makeVersionedDirectoryPath(basePath());
}

String Storage::recordsPath() const
{
    return m_recordsPath.isolatedCopy();
}

size_t Storage::approximateSize() const
{
    return m_approximateRecordsSize + m_blobStorage.approximateSize();
}

static size_t estimateRecordsSize(unsigned recordCount, unsigned blobCount)
{
    auto inlineBodyCount = recordCount - std::min(blobCount, recordCount);
    auto headerSizes = recordCount * 4096;
    auto inlineBodySizes = (maximumInlineBodySize / 2) * inlineBodyCount;
    return headerSizes + inlineBodySizes;
}
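
// Worked example of the estimate above (illustrative): for 1000 records of which
// 200 have separate blob bodies, inlineBodyCount = 800, headerSizes = 1000 * 4096
// = 4,096,000 bytes and inlineBodySizes = 8 KB * 800 = 6,553,600 bytes, so the
// records area is estimated at roughly 10.6 MB without issuing any stat() calls.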

void Storage::synchronize()
{
    ASSERT(RunLoop::isMain());

    if (m_synchronizationInProgress || m_shrinkInProgress)
        return;
    m_synchronizationInProgress = true;

    LOG(NetworkCacheStorage, "(NetworkProcess) synchronizing cache");

    backgroundIOQueue().dispatch([this, protectedThis = makeRef(*this)] () mutable {
        auto recordFilter = std::make_unique<ContentsFilter>();
        auto blobFilter = std::make_unique<ContentsFilter>();

        // Most of the disk space usage is in blobs if we are using them. Approximate records file sizes to avoid expensive stat() calls.
        bool shouldComputeExactRecordsSize = !m_canUseBlobsForForBodyData;
        size_t recordsSize = 0;
        unsigned recordCount = 0;
        unsigned blobCount = 0;

        String anyType;
        traverseRecordsFiles(recordsPath(), anyType, [&](const String& fileName, const String& hashString, const String& type, bool isBlob, const String& recordDirectoryPath) {
            auto filePath = FileSystem::pathByAppendingComponent(recordDirectoryPath, fileName);

            Key::HashType hash;
            if (!Key::stringToHash(hashString, hash)) {
                FileSystem::deleteFile(filePath);
                return;
            }

            if (isBlob) {
                ++blobCount;
                blobFilter->add(hash);
                return;
            }

            ++recordCount;

            if (shouldComputeExactRecordsSize) {
                long long fileSize = 0;
                FileSystem::getFileSize(filePath, fileSize);
                recordsSize += fileSize;
            }

            recordFilter->add(hash);
        });

        if (!shouldComputeExactRecordsSize)
            recordsSize = estimateRecordsSize(recordCount, blobCount);

        m_blobStorage.synchronize();

        deleteEmptyRecordsDirectories(recordsPath());

        LOG(NetworkCacheStorage, "(NetworkProcess) cache synchronization completed size=%zu recordCount=%u", recordsSize, recordCount);

        RunLoop::main().dispatch([this, protectedThis = WTFMove(protectedThis), recordFilter = WTFMove(recordFilter), blobFilter = WTFMove(blobFilter), recordsSize]() mutable {
            for (auto& recordFilterKey : m_recordFilterHashesAddedDuringSynchronization)
                recordFilter->add(recordFilterKey);
            m_recordFilterHashesAddedDuringSynchronization.clear();

            for (auto& hash : m_blobFilterHashesAddedDuringSynchronization)
                blobFilter->add(hash);
            m_blobFilterHashesAddedDuringSynchronization.clear();

            m_recordFilter = WTFMove(recordFilter);
            m_blobFilter = WTFMove(blobFilter);
            m_approximateRecordsSize = recordsSize;
            m_synchronizationInProgress = false;
        });

    });
}

void Storage::addToRecordFilter(const Key& key)
{
    ASSERT(RunLoop::isMain());

    if (m_recordFilter)
        m_recordFilter->add(key.hash());

    // If we get new entries during filter synchronization take care to add them to the new filter as well.
    if (m_synchronizationInProgress)
        m_recordFilterHashesAddedDuringSynchronization.append(key.hash());
}

bool Storage::mayContain(const Key& key) const
{
    ASSERT(RunLoop::isMain());
    return !m_recordFilter || m_recordFilter->mayContain(key.hash());
}

bool Storage::mayContainBlob(const Key& key) const
{
    ASSERT(RunLoop::isMain());
    if (!m_canUseBlobsForForBodyData)
        return false;
    return !m_blobFilter || m_blobFilter->mayContain(key.hash());
}

String Storage::recordDirectoryPathForKey(const Key& key) const
{
    ASSERT(!key.type().isEmpty());
    return FileSystem::pathByAppendingComponent(FileSystem::pathByAppendingComponent(recordsPath(), key.partitionHashAsString()), key.type());
}

String Storage::recordPathForKey(const Key& key) const
{
    return FileSystem::pathByAppendingComponent(recordDirectoryPathForKey(key), key.hashAsString());
}

static String blobPathForRecordPath(const String& recordPath)
{
    return recordPath + blobSuffix;
}

String Storage::blobPathForKey(const Key& key) const
{
    return blobPathForRecordPath(recordPathForKey(key));
}

struct RecordMetaData {
    RecordMetaData() { }
    explicit RecordMetaData(const Key& key)
        : cacheStorageVersion(Storage::version)
        , key(key)
    { }

    unsigned cacheStorageVersion;
    Key key;
    WallTime timeStamp;
    SHA1::Digest headerHash;
    uint64_t headerSize { 0 };
    SHA1::Digest bodyHash;
    uint64_t bodySize { 0 };
    bool isBodyInline { false };

    // Not encoded as a field. Header starts immediately after meta data.
    uint64_t headerOffset { 0 };
};

static bool decodeRecordMetaData(RecordMetaData& metaData, const Data& fileData)
{
    bool success = false;
    fileData.apply([&metaData, &success](const uint8_t* data, size_t size) {
        WTF::Persistence::Decoder decoder(data, size);
        if (!decoder.decode(metaData.cacheStorageVersion))
            return false;
        if (!decoder.decode(metaData.key))
            return false;
        if (!decoder.decode(metaData.timeStamp))
            return false;
        if (!decoder.decode(metaData.headerHash))
            return false;
        if (!decoder.decode(metaData.headerSize))
            return false;
        if (!decoder.decode(metaData.bodyHash))
            return false;
        if (!decoder.decode(metaData.bodySize))
            return false;
        if (!decoder.decode(metaData.isBodyInline))
            return false;
        if (!decoder.verifyChecksum())
            return false;
        metaData.headerOffset = decoder.currentOffset();
        success = true;
        return false;
    });
    return success;
}

static bool decodeRecordHeader(const Data& fileData, RecordMetaData& metaData, Data& headerData, const Salt& salt)
{
    if (!decodeRecordMetaData(metaData, fileData)) {
        LOG(NetworkCacheStorage, "(NetworkProcess) meta data decode failure");
        return false;
    }

    if (metaData.cacheStorageVersion != Storage::version) {
        LOG(NetworkCacheStorage, "(NetworkProcess) version mismatch");
        return false;
    }

    headerData = fileData.subrange(metaData.headerOffset, metaData.headerSize);
    if (metaData.headerHash != computeSHA1(headerData, salt)) {
        LOG(NetworkCacheStorage, "(NetworkProcess) header checksum mismatch");
        return false;
    }
    return true;
}

void Storage::readRecord(ReadOperation& readOperation, const Data& recordData)
{
    ASSERT(!RunLoop::isMain());

    RecordMetaData metaData;
    Data headerData;
    if (!decodeRecordHeader(recordData, metaData, headerData, m_salt))
        return;

    if (metaData.key != readOperation.key)
        return;

    // Sanity check against timestamps in the future.
    if (metaData.timeStamp > WallTime::now())
        return;

    Data bodyData;
    if (metaData.isBodyInline) {
        size_t bodyOffset = metaData.headerOffset + headerData.size();
        if (bodyOffset + metaData.bodySize != recordData.size())
            return;
        bodyData = recordData.subrange(bodyOffset, metaData.bodySize);
        if (metaData.bodyHash != computeSHA1(bodyData, m_salt))
            return;
    }

    readOperation.expectedBodyHash = metaData.bodyHash;
    readOperation.resultRecord = std::make_unique<Storage::Record>(Storage::Record {
        metaData.key,
        metaData.timeStamp,
        headerData,
        bodyData,
        metaData.bodyHash
    });
}

static Data encodeRecordMetaData(const RecordMetaData& metaData)
{
    WTF::Persistence::Encoder encoder;

    encoder << metaData.cacheStorageVersion;
    encoder << metaData.key;
    encoder << metaData.timeStamp;
    encoder << metaData.headerHash;
    encoder << metaData.headerSize;
    encoder << metaData.bodyHash;
    encoder << metaData.bodySize;
    encoder << metaData.isBodyInline;

    encoder.encodeChecksum();

    return Data(encoder.buffer(), encoder.bufferSize());
}

Optional<BlobStorage::Blob> Storage::storeBodyAsBlob(WriteOperation& writeOperation)
{
    auto blobPath = blobPathForKey(writeOperation.record.key);

    // Store the body.
    auto blob = m_blobStorage.add(blobPath, writeOperation.record.body);
    if (blob.data.isNull())
        return { };

    ++writeOperation.activeCount;

    RunLoop::main().dispatch([this, blob, &writeOperation] {
        if (m_blobFilter)
            m_blobFilter->add(writeOperation.record.key.hash());
        if (m_synchronizationInProgress)
            m_blobFilterHashesAddedDuringSynchronization.append(writeOperation.record.key.hash());

        if (writeOperation.mappedBodyHandler)
            writeOperation.mappedBodyHandler(blob.data);

        finishWriteOperation(writeOperation);
    });
    return blob;
}

Data Storage::encodeRecord(const Record& record, Optional<BlobStorage::Blob> blob)
{
    ASSERT(!blob || bytesEqual(blob.value().data, record.body));

    RecordMetaData metaData(record.key);
    metaData.timeStamp = record.timeStamp;
    metaData.headerHash = computeSHA1(record.header, m_salt);
    metaData.headerSize = record.header.size();
    metaData.bodyHash = blob ? blob.value().hash : computeSHA1(record.body, m_salt);
    metaData.bodySize = record.body.size();
    metaData.isBodyInline = !blob;

    auto encodedMetaData = encodeRecordMetaData(metaData);
    auto headerData = concatenate(encodedMetaData, record.header);

    if (metaData.isBodyInline)
        return concatenate(headerData, record.body);

    return { headerData };
}
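
// Informational summary of the record file produced above: the encoded metadata
// (which carries its own checksum) comes first, immediately followed by the header
// bytes, and, only when isBodyInline is true, by the body bytes. Blob-backed bodies
// live in the separate "<recordHash>-blob" file and are validated on read by
// comparing the stored bodyHash against the blob's hash (see ReadOperation::finish()).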

void Storage::removeFromPendingWriteOperations(const Key& key)
{
    while (true) {
        auto found = m_pendingWriteOperations.findIf([&key](auto& operation) {
            return operation->record.key == key;
        });

        if (found == m_pendingWriteOperations.end())
            break;

        m_pendingWriteOperations.remove(found);
    }
}

void Storage::remove(const Key& key)
{
    ASSERT(RunLoop::isMain());

    if (!mayContain(key))
        return;

    auto protectedThis = makeRef(*this);

    // We can't remove the key from the Bloom filter (but some false positives are expected anyway).
    // For simplicity we also don't reduce m_approximateSize on removals.
    // The next synchronization will update everything.

    removeFromPendingWriteOperations(key);

    serialBackgroundIOQueue().dispatch([this, protectedThis = WTFMove(protectedThis), key] () mutable {
        deleteFiles(key);
    });
}

void Storage::remove(const Vector<Key>& keys, CompletionHandler<void()>&& completionHandler)
{
    ASSERT(RunLoop::isMain());

    Vector<Key> keysToRemove;
    keysToRemove.reserveInitialCapacity(keys.size());

    for (auto& key : keys) {
        if (!mayContain(key))
            continue;
        removeFromPendingWriteOperations(key);
        keysToRemove.uncheckedAppend(key);
    }

    serialBackgroundIOQueue().dispatch([this, protectedThis = makeRef(*this), keysToRemove = WTFMove(keysToRemove), completionHandler = WTFMove(completionHandler)] () mutable {
        for (auto& key : keysToRemove)
            deleteFiles(key);

        RunLoop::main().dispatch(WTFMove(completionHandler));
    });
}

void Storage::deleteFiles(const Key& key)
{
    ASSERT(!RunLoop::isMain());

    FileSystem::deleteFile(recordPathForKey(key));
    m_blobStorage.remove(blobPathForKey(key));
}

void Storage::updateFileModificationTime(const String& path)
{
    serialBackgroundIOQueue().dispatch([path = path.isolatedCopy()] {
        updateFileModificationTimeIfNeeded(path);
    });
}

void Storage::dispatchReadOperation(std::unique_ptr<ReadOperation> readOperationPtr)
{
    ASSERT(RunLoop::isMain());

    auto& readOperation = *readOperationPtr;
    m_activeReadOperations.add(WTFMove(readOperationPtr));

    readOperation.timings.dispatchTime = MonotonicTime::now();
    readOperation.timings.synchronizationInProgressAtDispatch = m_synchronizationInProgress;
    readOperation.timings.shrinkInProgressAtDispatch = m_shrinkInProgress;
    readOperation.timings.dispatchCountAtDispatch = m_readOperationDispatchCount;

    ++m_readOperationDispatchCount;

    // Avoid randomness during testing.
    if (m_mode != Mode::AvoidRandomness) {
        // I/O pressure may make disk operations slow. If they start taking a very long time, we'd rather go to the network.
        const Seconds readTimeout = 1500_ms;
        m_readOperationTimeoutTimer.startOneShot(readTimeout);
    }

    bool shouldGetBodyBlob = mayContainBlob(readOperation.key);

    ioQueue().dispatch([this, &readOperation, shouldGetBodyBlob] {
        auto recordPath = recordPathForKey(readOperation.key);

        ++readOperation.activeCount;
        if (shouldGetBodyBlob)
            ++readOperation.activeCount;

        readOperation.timings.recordIOStartTime = MonotonicTime::now();

        auto channel = IOChannel::open(recordPath, IOChannel::Type::Read);
        channel->read(0, std::numeric_limits<size_t>::max(), &ioQueue(), [this, &readOperation](const Data& fileData, int error) {
            readOperation.timings.recordIOEndTime = MonotonicTime::now();
            if (!error)
                readRecord(readOperation, fileData);
            finishReadOperation(readOperation);
        });

        if (shouldGetBodyBlob) {
            // Read the blob in parallel with the record read.
            readOperation.timings.blobIOStartTime = MonotonicTime::now();

            auto blobPath = blobPathForKey(readOperation.key);
            readOperation.resultBodyBlob = m_blobStorage.get(blobPath);

            readOperation.timings.blobIOEndTime = MonotonicTime::now();

            finishReadOperation(readOperation);
        }
    });
}

void Storage::finishReadOperation(ReadOperation& readOperation)
{
    ASSERT(readOperation.activeCount);
    // Record and blob reads must finish.
    if (--readOperation.activeCount)
        return;

    RunLoop::main().dispatch([this, &readOperation] {
        bool success = readOperation.finish();
        if (success)
            updateFileModificationTime(recordPathForKey(readOperation.key));
        else if (!readOperation.isCanceled)
            remove(readOperation.key);

        auto protectedThis = makeRef(*this);

        ASSERT(m_activeReadOperations.contains(&readOperation));
        m_activeReadOperations.remove(&readOperation);

        if (m_activeReadOperations.isEmpty())
            m_readOperationTimeoutTimer.stop();

        dispatchPendingReadOperations();

        LOG(NetworkCacheStorage, "(NetworkProcess) read complete success=%d", success);
    });
}

void Storage::cancelAllReadOperations()
{
    ASSERT(RunLoop::isMain());

    for (auto& readOperation : m_activeReadOperations)
        readOperation->cancel();

    size_t pendingCount = 0;
    for (int priority = maximumRetrievePriority; priority >= 0; --priority) {
        auto& pendingRetrieveQueue = m_pendingReadOperationsByPriority[priority];
        pendingCount += pendingRetrieveQueue.size();
        for (auto it = pendingRetrieveQueue.rbegin(), end = pendingRetrieveQueue.rend(); it != end; ++it)
            (*it)->cancel();
        pendingRetrieveQueue.clear();
    }

    LOG(NetworkCacheStorage, "(NetworkProcess) retrieve timeout, canceled %u active and %zu pending", m_activeReadOperations.size(), pendingCount);
}

void Storage::dispatchPendingReadOperations()
{
    ASSERT(RunLoop::isMain());

    const int maximumActiveReadOperationCount = 5;

    for (int priority = maximumRetrievePriority; priority >= 0; --priority) {
        if (m_activeReadOperations.size() > maximumActiveReadOperationCount) {
            LOG(NetworkCacheStorage, "(NetworkProcess) limiting parallel retrieves");
            return;
        }
        auto& pendingRetrieveQueue = m_pendingReadOperationsByPriority[priority];
        if (pendingRetrieveQueue.isEmpty())
            continue;
        dispatchReadOperation(pendingRetrieveQueue.takeLast());
    }
}

template <class T> bool retrieveFromMemory(const T& operations, const Key& key, Storage::RetrieveCompletionHandler& completionHandler)
{
    for (auto& operation : operations) {
        if (operation->record.key == key) {
            LOG(NetworkCacheStorage, "(NetworkProcess) found write operation in progress");
            RunLoop::main().dispatch([record = operation->record, completionHandler = WTFMove(completionHandler)] () mutable {
                completionHandler(std::make_unique<Storage::Record>(record), { });
            });
            return true;
        }
    }
    return false;
}

void Storage::dispatchPendingWriteOperations()
{
    ASSERT(RunLoop::isMain());

    const int maximumActiveWriteOperationCount { 1 };

    while (!m_pendingWriteOperations.isEmpty()) {
        if (m_activeWriteOperations.size() >= maximumActiveWriteOperationCount) {
            LOG(NetworkCacheStorage, "(NetworkProcess) limiting parallel writes");
            return;
        }
        dispatchWriteOperation(m_pendingWriteOperations.takeLast());
    }
}

bool Storage::shouldStoreBodyAsBlob(const Data& bodyData)
{
    if (!m_canUseBlobsForForBodyData)
        return false;
    return bodyData.size() > maximumInlineBodySize;
}
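
// Example of the policy above: a 4 KB response body is written inline into the
// record file, while a 100 KB body exceeds maximumInlineBodySize (16 KB) and is
// handed to BlobStorage instead, provided memory mapping is considered safe for
// this cache path (m_canUseBlobsForForBodyData).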

void Storage::dispatchWriteOperation(std::unique_ptr<WriteOperation> writeOperationPtr)
{
    ASSERT(RunLoop::isMain());

    auto& writeOperation = *writeOperationPtr;
    m_activeWriteOperations.add(WTFMove(writeOperationPtr));

    // The key was already added when the store started, but the filter might have been wiped since.
    addToRecordFilter(writeOperation.record.key);

    backgroundIOQueue().dispatch([this, &writeOperation] {
        auto recordDirectoryPath = recordDirectoryPathForKey(writeOperation.record.key);
        auto recordPath = recordPathForKey(writeOperation.record.key);

        FileSystem::makeAllDirectories(recordDirectoryPath);

        ++writeOperation.activeCount;

        bool shouldStoreAsBlob = shouldStoreBodyAsBlob(writeOperation.record.body);
        auto blob = shouldStoreAsBlob ? storeBodyAsBlob(writeOperation) : WTF::nullopt;

        auto recordData = encodeRecord(writeOperation.record, blob);

        auto channel = IOChannel::open(recordPath, IOChannel::Type::Create);
        size_t recordSize = recordData.size();
        channel->write(0, recordData, nullptr, [this, &writeOperation, recordSize](int error) {
            // On error the entry still stays in the contents filter until the next synchronization.
            m_approximateRecordsSize += recordSize;
            finishWriteOperation(writeOperation, error);

            LOG(NetworkCacheStorage, "(NetworkProcess) write complete error=%d", error);
        });
    });
}

void Storage::finishWriteOperation(WriteOperation& writeOperation, int error)
{
    ASSERT(RunLoop::isMain());
    ASSERT(writeOperation.activeCount);
    ASSERT(m_activeWriteOperations.contains(&writeOperation));

    if (--writeOperation.activeCount)
        return;

    auto protectedThis = makeRef(*this);

    if (writeOperation.completionHandler)
        writeOperation.completionHandler(error);

    m_activeWriteOperations.remove(&writeOperation);
    dispatchPendingWriteOperations();

    shrinkIfNeeded();
}

void Storage::retrieve(const Key& key, unsigned priority, RetrieveCompletionHandler&& completionHandler)
{
    ASSERT(RunLoop::isMain());
    ASSERT(priority <= maximumRetrievePriority);
    ASSERT(!key.isNull());

    if (!m_capacity) {
        completionHandler(nullptr, { });
        return;
    }

    if (!mayContain(key)) {
        completionHandler(nullptr, { });
        return;
    }

    if (retrieveFromMemory(m_pendingWriteOperations, key, completionHandler))
        return;
    if (retrieveFromMemory(m_activeWriteOperations, key, completionHandler))
        return;

    auto readOperation = std::make_unique<ReadOperation>(*this, key, WTFMove(completionHandler));

    readOperation->timings.startTime = MonotonicTime::now();
    readOperation->timings.dispatchCountAtStart = m_readOperationDispatchCount;

    m_pendingReadOperationsByPriority[priority].prepend(WTFMove(readOperation));
    dispatchPendingReadOperations();
}

void Storage::store(const Record& record, MappedBodyHandler&& mappedBodyHandler, CompletionHandler<void(int)>&& completionHandler)
{
    ASSERT(RunLoop::isMain());
    ASSERT(!record.key.isNull());

    if (!m_capacity)
        return;

    auto writeOperation = std::make_unique<WriteOperation>(*this, record, WTFMove(mappedBodyHandler), WTFMove(completionHandler));
    m_pendingWriteOperations.prepend(WTFMove(writeOperation));

    // Add the key to the filter already here, as we do lookups from the pending operations too.
    addToRecordFilter(record.key);

    bool isInitialWrite = m_pendingWriteOperations.size() == 1;
    if (!isInitialWrite)
        return;

    m_writeOperationDispatchTimer.startOneShot(m_initialWriteDelay);
}
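
// A minimal sketch of the store/retrieve API above, with a hypothetical key and
// record; both completion handlers run on the main run loop:
//
//   storage->store(record, nullptr, [](int error) {
//       // Non-zero error means the record file could not be written.
//   });
//
//   storage->retrieve(key, 0 /* lowest priority */, [](auto record, auto timings) {
//       // record is null on a miss, timeout or cancellation. Returning false
//       // tells Storage the entry was unusable, which removes it from disk.
//       return true;
//   });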

void Storage::traverse(const String& type, OptionSet<TraverseFlag> flags, TraverseHandler&& traverseHandler)
{
    ASSERT(RunLoop::isMain());
    ASSERT(traverseHandler);
    // Avoid non-thread safe Function copies.

    auto traverseOperationPtr = std::make_unique<TraverseOperation>(makeRef(*this), type, flags, WTFMove(traverseHandler));
    auto& traverseOperation = *traverseOperationPtr;
    m_activeTraverseOperations.add(WTFMove(traverseOperationPtr));

    ioQueue().dispatch([this, &traverseOperation] {
        traverseRecordsFiles(recordsPath(), traverseOperation.type, [this, &traverseOperation](const String& fileName, const String& hashString, const String& type, bool isBlob, const String& recordDirectoryPath) {
            ASSERT(type == traverseOperation.type || traverseOperation.type.isEmpty());
            if (isBlob)
                return;

            auto recordPath = FileSystem::pathByAppendingComponent(recordDirectoryPath, fileName);

            double worth = -1;
            if (traverseOperation.flags & TraverseFlag::ComputeWorth)
                worth = computeRecordWorth(fileTimes(recordPath));
            unsigned bodyShareCount = 0;
            if (traverseOperation.flags & TraverseFlag::ShareCount)
                bodyShareCount = m_blobStorage.shareCount(blobPathForRecordPath(recordPath));

            std::unique_lock<Lock> lock(traverseOperation.activeMutex);
            ++traverseOperation.activeCount;

            auto channel = IOChannel::open(recordPath, IOChannel::Type::Read);
            channel->read(0, std::numeric_limits<size_t>::max(), nullptr, [this, &traverseOperation, worth, bodyShareCount](Data& fileData, int) {
                RecordMetaData metaData;
                Data headerData;
                if (decodeRecordHeader(fileData, metaData, headerData, m_salt)) {
                    Record record {
                        metaData.key,
                        metaData.timeStamp,
                        headerData,
                        { },
                        metaData.bodyHash
                    };
                    RecordInfo info {
                        static_cast<size_t>(metaData.bodySize),
                        worth,
                        bodyShareCount,
                        String::fromUTF8(SHA1::hexDigest(metaData.bodyHash))
                    };
                    traverseOperation.handler(&record, info);
                }

                std::lock_guard<Lock> lock(traverseOperation.activeMutex);
                --traverseOperation.activeCount;
                traverseOperation.activeCondition.notifyOne();
            });

            static const unsigned maximumParallelReadCount = 5;
            traverseOperation.activeCondition.wait(lock, [&traverseOperation] {
                return traverseOperation.activeCount <= maximumParallelReadCount;
            });
        });
        {
            // Wait for all reads to finish.
            std::unique_lock<Lock> lock(traverseOperation.activeMutex);
            traverseOperation.activeCondition.wait(lock, [&traverseOperation] {
                return !traverseOperation.activeCount;
            });
        }
        RunLoop::main().dispatch([this, &traverseOperation] {
            traverseOperation.handler(nullptr, { });

            auto protectedThis = makeRef(*this);

            m_activeTraverseOperations.remove(&traverseOperation);
        });
    });
}

void Storage::setCapacity(size_t capacity)
{
    ASSERT(RunLoop::isMain());

#if !ASSERT_DISABLED
    const size_t assumedAverageRecordSize = 50 << 10;
    size_t maximumRecordCount = capacity / assumedAverageRecordSize;
    // ~10 bits per element are required for <1% false positive rate.
    size_t effectiveBloomFilterCapacity = ContentsFilter::tableSize / 10;
    // If this gets hit it might be time to increase the filter size.
    ASSERT(maximumRecordCount < effectiveBloomFilterCapacity);
#endif

    m_capacity = capacity;

    shrinkIfNeeded();
}

void Storage::clear(const String& type, WallTime modifiedSinceTime, CompletionHandler<void()>&& completionHandler)
{
    ASSERT(RunLoop::isMain());
    LOG(NetworkCacheStorage, "(NetworkProcess) clearing cache");

    if (m_recordFilter)
        m_recordFilter->clear();
    if (m_blobFilter)
        m_blobFilter->clear();
    m_approximateRecordsSize = 0;

    ioQueue().dispatch([this, protectedThis = makeRef(*this), modifiedSinceTime, completionHandler = WTFMove(completionHandler), type = type.isolatedCopy()] () mutable {
        auto recordsPath = this->recordsPath();
        traverseRecordsFiles(recordsPath, type, [modifiedSinceTime](const String& fileName, const String& hashString, const String& type, bool isBlob, const String& recordDirectoryPath) {
            auto filePath = FileSystem::pathByAppendingComponent(recordDirectoryPath, fileName);
            if (modifiedSinceTime > -WallTime::infinity()) {
                auto times = fileTimes(filePath);
                if (times.modification < modifiedSinceTime)
                    return;
            }
            FileSystem::deleteFile(filePath);
        });

        deleteEmptyRecordsDirectories(recordsPath);

        // This cleans unreferenced blobs.
        m_blobStorage.synchronize();

        RunLoop::main().dispatch(WTFMove(completionHandler));
    });
}

static double computeRecordWorth(FileTimes times)
{
    auto age = WallTime::now() - times.creation;
    // File modification time is updated manually on cache read. We don't use access time since OS may update it automatically.
    auto accessAge = times.modification - times.creation;

    // For sanity.
    if (age <= 0_s || accessAge < 0_s || accessAge > age)
        return 0;

    // We like old entries that have been accessed recently.
    return accessAge / age;
}
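
// Example: an entry created 10 days ago and last read one day ago (modification
// time is bumped on cache reads) has accessAge / age = 9 / 10 = 0.9; entries that
// are old but recently used score close to 1, while never-read entries score 0.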

static double deletionProbability(FileTimes times, unsigned bodyShareCount)
{
    static const double maximumProbability { 0.33 };
    static const unsigned maximumEffectiveShareCount { 5 };

    auto worth = computeRecordWorth(times);

    // Adjust a bit so the most valuable entries don't get deleted at all.
    auto effectiveWorth = std::min(1.1 * worth, 1.);

    auto probability = (1 - effectiveWorth) * maximumProbability;

    // It is less useful to remove an entry that shares its body data.
    if (bodyShareCount)
        probability /= std::min(bodyShareCount, maximumEffectiveShareCount);

    return probability;
}
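
// Continuing the example: worth 0.9 gives effectiveWorth 0.99 and a deletion
// probability of (1 - 0.99) * 0.33 ≈ 0.0033 per shrink pass, while a never-read
// entry (worth 0) gets the full 0.33; sharing its body with, say, 3 other records
// further divides that probability by 3.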

void Storage::shrinkIfNeeded()
{
    ASSERT(RunLoop::isMain());

    // Avoid randomness caused by cache shrinks.
    if (m_mode == Mode::AvoidRandomness)
        return;

    if (approximateSize() > m_capacity)
        shrink();
}

void Storage::shrink()
{
    ASSERT(RunLoop::isMain());

    if (m_shrinkInProgress || m_synchronizationInProgress)
        return;
    m_shrinkInProgress = true;

    LOG(NetworkCacheStorage, "(NetworkProcess) shrinking cache approximateSize=%zu capacity=%zu", approximateSize(), m_capacity);

    backgroundIOQueue().dispatch([this, protectedThis = makeRef(*this)] () mutable {
        auto recordsPath = this->recordsPath();
        String anyType;
        traverseRecordsFiles(recordsPath, anyType, [this](const String& fileName, const String& hashString, const String& type, bool isBlob, const String& recordDirectoryPath) {
            if (isBlob)
                return;

            auto recordPath = FileSystem::pathByAppendingComponent(recordDirectoryPath, fileName);
            auto blobPath = blobPathForRecordPath(recordPath);

            auto times = fileTimes(recordPath);
            unsigned bodyShareCount = m_blobStorage.shareCount(blobPath);
            auto probability = deletionProbability(times, bodyShareCount);

            bool shouldDelete = randomNumber() < probability;

            LOG(NetworkCacheStorage, "Deletion probability=%f bodyLinkCount=%d shouldDelete=%d", probability, bodyShareCount, shouldDelete);

            if (shouldDelete) {
                FileSystem::deleteFile(recordPath);
                m_blobStorage.remove(blobPath);
            }
        });

        RunLoop::main().dispatch([this, protectedThis = WTFMove(protectedThis)] {
            m_shrinkInProgress = false;
            // We could synchronize during the shrink traversal. However this is fast and it is better to have just one code path.
            synchronize();
        });

        LOG(NetworkCacheStorage, "(NetworkProcess) cache shrink completed");
    });
}

void Storage::deleteOldVersions()
{
    backgroundIOQueue().dispatch([this, protectedThis = makeRef(*this)] () mutable {
        auto cachePath = basePath();
        traverseDirectory(cachePath, [&cachePath](const String& subdirName, DirectoryEntryType type) {
            if (type != DirectoryEntryType::Directory)
                return;
            if (!subdirName.startsWith(versionDirectoryPrefix))
                return;
            auto versionString = subdirName.substring(strlen(versionDirectoryPrefix));
            bool success;
            unsigned directoryVersion = versionString.toUIntStrict(&success);
            if (!success)
                return;
            if (directoryVersion >= version)
                return;
#if PLATFORM(MAC)
            if (!AuxiliaryProcess::isSystemWebKit() && directoryVersion == lastStableVersion)
                return;
#endif

            auto oldVersionPath = FileSystem::pathByAppendingComponent(cachePath, subdirName);
            LOG(NetworkCacheStorage, "(NetworkProcess) deleting old cache version, path %s", oldVersionPath.utf8().data());

            deleteDirectoryRecursively(oldVersionPath);
        });
    });
}

}
}