2ab5f071f3ec88fb1353013ba6930669be6dff47
[WebKit.git] / Source / WebKit / NetworkProcess / Classifier / WebResourceLoadStatisticsTelemetry.cpp
1 /*
2  * Copyright (C) 2017 Apple Inc. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  * 1. Redistributions of source code must retain the above copyright
8  *    notice, this list of conditions and the following disclaimer.
9  * 2. Redistributions in binary form must reproduce the above copyright
10  *    notice, this list of conditions and the following disclaimer in the
11  *    documentation and/or other materials provided with the distribution.
12  *
13  * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS''
14  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
15  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS
17  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
18  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
19  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
20  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
21  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
22  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
23  * THE POSSIBILITY OF SUCH DAMAGE.
24  */
25
26 #include "config.h"
27 #include "WebResourceLoadStatisticsTelemetry.h"
28
29 #if ENABLE(RESOURCE_LOAD_STATISTICS)
30
31 #include "ResourceLoadStatisticsMemoryStore.h"
32 #include "WebPageProxy.h"
33 #include "WebProcessPool.h"
34 #include "WebProcessProxy.h"
35 #include <WebCore/DiagnosticLoggingKeys.h>
36 #include <WebCore/ResourceLoadStatistics.h>
37 #include <wtf/MainThread.h>
38 #include <wtf/NeverDestroyed.h>
39 #include <wtf/RunLoop.h>
40 #include <wtf/text/StringBuilder.h>
41
42 namespace WebKit {
43 using namespace WebCore;
44
// Fewer prevalent resources than this and no telemetry records are produced at all.
constexpr unsigned minimumPrevalentResourcesForTelemetry = 3;
// Number of significant figures handed to the diagnostic logging calls with every value.
constexpr unsigned significantFiguresForLoggedValues = 3;
// Testing switch: when true, captured figures are reported through notifyPages() instead of being submitted.
static bool notifyPagesWhenTelemetryWasCaptured = false;
48
// Snapshot of the per-resource figures that telemetry is computed from.
// The member order is significant: instances are created with positional aggregate initialization.
// Every member carries a default so that a default-constructed instance never holds indeterminate values.
struct PrevalentResourceTelemetry {
    // Number of times data records were removed for the resource.
    unsigned numberOfTimesDataRecordsRemoved { 0 };
    // Whether the resource has had user interaction.
    bool hasHadUserInteraction { false };
    // Whole days since the most recent user interaction (0 when no interaction time is recorded).
    unsigned daysSinceUserInteraction { 0 };
    // Sizes of the corresponding per-resource sets.
    unsigned subframeUnderTopFrameOrigins { 0 };
    unsigned subresourceUnderTopFrameOrigins { 0 };
    unsigned subresourceUniqueRedirectsTo { 0 };
    // Counters for first-party access, split by the reason access was granted.
    unsigned timesAccessedAsFirstPartyDueToUserInteraction { 0 };
    unsigned timesAccessedAsFirstPartyDueToStorageAccessAPI { 0 };
};
59
60 static Vector<PrevalentResourceTelemetry> sortedPrevalentResourceTelemetry(const ResourceLoadStatisticsMemoryStore& store)
61 {
62     ASSERT(!RunLoop::isMain());
63     Vector<PrevalentResourceTelemetry> sorted;
64     store.processStatistics([&sorted] (auto& statistic) {
65         if (!statistic.isPrevalentResource)
66             return;
67
68         unsigned daysSinceUserInteraction = statistic.mostRecentUserInteractionTime <= WallTime() ? 0 : std::floor((WallTime::now() - statistic.mostRecentUserInteractionTime) / 24_h);
69         sorted.append(PrevalentResourceTelemetry {
70             statistic.dataRecordsRemoved,
71             statistic.hadUserInteraction,
72             daysSinceUserInteraction,
73             statistic.subframeUnderTopFrameDomains.size(),
74             statistic.subresourceUnderTopFrameDomains.size(),
75             statistic.subresourceUniqueRedirectsTo.size(),
76             statistic.timesAccessedAsFirstPartyDueToUserInteraction,
77             statistic.timesAccessedAsFirstPartyDueToStorageAccessAPI
78         });
79     });
80
81     if (sorted.size() < minimumPrevalentResourcesForTelemetry)
82         return { };
83
84     std::sort(sorted.begin(), sorted.end(), [](const PrevalentResourceTelemetry& a, const PrevalentResourceTelemetry& b) {
85         return a.subframeUnderTopFrameOrigins + a.subresourceUnderTopFrameOrigins + a.subresourceUniqueRedirectsTo >
86         b.subframeUnderTopFrameOrigins + b.subresourceUnderTopFrameOrigins + b.subresourceUniqueRedirectsTo;
87     });
88
89     return sorted;
90 }
91
92 static unsigned numberOfResourcesWithUserInteraction(const Vector<PrevalentResourceTelemetry>& resources, size_t begin, size_t end)
93 {
94     if (resources.isEmpty() || resources.size() < begin + 1 || resources.size() < end + 1)
95         return 0;
96     
97     unsigned result = 0;
98     for (size_t i = begin; i < end; ++i) {
99         if (resources[i].hasHadUserInteraction)
100             ++result;
101     }
102     
103     return result;
104 }
105     
106 static unsigned median(const Vector<unsigned>& v)
107 {
108     if (v.isEmpty())
109         return 0;
110     if (v.size() == 1)
111         return v[0];
112     
113     auto size = v.size();
114     auto middle = size / 2;
115     if (size % 2)
116         return v[middle];
117     return (v[middle - 1] + v[middle]) / 2;
118 }
119     
120 static unsigned median(const Vector<PrevalentResourceTelemetry>& v, unsigned begin, unsigned end, const WTF::Function<unsigned(const PrevalentResourceTelemetry& telemetry)>& statisticGetter)
121 {
122     if (v.isEmpty() || v.size() < begin + 1 || v.size() < end + 1)
123         return 0;
124     
125     Vector<unsigned> part;
126     part.reserveInitialCapacity(end - begin + 1);
127     for (unsigned i = begin; i <= end; ++i)
128         part.uncheckedAppend(statisticGetter(v[i]));
129     
130     return median(part);
131 }
132     
133 static void submitTopList(unsigned numberOfResourcesFromTheTop, const Vector<PrevalentResourceTelemetry>& sortedPrevalentResources, const Vector<PrevalentResourceTelemetry>& sortedPrevalentResourcesWithoutUserInteraction, const WebResourceLoadStatisticsStore& store)
134 {
135     WTF::Function<unsigned(const PrevalentResourceTelemetry& telemetry)> subframeUnderTopFrameOriginsGetter = [] (auto& t) {
136         return t.subframeUnderTopFrameOrigins;
137     };
138     WTF::Function<unsigned(const PrevalentResourceTelemetry& telemetry)> subresourceUnderTopFrameOriginsGetter = [] (auto& t) {
139         return t.subresourceUnderTopFrameOrigins;
140     };
141     WTF::Function<unsigned(const PrevalentResourceTelemetry& telemetry)> subresourceUniqueRedirectsToGetter = [] (auto& t) {
142         return t.subresourceUniqueRedirectsTo;
143     };
144     WTF::Function<unsigned(const PrevalentResourceTelemetry& telemetry)> numberOfTimesDataRecordsRemovedGetter = [] (auto& t) {
145         return t.numberOfTimesDataRecordsRemoved;
146     };
147     WTF::Function<unsigned(const PrevalentResourceTelemetry& telemetry)> numberOfTimesAccessedAsFirstPartyDueToUserInteractionGetter = [] (auto& t) {
148         return t.timesAccessedAsFirstPartyDueToUserInteraction;
149     };
150     WTF::Function<unsigned(const PrevalentResourceTelemetry& telemetry)> numberOfTimesAccessedAsFirstPartyDueToStorageAccessAPIGetter = [] (auto& t) {
151         return t.timesAccessedAsFirstPartyDueToStorageAccessAPI;
152     };
153
154     unsigned topPrevalentResourcesWithUserInteraction = numberOfResourcesWithUserInteraction(sortedPrevalentResources, 0, numberOfResourcesFromTheTop - 1);
155     unsigned topSubframeUnderTopFrameOrigins = median(sortedPrevalentResourcesWithoutUserInteraction, 0, numberOfResourcesFromTheTop - 1, subframeUnderTopFrameOriginsGetter);
156     unsigned topSubresourceUnderTopFrameOrigins = median(sortedPrevalentResourcesWithoutUserInteraction, 0, numberOfResourcesFromTheTop - 1, subresourceUnderTopFrameOriginsGetter);
157     unsigned topSubresourceUniqueRedirectsTo = median(sortedPrevalentResourcesWithoutUserInteraction, 0, numberOfResourcesFromTheTop - 1, subresourceUniqueRedirectsToGetter);
158     unsigned topNumberOfTimesDataRecordsRemoved = median(sortedPrevalentResourcesWithoutUserInteraction, 0, numberOfResourcesFromTheTop - 1, numberOfTimesDataRecordsRemovedGetter);
159     unsigned topNumberOfTimesAccessedAsFirstPartyDueToUserInteraction = median(sortedPrevalentResourcesWithoutUserInteraction, 0, numberOfResourcesFromTheTop - 1, numberOfTimesAccessedAsFirstPartyDueToUserInteractionGetter);
160     unsigned topNumberOfTimesAccessedAsFirstPartyDueToStorageAccessAPI = median(sortedPrevalentResourcesWithoutUserInteraction, 0, numberOfResourcesFromTheTop - 1, numberOfTimesAccessedAsFirstPartyDueToStorageAccessAPIGetter);
161
162     StringBuilder preambleBuilder;
163     preambleBuilder.appendLiteral("top");
164     preambleBuilder.appendNumber(numberOfResourcesFromTheTop);
165     String descriptionPreamble = preambleBuilder.toString();
166     
167     store.sendDiagnosticMessageWithValue(DiagnosticLoggingKeys::resourceLoadStatisticsTelemetryKey(), descriptionPreamble + "PrevalentResourcesWithUserInteraction",
168         topPrevalentResourcesWithUserInteraction, significantFiguresForLoggedValues, ShouldSample::No);
169     store.sendDiagnosticMessageWithValue(DiagnosticLoggingKeys::resourceLoadStatisticsTelemetryKey(), descriptionPreamble + "SubframeUnderTopFrameOrigins",
170         topSubframeUnderTopFrameOrigins, significantFiguresForLoggedValues, ShouldSample::No);
171     store.sendDiagnosticMessageWithValue(DiagnosticLoggingKeys::resourceLoadStatisticsTelemetryKey(), descriptionPreamble + "SubresourceUnderTopFrameOrigins",
172         topSubresourceUnderTopFrameOrigins, significantFiguresForLoggedValues, ShouldSample::No);
173     store.sendDiagnosticMessageWithValue(DiagnosticLoggingKeys::resourceLoadStatisticsTelemetryKey(), descriptionPreamble + "SubresourceUniqueRedirectsTo",
174         topSubresourceUniqueRedirectsTo, significantFiguresForLoggedValues, ShouldSample::No);
175     store.sendDiagnosticMessageWithValue(DiagnosticLoggingKeys::resourceLoadStatisticsTelemetryKey(), descriptionPreamble + "NumberOfTimesDataRecordsRemoved",
176         topNumberOfTimesDataRecordsRemoved, significantFiguresForLoggedValues, ShouldSample::No);
177     store.sendDiagnosticMessageWithValue(DiagnosticLoggingKeys::resourceLoadStatisticsTelemetryKey(), descriptionPreamble + "NumberOfTimesAccessedAsFirstPartyDueToUserInteraction",
178         topNumberOfTimesAccessedAsFirstPartyDueToUserInteraction, significantFiguresForLoggedValues, ShouldSample::No);
179     store.sendDiagnosticMessageWithValue(DiagnosticLoggingKeys::resourceLoadStatisticsTelemetryKey(), descriptionPreamble + "NumberOfTimesAccessedAsFirstPartyDueToStorageAccessAPI",
180         topNumberOfTimesAccessedAsFirstPartyDueToStorageAccessAPI, significantFiguresForLoggedValues, ShouldSample::No);
181 }
182     
183 static void submitTopLists(const Vector<PrevalentResourceTelemetry>& sortedPrevalentResources, const Vector<PrevalentResourceTelemetry>& sortedPrevalentResourcesWithoutUserInteraction, const WebResourceLoadStatisticsStore& store)
184 {
185     submitTopList(1, sortedPrevalentResources, sortedPrevalentResourcesWithoutUserInteraction, store);
186     
187     if (sortedPrevalentResourcesWithoutUserInteraction.size() < 3)
188         return;
189     submitTopList(3, sortedPrevalentResources, sortedPrevalentResourcesWithoutUserInteraction, store);
190     
191     if (sortedPrevalentResourcesWithoutUserInteraction.size() < 10)
192         return;
193     submitTopList(10, sortedPrevalentResources, sortedPrevalentResourcesWithoutUserInteraction, store);
194     
195     if (sortedPrevalentResourcesWithoutUserInteraction.size() < 50)
196         return;
197     submitTopList(50, sortedPrevalentResources, sortedPrevalentResourcesWithoutUserInteraction, store);
198     
199     if (sortedPrevalentResourcesWithoutUserInteraction.size() < 100)
200         return;
201     submitTopList(100, sortedPrevalentResources, sortedPrevalentResourcesWithoutUserInteraction, store);
202 }
203     
204 // This function is for testing purposes.
205 void static notifyPages(unsigned totalPrevalentResources, unsigned totalPrevalentResourcesWithUserInteraction, unsigned top3SubframeUnderTopFrameOrigins, const WebResourceLoadStatisticsStore& store)
206 {
207     RunLoop::main().dispatch([totalPrevalentResources, totalPrevalentResourcesWithUserInteraction, top3SubframeUnderTopFrameOrigins, store = makeRef(store)] {
208         store->notifyPageStatisticsTelemetryFinished(totalPrevalentResources, totalPrevalentResourcesWithUserInteraction, top3SubframeUnderTopFrameOrigins);
209     });
210 }
211     
212 // This function is for testing purposes.
213 void static notifyPages(const Vector<PrevalentResourceTelemetry>& sortedPrevalentResources, const Vector<PrevalentResourceTelemetry>& sortedPrevalentResourcesWithoutUserInteraction, unsigned totalNumberOfPrevalentResourcesWithUserInteraction, const WebResourceLoadStatisticsStore& store)
214 {
215     WTF::Function<unsigned(const PrevalentResourceTelemetry& telemetry)> subframeUnderTopFrameOriginsGetter = [] (const PrevalentResourceTelemetry& t) {
216         return t.subframeUnderTopFrameOrigins;
217     };
218     
219     notifyPages(sortedPrevalentResources.size(), totalNumberOfPrevalentResourcesWithUserInteraction, median(sortedPrevalentResourcesWithoutUserInteraction, 0, 2, subframeUnderTopFrameOriginsGetter), store);
220 }
221     
222 void WebResourceLoadStatisticsTelemetry::calculateAndSubmit(const ResourceLoadStatisticsMemoryStore& resourceLoadStatisticsStore)
223 {
224     ASSERT(!RunLoop::isMain());
225     
226     auto sortedPrevalentResources = sortedPrevalentResourceTelemetry(resourceLoadStatisticsStore);
227     if (notifyPagesWhenTelemetryWasCaptured && sortedPrevalentResources.isEmpty()) {
228         notifyPages(0, 0, 0, resourceLoadStatisticsStore.store());
229         return;
230     }
231     
232     Vector<PrevalentResourceTelemetry> sortedPrevalentResourcesWithoutUserInteraction;
233     sortedPrevalentResourcesWithoutUserInteraction.reserveInitialCapacity(sortedPrevalentResources.size());
234     Vector<unsigned> prevalentResourcesDaysSinceUserInteraction;
235     
236     for (auto& prevalentResource : sortedPrevalentResources) {
237         if (prevalentResource.hasHadUserInteraction)
238             prevalentResourcesDaysSinceUserInteraction.append(prevalentResource.daysSinceUserInteraction);
239         else
240             sortedPrevalentResourcesWithoutUserInteraction.uncheckedAppend(prevalentResource);
241     }
242     
243     // Dispatch on the main thread to make sure the WebPageProxy we're using doesn't go away.
244     RunLoop::main().dispatch([sortedPrevalentResources = WTFMove(sortedPrevalentResources), sortedPrevalentResourcesWithoutUserInteraction = WTFMove(sortedPrevalentResourcesWithoutUserInteraction), prevalentResourcesDaysSinceUserInteraction = WTFMove(prevalentResourcesDaysSinceUserInteraction), resourceLoadStatisticsStore = makeWeakPtr(resourceLoadStatisticsStore)] () {
245         if (!resourceLoadStatisticsStore)
246             return;
247
248         auto webPageProxy = WebPageProxy::nonEphemeralWebPageProxy();
249         if (!webPageProxy) {
250             if (notifyPagesWhenTelemetryWasCaptured)
251                 notifyPages(0, 0, 0, resourceLoadStatisticsStore->store());
252             return;
253         }
254         
255         if (notifyPagesWhenTelemetryWasCaptured) {
256             notifyPages(sortedPrevalentResources, sortedPrevalentResourcesWithoutUserInteraction, prevalentResourcesDaysSinceUserInteraction.size(), resourceLoadStatisticsStore->store());
257             // The notify pages function is for testing so we don't need to do an actual submission.
258             return;
259         }
260         
261         webPageProxy->logDiagnosticMessageWithValue(DiagnosticLoggingKeys::resourceLoadStatisticsTelemetryKey(), "totalNumberOfPrevalentResources"_s, sortedPrevalentResources.size(), significantFiguresForLoggedValues, ShouldSample::No);
262         webPageProxy->logDiagnosticMessageWithValue(DiagnosticLoggingKeys::resourceLoadStatisticsTelemetryKey(), "totalNumberOfPrevalentResourcesWithUserInteraction"_s, prevalentResourcesDaysSinceUserInteraction.size(), significantFiguresForLoggedValues, ShouldSample::No);
263         
264         if (prevalentResourcesDaysSinceUserInteraction.size() > 0)
265             webPageProxy->logDiagnosticMessageWithValue(DiagnosticLoggingKeys::resourceLoadStatisticsTelemetryKey(), "topPrevalentResourceWithUserInteractionDaysSinceUserInteraction"_s, prevalentResourcesDaysSinceUserInteraction[0], significantFiguresForLoggedValues, ShouldSample::No);
266         if (prevalentResourcesDaysSinceUserInteraction.size() > 1)
267             webPageProxy->logDiagnosticMessageWithValue(DiagnosticLoggingKeys::resourceLoadStatisticsTelemetryKey(), "medianPrevalentResourcesWithUserInteractionDaysSinceUserInteraction"_s, median(prevalentResourcesDaysSinceUserInteraction), significantFiguresForLoggedValues, ShouldSample::No);
268         
269         submitTopLists(sortedPrevalentResources, sortedPrevalentResourcesWithoutUserInteraction, resourceLoadStatisticsStore->store());
270     });
271 }
272     
273 void WebResourceLoadStatisticsTelemetry::setNotifyPagesWhenTelemetryWasCaptured(bool always)
274 {
275     notifyPagesWhenTelemetryWasCaptured = always;
276 }
277     
278 }
279
280 #endif