Move ResourceLoadStatisticsStore to WebKit2/UIProcess
[WebKit-https.git] / Source / WebCore / loader / ResourceLoadObserver.cpp
1 /*
2  * Copyright (C) 2016-2017 Apple Inc. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  * 1. Redistributions of source code must retain the above copyright
8  *    notice, this list of conditions and the following disclaimer.
9  * 2. Redistributions in binary form must reproduce the above copyright
10  *    notice, this list of conditions and the following disclaimer in the
11  *    documentation and/or other materials provided with the distribution.
12  *
13  * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS''
14  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
15  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS
17  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
18  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
19  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
20  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
21  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
22  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
23  * THE POSSIBILITY OF SUCH DAMAGE.
24  */
25
26 #include "config.h"
27 #include "ResourceLoadObserver.h"
28
29 #include "Document.h"
30 #include "Frame.h"
31 #include "Logging.h"
32 #include "MainFrame.h"
33 #include "Page.h"
34 #include "ResourceLoadStatistics.h"
35 #include "ResourceRequest.h"
36 #include "ResourceResponse.h"
37 #include "SecurityOrigin.h"
38 #include "Settings.h"
39 #include "URL.h"
40 #include <wtf/NeverDestroyed.h>
41
42 namespace WebCore {
43
44 template<typename T> static inline String primaryDomain(const T& value)
45 {
46     return ResourceLoadStatistics::primaryDomain(value);
47 }
48
49 static Seconds timestampResolution { 1_h };
50
51 ResourceLoadObserver& ResourceLoadObserver::shared()
52 {
53     static NeverDestroyed<ResourceLoadObserver> resourceLoadObserver;
54     return resourceLoadObserver;
55 }
56
57 void ResourceLoadObserver::setNotificationCallback(WTF::Function<void()>&& notificationCallback)
58 {
59     ASSERT(!m_notificationCallback);
60     m_notificationCallback = WTFMove(notificationCallback);
61 }
62
63 static inline bool is3xxRedirect(const ResourceResponse& response)
64 {
65     return response.httpStatusCode() >= 300 && response.httpStatusCode() <= 399;
66 }
67
68 bool ResourceLoadObserver::shouldLog(Page* page) const
69 {
70     // FIXME: Err on the safe side until we have sorted out what to do in worker contexts
71     if (!page)
72         return false;
73
74     return Settings::resourceLoadStatisticsEnabled() && !page->usesEphemeralSession() && m_notificationCallback;
75 }
76
77 void ResourceLoadObserver::logFrameNavigation(const Frame& frame, const Frame& topFrame, const ResourceRequest& newRequest, const ResourceResponse& redirectResponse)
78 {
79     ASSERT(frame.document());
80     ASSERT(topFrame.document());
81     ASSERT(topFrame.page());
82     
83     if (!shouldLog(topFrame.page()))
84         return;
85
86     bool isRedirect = is3xxRedirect(redirectResponse);
87     bool isMainFrame = frame.isMainFrame();
88     auto& sourceURL = frame.document()->url();
89     auto& targetURL = newRequest.url();
90     auto& mainFrameURL = topFrame.document()->url();
91     
92     if (!targetURL.isValid() || !mainFrameURL.isValid())
93         return;
94
95     auto targetHost = targetURL.host();
96     auto mainFrameHost = mainFrameURL.host();
97
98     if (targetHost.isEmpty() || mainFrameHost.isEmpty() || targetHost == mainFrameHost || targetHost == sourceURL.host())
99         return;
100
101     auto targetPrimaryDomain = primaryDomain(targetURL);
102     auto mainFramePrimaryDomain = primaryDomain(mainFrameURL);
103     auto sourcePrimaryDomain = primaryDomain(sourceURL);
104     
105     if (targetPrimaryDomain == mainFramePrimaryDomain || targetPrimaryDomain == sourcePrimaryDomain)
106         return;
107
108     auto targetStatistics = takeResourceStatisticsForPrimaryDomain(targetPrimaryDomain);
109
110     // Always fire if we have previously removed data records for this domain
111     bool shouldCallNotificationCallback = targetStatistics.dataRecordsRemoved > 0;
112
113     if (isMainFrame)
114         targetStatistics.topFrameHasBeenNavigatedToBefore = true;
115     else {
116         targetStatistics.subframeHasBeenLoadedBefore = true;
117
118         auto subframeUnderTopFrameOriginsResult = targetStatistics.subframeUnderTopFrameOrigins.add(mainFramePrimaryDomain);
119         if (subframeUnderTopFrameOriginsResult.isNewEntry)
120             shouldCallNotificationCallback = true;
121     }
122
123     if (isRedirect) {
124         auto& redirectingOriginResourceStatistics = ensureResourceStatisticsForPrimaryDomain(sourcePrimaryDomain);
125
126         if (isPrevalentResource(targetPrimaryDomain))
127             redirectingOriginResourceStatistics.redirectedToOtherPrevalentResourceOrigins.add(targetPrimaryDomain);
128
129         if (isMainFrame) {
130             ++targetStatistics.topFrameHasBeenRedirectedTo;
131             ++redirectingOriginResourceStatistics.topFrameHasBeenRedirectedFrom;
132         } else {
133             ++targetStatistics.subframeHasBeenRedirectedTo;
134             ++redirectingOriginResourceStatistics.subframeHasBeenRedirectedFrom;
135             redirectingOriginResourceStatistics.subframeUniqueRedirectsTo.add(targetPrimaryDomain);
136
137             ++targetStatistics.subframeSubResourceCount;
138         }
139     } else {
140         if (sourcePrimaryDomain.isNull() || sourcePrimaryDomain.isEmpty() || sourcePrimaryDomain == "nullOrigin") {
141             if (isMainFrame)
142                 ++targetStatistics.topFrameInitialLoadCount;
143             else
144                 ++targetStatistics.subframeSubResourceCount;
145         } else {
146             auto& sourceOriginResourceStatistics = ensureResourceStatisticsForPrimaryDomain(sourcePrimaryDomain);
147
148             if (isMainFrame) {
149                 ++sourceOriginResourceStatistics.topFrameHasBeenNavigatedFrom;
150                 ++targetStatistics.topFrameHasBeenNavigatedTo;
151             } else {
152                 ++sourceOriginResourceStatistics.subframeHasBeenNavigatedFrom;
153                 ++targetStatistics.subframeHasBeenNavigatedTo;
154             }
155         }
156     }
157
158     m_resourceStatisticsMap.set(targetPrimaryDomain, WTFMove(targetStatistics));
159
160     if (shouldCallNotificationCallback)
161         m_notificationCallback();
162 }
163     
164 void ResourceLoadObserver::logSubresourceLoading(const Frame* frame, const ResourceRequest& newRequest, const ResourceResponse& redirectResponse)
165 {
166     ASSERT(frame->page());
167
168     if (!shouldLog(frame->page()))
169         return;
170
171     bool isRedirect = is3xxRedirect(redirectResponse);
172     const URL& sourceURL = redirectResponse.url();
173     const URL& targetURL = newRequest.url();
174     const URL& mainFrameURL = frame ? frame->mainFrame().document()->url() : URL();
175     
176     auto targetHost = targetURL.host();
177     auto mainFrameHost = mainFrameURL.host();
178
179     if (targetHost.isEmpty() || mainFrameHost.isEmpty() || targetHost == mainFrameHost || (isRedirect && targetHost == sourceURL.host()))
180         return;
181
182     auto targetPrimaryDomain = primaryDomain(targetURL);
183     auto mainFramePrimaryDomain = primaryDomain(mainFrameURL);
184     auto sourcePrimaryDomain = primaryDomain(sourceURL);
185     
186     if (targetPrimaryDomain == mainFramePrimaryDomain || (isRedirect && targetPrimaryDomain == sourcePrimaryDomain))
187         return;
188
189     auto targetStatistics = takeResourceStatisticsForPrimaryDomain(targetPrimaryDomain);
190
191     // Always fire if we have previously removed data records for this domain
192     bool shouldCallNotificationCallback = targetStatistics.dataRecordsRemoved > 0;
193
194     auto subresourceUnderTopFrameOriginsResult = targetStatistics.subresourceUnderTopFrameOrigins.add(mainFramePrimaryDomain);
195     if (subresourceUnderTopFrameOriginsResult.isNewEntry)
196         shouldCallNotificationCallback = true;
197
198     if (isRedirect) {
199         auto& redirectingOriginStatistics = ensureResourceStatisticsForPrimaryDomain(sourcePrimaryDomain);
200
201         if (isPrevalentResource(targetPrimaryDomain))
202             redirectingOriginStatistics.redirectedToOtherPrevalentResourceOrigins.add(targetPrimaryDomain);
203
204         ++redirectingOriginStatistics.subresourceHasBeenRedirectedFrom;
205         ++targetStatistics.subresourceHasBeenRedirectedTo;
206
207         auto subresourceUniqueRedirectsToResult = redirectingOriginStatistics.subresourceUniqueRedirectsTo.add(targetPrimaryDomain);
208         if (subresourceUniqueRedirectsToResult.isNewEntry)
209             shouldCallNotificationCallback = true;
210
211         ++targetStatistics.subresourceHasBeenSubresourceCount;
212
213         auto totalVisited = std::max(m_originsVisitedMap.size(), 1U);
214
215         targetStatistics.subresourceHasBeenSubresourceCountDividedByTotalNumberOfOriginsVisited = static_cast<double>(targetStatistics.subresourceHasBeenSubresourceCount) / totalVisited;
216     } else {
217         ++targetStatistics.subresourceHasBeenSubresourceCount;
218
219         auto totalVisited = std::max(m_originsVisitedMap.size(), 1U);
220
221         targetStatistics.subresourceHasBeenSubresourceCountDividedByTotalNumberOfOriginsVisited = static_cast<double>(targetStatistics.subresourceHasBeenSubresourceCount) / totalVisited;
222     }
223
224     m_resourceStatisticsMap.set(targetPrimaryDomain, WTFMove(targetStatistics));
225
226     if (shouldCallNotificationCallback)
227         m_notificationCallback();
228 }
229
230 void ResourceLoadObserver::logWebSocketLoading(const Frame* frame, const URL& targetURL)
231 {
232     // FIXME: Web sockets can run in detached frames. Decide how to count such connections.
233     // See LayoutTests/http/tests/websocket/construct-in-detached-frame.html
234     if (!frame)
235         return;
236
237     if (!shouldLog(frame->page()))
238         return;
239
240     auto& mainFrameURL = frame->mainFrame().document()->url();
241
242     auto targetHost = targetURL.host();
243     auto mainFrameHost = mainFrameURL.host();
244     
245     if (targetHost.isEmpty() || mainFrameHost.isEmpty() || targetHost == mainFrameHost)
246         return;
247     
248     auto targetPrimaryDomain = primaryDomain(targetURL);
249     auto mainFramePrimaryDomain = primaryDomain(mainFrameURL);
250     
251     if (targetPrimaryDomain == mainFramePrimaryDomain)
252         return;
253
254     auto& targetStatistics = ensureResourceStatisticsForPrimaryDomain(targetPrimaryDomain);
255
256     // Always fire if we have previously removed data records for this domain
257     bool shouldCallNotificationCallback = targetStatistics.dataRecordsRemoved > 0;
258
259     auto subresourceUnderTopFrameOriginsResult = targetStatistics.subresourceUnderTopFrameOrigins.add(mainFramePrimaryDomain);
260     if (subresourceUnderTopFrameOriginsResult.isNewEntry)
261         shouldCallNotificationCallback = true;
262
263     ++targetStatistics.subresourceHasBeenSubresourceCount;
264
265     auto totalVisited = std::max(m_originsVisitedMap.size(), 1U);
266
267     targetStatistics.subresourceHasBeenSubresourceCountDividedByTotalNumberOfOriginsVisited = static_cast<double>(targetStatistics.subresourceHasBeenSubresourceCount) / totalVisited;
268
269     if (shouldCallNotificationCallback)
270         m_notificationCallback();
271 }
272
273 static WallTime reduceTimeResolution(WallTime time)
274 {
275     return WallTime::fromRawSeconds(std::floor(time.secondsSinceEpoch() / timestampResolution) * timestampResolution.seconds());
276 }
277
278 void ResourceLoadObserver::logUserInteractionWithReducedTimeResolution(const Document& document)
279 {
280     ASSERT(document.page());
281
282     if (!shouldLog(document.page()))
283         return;
284
285     auto& url = document.url();
286     if (url.isBlankURL() || url.isEmpty())
287         return;
288
289     auto& statistics = ensureResourceStatisticsForPrimaryDomain(primaryDomain(url));
290     auto newTime = reduceTimeResolution(WallTime::now());
291     if (newTime == statistics.mostRecentUserInteractionTime)
292         return;
293
294     statistics.hadUserInteraction = true;
295     statistics.mostRecentUserInteractionTime = newTime;
296
297     m_notificationCallback();
298 }
299
300 ResourceLoadStatistics& ResourceLoadObserver::ensureResourceStatisticsForPrimaryDomain(const String& primaryDomain)
301 {
302     auto addResult = m_resourceStatisticsMap.ensure(primaryDomain, [&primaryDomain] {
303         return ResourceLoadStatistics(primaryDomain);
304     });
305     return addResult.iterator->value;
306 }
307
308 ResourceLoadStatistics ResourceLoadObserver::takeResourceStatisticsForPrimaryDomain(const String& primaryDomain)
309 {
310     auto statististics = m_resourceStatisticsMap.take(primaryDomain);
311     if (statististics.highLevelDomain.isNull())
312         statististics.highLevelDomain = primaryDomain;
313     ASSERT(statististics.highLevelDomain == primaryDomain);
314     return statististics;
315 }
316
317 bool ResourceLoadObserver::isPrevalentResource(const String& primaryDomain) const
318 {
319     auto mapEntry = m_resourceStatisticsMap.find(primaryDomain);
320     if (mapEntry == m_resourceStatisticsMap.end())
321         return false;
322     return mapEntry->value.isPrevalentResource;
323 }
324
325 String ResourceLoadObserver::statisticsForOrigin(const String& origin)
326 {
327     auto iter = m_resourceStatisticsMap.find(origin);
328     if (iter == m_resourceStatisticsMap.end())
329         return emptyString();
330
331     return "Statistics for " + origin + ":\n" + iter->value.toString();
332 }
333
334 Vector<ResourceLoadStatistics> ResourceLoadObserver::takeStatistics()
335 {
336     Vector<ResourceLoadStatistics> statistics;
337     statistics.reserveInitialCapacity(m_resourceStatisticsMap.size());
338     for (auto& statistic : m_resourceStatisticsMap.values())
339         statistics.uncheckedAppend(WTFMove(statistic));
340
341     m_resourceStatisticsMap.clear();
342
343     return statistics;
344 }
345
346 } // namespace WebCore