Resource load statistics: Cover further data records, count removed data records...
[WebKit-https.git] / Source / WebCore / loader / ResourceLoadObserver.cpp
1 /*
2  * Copyright (C) 2016 Apple Inc. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  * 1. Redistributions of source code must retain the above copyright
8  *    notice, this list of conditions and the following disclaimer.
9  * 2. Redistributions in binary form must reproduce the above copyright
10  *    notice, this list of conditions and the following disclaimer in the
11  *    documentation and/or other materials provided with the distribution.
12  *
13  * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS''
14  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
15  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS
17  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
18  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
19  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
20  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
21  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
22  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
23  * THE POSSIBILITY OF SUCH DAMAGE.
24  */
25
26 #include "config.h"
27 #include "ResourceLoadObserver.h"
28
29 #include "Document.h"
30 #include "Frame.h"
31 #include "Logging.h"
32 #include "MainFrame.h"
33 #include "NetworkStorageSession.h"
34 #include "Page.h"
35 #include "PlatformStrategies.h"
36 #include "PublicSuffix.h"
37 #include "ResourceLoadStatistics.h"
38 #include "ResourceLoadStatisticsStore.h"
39 #include "ResourceRequest.h"
40 #include "ResourceResponse.h"
41 #include "SecurityOrigin.h"
42 #include "Settings.h"
43 #include "SharedBuffer.h"
44 #include "URL.h"
45 #include <wtf/NeverDestroyed.h>
46 #include <wtf/text/StringBuilder.h>
47
48 namespace WebCore {
49
50 ResourceLoadObserver& ResourceLoadObserver::sharedObserver()
51 {
52     static NeverDestroyed<ResourceLoadObserver> resourceLoadObserver;
53     return resourceLoadObserver;
54 }
55
56 void ResourceLoadObserver::setStatisticsStore(Ref<ResourceLoadStatisticsStore>&& store)
57 {
58     m_store = WTFMove(store);
59 }
60
61 static inline bool is3xxRedirect(const ResourceResponse& response)
62 {
63     return response.httpStatusCode() >= 300 && response.httpStatusCode() <= 399;
64 }
65
66 bool ResourceLoadObserver::shouldLog(Page* page)
67 {
68     // FIXME: Err on the safe side until we have sorted out what to do in worker contexts
69     if (!page)
70         return false;
71     return Settings::resourceLoadStatisticsEnabled()
72         && !page->usesEphemeralSession()
73         && m_store;
74 }
75
76 void ResourceLoadObserver::logFrameNavigation(const Frame& frame, const Frame& topFrame, const ResourceRequest& newRequest, const ResourceResponse& redirectResponse)
77 {
78     ASSERT(frame.document());
79     ASSERT(topFrame.document());
80     ASSERT(topFrame.page());
81     
82     if (!shouldLog(topFrame.page()))
83         return;
84
85     bool isRedirect = is3xxRedirect(redirectResponse);
86     bool isMainFrame = frame.isMainFrame();
87     const URL& sourceURL = frame.document()->url();
88     const URL& targetURL = newRequest.url();
89     const URL& mainFrameURL = topFrame.document()->url();
90     
91     if (!targetURL.isValid() || !mainFrameURL.isValid())
92         return;
93
94     auto targetHost = targetURL.host();
95     auto mainFrameHost = mainFrameURL.host();
96
97     if (targetHost.isEmpty() || mainFrameHost.isEmpty() || targetHost == mainFrameHost || targetHost == sourceURL.host())
98         return;
99
100     auto targetPrimaryDomain = primaryDomain(targetURL);
101     auto mainFramePrimaryDomain = primaryDomain(mainFrameURL);
102     auto sourcePrimaryDomain = primaryDomain(sourceURL);
103     
104     if (targetPrimaryDomain == mainFramePrimaryDomain || targetPrimaryDomain == sourcePrimaryDomain)
105         return;
106
107     auto targetOrigin = SecurityOrigin::create(targetURL);
108     auto targetStatistics = m_store->ensureResourceStatisticsForPrimaryDomain(targetPrimaryDomain);
109
110     // Always fire if we have previously removed data records for this domain
111     bool shouldFireDataModificationHandler = targetStatistics.dataRecordsRemoved > 0;
112
113     if (isMainFrame)
114         targetStatistics.topFrameHasBeenNavigatedToBefore = true;
115     else {
116         targetStatistics.subframeHasBeenLoadedBefore = true;
117
118         auto mainFrameOrigin = SecurityOrigin::create(mainFrameURL);
119         auto subframeUnderTopFrameOriginsResult = targetStatistics.subframeUnderTopFrameOrigins.add(mainFramePrimaryDomain);
120         if (subframeUnderTopFrameOriginsResult.isNewEntry)
121             shouldFireDataModificationHandler = true;
122     }
123     
124     if (isRedirect) {
125         auto& redirectingOriginResourceStatistics = m_store->ensureResourceStatisticsForPrimaryDomain(sourcePrimaryDomain);
126         
127         if (m_store->isPrevalentResource(targetPrimaryDomain))
128             redirectingOriginResourceStatistics.redirectedToOtherPrevalentResourceOrigins.add(targetPrimaryDomain);
129         
130         if (isMainFrame) {
131             ++targetStatistics.topFrameHasBeenRedirectedTo;
132             ++redirectingOriginResourceStatistics.topFrameHasBeenRedirectedFrom;
133         } else {
134             ++targetStatistics.subframeHasBeenRedirectedTo;
135             ++redirectingOriginResourceStatistics.subframeHasBeenRedirectedFrom;
136             redirectingOriginResourceStatistics.subframeUniqueRedirectsTo.add(targetPrimaryDomain);
137             
138             ++targetStatistics.subframeSubResourceCount;
139         }
140     } else {
141         if (sourcePrimaryDomain.isNull() || sourcePrimaryDomain.isEmpty() || sourcePrimaryDomain == "nullOrigin") {
142             if (isMainFrame)
143                 ++targetStatistics.topFrameInitialLoadCount;
144             else
145                 ++targetStatistics.subframeSubResourceCount;
146         } else {
147             auto& sourceOriginResourceStatistics = m_store->ensureResourceStatisticsForPrimaryDomain(sourcePrimaryDomain);
148
149             if (isMainFrame) {
150                 ++sourceOriginResourceStatistics.topFrameHasBeenNavigatedFrom;
151                 ++targetStatistics.topFrameHasBeenNavigatedTo;
152             } else {
153                 ++sourceOriginResourceStatistics.subframeHasBeenNavigatedFrom;
154                 ++targetStatistics.subframeHasBeenNavigatedTo;
155             }
156         }
157     }
158
159     m_store->setResourceStatisticsForPrimaryDomain(targetPrimaryDomain, WTFMove(targetStatistics));
160     if (shouldFireDataModificationHandler)
161         m_store->fireDataModificationHandler();
162 }
163     
164 void ResourceLoadObserver::logSubresourceLoading(const Frame* frame, const ResourceRequest& newRequest, const ResourceResponse& redirectResponse)
165 {
166     ASSERT(frame->page());
167
168     if (!shouldLog(frame->page()))
169         return;
170
171     bool isRedirect = is3xxRedirect(redirectResponse);
172     const URL& sourceURL = redirectResponse.url();
173     const URL& targetURL = newRequest.url();
174     const URL& mainFrameURL = frame ? frame->mainFrame().document()->url() : URL();
175     
176     auto targetHost = targetURL.host();
177     auto mainFrameHost = mainFrameURL.host();
178
179     if (targetHost.isEmpty()
180         || mainFrameHost.isEmpty()
181         || targetHost == mainFrameHost
182         || (isRedirect && targetHost == sourceURL.host()))
183         return;
184
185     auto targetPrimaryDomain = primaryDomain(targetURL);
186     auto mainFramePrimaryDomain = primaryDomain(mainFrameURL);
187     auto sourcePrimaryDomain = primaryDomain(sourceURL);
188     
189     if (targetPrimaryDomain == mainFramePrimaryDomain || (isRedirect && targetPrimaryDomain == sourcePrimaryDomain))
190         return;
191
192     auto& targetStatistics = m_store->ensureResourceStatisticsForPrimaryDomain(targetPrimaryDomain);
193
194     // Always fire if we have previously removed data records for this domain
195     bool shouldFireDataModificationHandler = targetStatistics.dataRecordsRemoved > 0;
196
197     auto mainFrameOrigin = SecurityOrigin::create(mainFrameURL);
198     auto subresourceUnderTopFrameOriginsResult = targetStatistics.subresourceUnderTopFrameOrigins.add(mainFramePrimaryDomain);
199     if (subresourceUnderTopFrameOriginsResult.isNewEntry)
200         shouldFireDataModificationHandler = true;
201
202     if (isRedirect) {
203         auto& redirectingOriginStatistics = m_store->ensureResourceStatisticsForPrimaryDomain(sourcePrimaryDomain);
204         
205         // We just inserted to the store, so we need to reget 'targetStatistics'
206         auto& updatedTargetStatistics = m_store->ensureResourceStatisticsForPrimaryDomain(targetPrimaryDomain);
207
208         if (m_store->isPrevalentResource(targetPrimaryDomain))
209             redirectingOriginStatistics.redirectedToOtherPrevalentResourceOrigins.add(targetPrimaryDomain);
210         
211         ++redirectingOriginStatistics.subresourceHasBeenRedirectedFrom;
212         ++updatedTargetStatistics.subresourceHasBeenRedirectedTo;
213
214         auto subresourceUniqueRedirectsToResult = redirectingOriginStatistics.subresourceUniqueRedirectsTo.add(targetPrimaryDomain);
215         if (subresourceUniqueRedirectsToResult.isNewEntry)
216             shouldFireDataModificationHandler = true;
217
218         ++updatedTargetStatistics.subresourceHasBeenSubresourceCount;
219
220         auto totalVisited = std::max(m_originsVisitedMap.size(), 1U);
221         
222         updatedTargetStatistics.subresourceHasBeenSubresourceCountDividedByTotalNumberOfOriginsVisited = static_cast<double>(updatedTargetStatistics.subresourceHasBeenSubresourceCount) / totalVisited;
223     } else {
224         ++targetStatistics.subresourceHasBeenSubresourceCount;
225
226         auto totalVisited = std::max(m_originsVisitedMap.size(), 1U);
227         
228         targetStatistics.subresourceHasBeenSubresourceCountDividedByTotalNumberOfOriginsVisited = static_cast<double>(targetStatistics.subresourceHasBeenSubresourceCount) / totalVisited;
229     }
230
231     if (shouldFireDataModificationHandler)
232         m_store->fireDataModificationHandler();
233 }
234
235 void ResourceLoadObserver::logWebSocketLoading(const Frame* frame, const URL& targetURL)
236 {
237     // FIXME: Web sockets can run in detached frames. Decide how to count such connections.
238     // See LayoutTests/http/tests/websocket/construct-in-detached-frame.html
239     if (!frame)
240         return;
241
242     if (!shouldLog(frame->page()))
243         return;
244
245     const URL& mainFrameURL = frame->mainFrame().document()->url();
246
247     auto targetHost = targetURL.host();
248     auto mainFrameHost = mainFrameURL.host();
249     
250     if (targetHost.isEmpty()
251         || mainFrameHost.isEmpty()
252         || targetHost == mainFrameHost)
253         return;
254     
255     auto targetPrimaryDomain = primaryDomain(targetURL);
256     auto mainFramePrimaryDomain = primaryDomain(mainFrameURL);
257     
258     if (targetPrimaryDomain == mainFramePrimaryDomain)
259         return;
260
261     auto& targetStatistics = m_store->ensureResourceStatisticsForPrimaryDomain(targetPrimaryDomain);
262
263     // Always fire if we have previously removed data records for this domain
264     bool shouldFireDataModificationHandler = targetStatistics.dataRecordsRemoved > 0;
265     
266     auto mainFrameOrigin = SecurityOrigin::create(mainFrameURL);
267     auto subresourceUnderTopFrameOriginsResult = targetStatistics.subresourceUnderTopFrameOrigins.add(mainFramePrimaryDomain);
268     if (subresourceUnderTopFrameOriginsResult.isNewEntry)
269         shouldFireDataModificationHandler = true;
270
271     ++targetStatistics.subresourceHasBeenSubresourceCount;
272     
273     auto totalVisited = std::max(m_originsVisitedMap.size(), 1U);
274     
275     targetStatistics.subresourceHasBeenSubresourceCountDividedByTotalNumberOfOriginsVisited = static_cast<double>(targetStatistics.subresourceHasBeenSubresourceCount) / totalVisited;
276
277     if (shouldFireDataModificationHandler)
278         m_store->fireDataModificationHandler();
279 }
280
281 void ResourceLoadObserver::logUserInteraction(const Document& document)
282 {
283     ASSERT(document.page());
284
285     if (!shouldLog(document.page()))
286         return;
287
288     auto& url = document.url();
289
290     if (url.isBlankURL() || url.isEmpty())
291         return;
292
293     auto& statistics = m_store->ensureResourceStatisticsForPrimaryDomain(primaryDomain(url));
294     statistics.hadUserInteraction = true;
295     m_store->fireDataModificationHandler();
296 }
297     
298 String ResourceLoadObserver::primaryDomain(const URL& url)
299 {
300     String primaryDomain;
301     String host = url.host();
302     if (host.isNull() || host.isEmpty())
303         primaryDomain = "nullOrigin";
304 #if ENABLE(PUBLIC_SUFFIX_LIST)
305     else {
306         primaryDomain = topPrivatelyControlledDomain(host);
307         // We will have an empty string here if there is no TLD.
308         // Use the host in such case.
309         if (primaryDomain.isEmpty())
310             primaryDomain = host;
311     }
312 #else
313     else
314         primaryDomain = host;
315 #endif
316
317     return primaryDomain;
318 }
319
320 String ResourceLoadObserver::statisticsForOrigin(const String& origin)
321 {
322     return m_store ? m_store->statisticsForOrigin(origin) : emptyString();
323 }
324
325 }