Don't use invalidated ResourceLoadStatistics iterators
[WebKit-https.git] / Source / WebCore / loader / ResourceLoadObserver.cpp
1 /*
2  * Copyright (C) 2016 Apple Inc. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  * 1. Redistributions of source code must retain the above copyright
8  *    notice, this list of conditions and the following disclaimer.
9  * 2. Redistributions in binary form must reproduce the above copyright
10  *    notice, this list of conditions and the following disclaimer in the
11  *    documentation and/or other materials provided with the distribution.
12  *
13  * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS''
14  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
15  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS
17  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
18  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
19  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
20  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
21  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
22  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
23  * THE POSSIBILITY OF SUCH DAMAGE.
24  */
25
26 #include "config.h"
27 #include "ResourceLoadObserver.h"
28
29 #include "Document.h"
30 #include "Frame.h"
31 #include "Logging.h"
32 #include "MainFrame.h"
33 #include "NetworkStorageSession.h"
34 #include "Page.h"
35 #include "PlatformStrategies.h"
36 #include "ResourceLoadStatistics.h"
37 #include "ResourceLoadStatisticsStore.h"
38 #include "ResourceRequest.h"
39 #include "ResourceResponse.h"
40 #include "SecurityOrigin.h"
41 #include "Settings.h"
42 #include "SharedBuffer.h"
43 #include "URL.h"
44 #include <wtf/NeverDestroyed.h>
45 #include <wtf/text/StringBuilder.h>
46
47 namespace WebCore {
48
49 ResourceLoadObserver& ResourceLoadObserver::sharedObserver()
50 {
51     static NeverDestroyed<ResourceLoadObserver> resourceLoadObserver;
52     return resourceLoadObserver;
53 }
54
55 void ResourceLoadObserver::setStatisticsStore(Ref<ResourceLoadStatisticsStore>&& store)
56 {
57     m_store = WTFMove(store);
58 }
59
60 void ResourceLoadObserver::logFrameNavigation(const Frame& frame, const Frame& topFrame, const ResourceRequest& newRequest, const ResourceResponse& redirectResponse)
61 {
62     if (!Settings::resourceLoadStatisticsEnabled())
63         return;
64
65     if (!m_store)
66         return;
67
68     ASSERT(frame.document());
69     ASSERT(topFrame.document());
70     ASSERT(topFrame.page());
71
72     bool needPrivacy = topFrame.page() ? topFrame.page()->usesEphemeralSession() : false;
73     if (needPrivacy)
74         return;
75
76     bool isRedirect = !redirectResponse.isNull();
77     bool isMainFrame = frame.isMainFrame();
78     const URL& sourceURL = frame.document()->url();
79     const URL& targetURL = newRequest.url();
80     const URL& mainFrameURL = topFrame.document()->url();
81     
82     if (!targetURL.isValid() || !mainFrameURL.isValid())
83         return;
84
85     auto targetHost = targetURL.host();
86     auto mainFrameHost = mainFrameURL.host();
87
88     if (targetHost.isEmpty() || mainFrameHost.isEmpty() || targetHost == mainFrameHost || targetHost == sourceURL.host())
89         return;
90
91     auto targetPrimaryDomain = primaryDomain(targetURL);
92     auto mainFramePrimaryDomain = primaryDomain(mainFrameURL);
93     auto sourcePrimaryDomain = primaryDomain(sourceURL);
94     
95     if (targetPrimaryDomain == mainFramePrimaryDomain || targetPrimaryDomain == sourcePrimaryDomain)
96         return;
97     
98     auto targetOrigin = SecurityOrigin::create(targetURL);
99     auto targetStatistics = m_store->ensureResourceStatisticsForPrimaryDomain(targetPrimaryDomain);
100     
101     if (isMainFrame)
102         targetStatistics.topFrameHasBeenNavigatedToBefore = true;
103     else {
104         targetStatistics.subframeHasBeenLoadedBefore = true;
105
106         auto mainFrameOrigin = SecurityOrigin::create(mainFrameURL);
107         targetStatistics.subframeUnderTopFrameOrigins.add(mainFramePrimaryDomain);
108     }
109     
110     if (isRedirect) {
111         auto& redirectingOriginResourceStatistics = m_store->ensureResourceStatisticsForPrimaryDomain(sourcePrimaryDomain);
112         
113         if (m_store->isPrevalentResource(targetPrimaryDomain))
114             redirectingOriginResourceStatistics.redirectedToOtherPrevalentResourceOrigins.add(targetPrimaryDomain);
115         
116         if (isMainFrame) {
117             ++targetStatistics.topFrameHasBeenRedirectedTo;
118             ++redirectingOriginResourceStatistics.topFrameHasBeenRedirectedFrom;
119         } else {
120             ++targetStatistics.subframeHasBeenRedirectedTo;
121             ++redirectingOriginResourceStatistics.subframeHasBeenRedirectedFrom;
122             redirectingOriginResourceStatistics.subframeUniqueRedirectsTo.add(targetPrimaryDomain);
123             
124             ++targetStatistics.subframeSubResourceCount;
125         }
126     } else {
127         if (sourcePrimaryDomain.isNull() || sourcePrimaryDomain.isEmpty() || sourcePrimaryDomain == "nullOrigin") {
128             if (isMainFrame)
129                 ++targetStatistics.topFrameInitialLoadCount;
130             else
131                 ++targetStatistics.subframeSubResourceCount;
132         } else {
133             auto& sourceOriginResourceStatistics = m_store->ensureResourceStatisticsForPrimaryDomain(sourcePrimaryDomain);
134
135             if (isMainFrame) {
136                 ++sourceOriginResourceStatistics.topFrameHasBeenNavigatedFrom;
137                 ++targetStatistics.topFrameHasBeenNavigatedTo;
138             } else {
139                 ++sourceOriginResourceStatistics.subframeHasBeenNavigatedFrom;
140                 ++targetStatistics.subframeHasBeenNavigatedTo;
141             }
142         }
143     }
144
145     m_store->setResourceStatisticsForPrimaryDomain(targetPrimaryDomain, WTFMove(targetStatistics));
146     m_store->fireDataModificationHandler();
147 }
148     
149 void ResourceLoadObserver::logSubresourceLoading(const Frame* frame, const ResourceRequest& newRequest, const ResourceResponse& redirectResponse)
150 {
151     if (!Settings::resourceLoadStatisticsEnabled())
152         return;
153
154     if (!m_store)
155         return;
156
157     bool needPrivacy = (frame && frame->page()) ? frame->page()->usesEphemeralSession() : false;
158     if (needPrivacy)
159         return;
160     
161     bool isRedirect = !redirectResponse.isNull();
162     const URL& sourceURL = redirectResponse.url();
163     const URL& targetURL = newRequest.url();
164     const URL& mainFrameURL = frame ? frame->mainFrame().document()->url() : URL();
165     
166     auto targetHost = targetURL.host();
167     auto mainFrameHost = mainFrameURL.host();
168
169     if (targetHost.isEmpty() || mainFrameHost.isEmpty() || targetHost == mainFrameHost || targetHost == sourceURL.host())
170         return;
171
172     auto targetPrimaryDomain = primaryDomain(targetURL);
173     auto mainFramePrimaryDomain = primaryDomain(mainFrameURL);
174     auto sourcePrimaryDomain = primaryDomain(sourceURL);
175     
176     if (targetPrimaryDomain == mainFramePrimaryDomain || targetPrimaryDomain == sourcePrimaryDomain)
177         return;
178
179     auto& targetStatistics = m_store->ensureResourceStatisticsForPrimaryDomain(targetPrimaryDomain);
180
181     auto mainFrameOrigin = SecurityOrigin::create(mainFrameURL);
182     targetStatistics.subresourceUnderTopFrameOrigins.add(mainFramePrimaryDomain);
183
184     if (isRedirect) {
185         auto& redirectingOriginStatistics = m_store->ensureResourceStatisticsForPrimaryDomain(sourcePrimaryDomain);
186         
187         if (m_store->isPrevalentResource(targetPrimaryDomain))
188             redirectingOriginStatistics.redirectedToOtherPrevalentResourceOrigins.add(targetPrimaryDomain);
189         
190         ++redirectingOriginStatistics.subresourceHasBeenRedirectedFrom;
191         ++targetStatistics.subresourceHasBeenRedirectedTo;
192
193         redirectingOriginStatistics.subresourceUniqueRedirectsTo.add(targetPrimaryDomain);
194
195         ++targetStatistics.subresourceHasBeenSubresourceCount;
196
197         auto totalVisited = std::max(m_originsVisitedMap.size(), 1U);
198         
199         targetStatistics.subresourceHasBeenSubresourceCountDividedByTotalNumberOfOriginsVisited = static_cast<double>(targetStatistics.subresourceHasBeenSubresourceCount) / totalVisited;
200     } else {
201         ++targetStatistics.subresourceHasBeenSubresourceCount;
202
203         auto totalVisited = std::max(m_originsVisitedMap.size(), 1U);
204         
205         targetStatistics.subresourceHasBeenSubresourceCountDividedByTotalNumberOfOriginsVisited = static_cast<double>(targetStatistics.subresourceHasBeenSubresourceCount) / totalVisited;
206     }
207     
208     m_store->fireDataModificationHandler();
209 }
210     
211 void ResourceLoadObserver::logUserInteraction(const Document& document)
212 {
213     if (!Settings::resourceLoadStatisticsEnabled())
214         return;
215
216     if (!m_store)
217         return;
218
219     bool needPrivacy = document.page() ? document.page()->usesEphemeralSession() : false;
220     if (needPrivacy)
221         return;
222
223     auto& statistics = m_store->ensureResourceStatisticsForPrimaryDomain(primaryDomain(document.url()));
224     statistics.hadUserInteraction = true;
225     m_store->fireDataModificationHandler();
226 }
227     
228 String ResourceLoadObserver::primaryDomain(const URL& url)
229 {
230     String host = url.host();
231     Vector<String> hostSplitOnDot;
232     
233     host.split('.', false, hostSplitOnDot);
234
235     String primaryDomain;
236     if (host.isNull())
237         primaryDomain = "nullOrigin";
238     else if (hostSplitOnDot.size() < 3)
239         primaryDomain = host;
240     else {
241         // Skip TLD and then up to two domains smaller than 4 characters
242         int primaryDomainCutOffIndex = hostSplitOnDot.size() - 2;
243
244         // Start with TLD as a given part
245         size_t numberOfParts = 1;
246         for (; primaryDomainCutOffIndex >= 0; --primaryDomainCutOffIndex) {
247             ++numberOfParts;
248
249             // We have either a domain part that's 4 chars or longer, or 3 domain parts including TLD
250             if (hostSplitOnDot.at(primaryDomainCutOffIndex).length() >= 4 || numberOfParts >= 3)
251                 break;
252         }
253
254         if (primaryDomainCutOffIndex < 0)
255             primaryDomain = host;
256         else {
257             StringBuilder builder;
258             builder.append(hostSplitOnDot.at(primaryDomainCutOffIndex));
259             for (size_t j = primaryDomainCutOffIndex + 1; j < hostSplitOnDot.size(); ++j) {
260                 builder.append('.');
261                 builder.append(hostSplitOnDot[j]);
262             }
263             primaryDomain = builder.toString();
264         }
265     }
266
267     return primaryDomain;
268 }
269
270 String ResourceLoadObserver::statisticsForOrigin(const String& origin)
271 {
272     return m_store ? m_store->statisticsForOrigin(origin) : emptyString();
273 }
274
275 }