Rolling out r212757
[WebKit-https.git] / Source / WebCore / loader / ResourceLoadObserver.cpp
1 /*
2  * Copyright (C) 2016-2017 Apple Inc. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  * 1. Redistributions of source code must retain the above copyright
8  *    notice, this list of conditions and the following disclaimer.
9  * 2. Redistributions in binary form must reproduce the above copyright
10  *    notice, this list of conditions and the following disclaimer in the
11  *    documentation and/or other materials provided with the distribution.
12  *
13  * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS''
14  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
15  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS
17  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
18  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
19  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
20  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
21  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
22  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
23  * THE POSSIBILITY OF SUCH DAMAGE.
24  */
25
26 #include "config.h"
27 #include "ResourceLoadObserver.h"
28
29 #include "Document.h"
30 #include "Frame.h"
31 #include "Logging.h"
32 #include "MainFrame.h"
33 #include "NetworkStorageSession.h"
34 #include "Page.h"
35 #include "PlatformStrategies.h"
36 #include "PublicSuffix.h"
37 #include "ResourceLoadStatistics.h"
38 #include "ResourceLoadStatisticsStore.h"
39 #include "ResourceRequest.h"
40 #include "ResourceResponse.h"
41 #include "SecurityOrigin.h"
42 #include "Settings.h"
43 #include "SharedBuffer.h"
44 #include "URL.h"
45 #include <wtf/CurrentTime.h>
46 #include <wtf/NeverDestroyed.h>
47 #include <wtf/text/StringBuilder.h>
48
49 namespace WebCore {
50
// Granularity, in seconds, to which user-interaction timestamps are rounded down
// (one day). This is a fixed value, so it is a compile-time constant rather than
// a mutable file-level static.
static constexpr auto timestampResolution = 86400;
53
54 ResourceLoadObserver& ResourceLoadObserver::sharedObserver()
55 {
56     static NeverDestroyed<ResourceLoadObserver> resourceLoadObserver;
57     return resourceLoadObserver;
58 }
59
60 RefPtr<ResourceLoadStatisticsStore> ResourceLoadObserver::statisticsStore()
61 {
62     ASSERT(m_store);
63     return m_store;
64 }
65
66 void ResourceLoadObserver::setStatisticsStore(Ref<ResourceLoadStatisticsStore>&& store)
67 {
68     m_store = WTFMove(store);
69 }
70
71 static inline bool is3xxRedirect(const ResourceResponse& response)
72 {
73     return response.httpStatusCode() >= 300 && response.httpStatusCode() <= 399;
74 }
75
76 bool ResourceLoadObserver::shouldLog(Page* page)
77 {
78     // FIXME: Err on the safe side until we have sorted out what to do in worker contexts
79     if (!page)
80         return false;
81     return Settings::resourceLoadStatisticsEnabled()
82         && !page->usesEphemeralSession()
83         && m_store;
84 }
85
86 void ResourceLoadObserver::logFrameNavigation(const Frame& frame, const Frame& topFrame, const ResourceRequest& newRequest, const ResourceResponse& redirectResponse)
87 {
88     ASSERT(frame.document());
89     ASSERT(topFrame.document());
90     ASSERT(topFrame.page());
91     
92     if (!shouldLog(topFrame.page()))
93         return;
94
95     bool isRedirect = is3xxRedirect(redirectResponse);
96     bool isMainFrame = frame.isMainFrame();
97     const URL& sourceURL = frame.document()->url();
98     const URL& targetURL = newRequest.url();
99     const URL& mainFrameURL = topFrame.document()->url();
100     
101     if (!targetURL.isValid() || !mainFrameURL.isValid())
102         return;
103
104     auto targetHost = targetURL.host();
105     auto mainFrameHost = mainFrameURL.host();
106
107     if (targetHost.isEmpty() || mainFrameHost.isEmpty() || targetHost == mainFrameHost || targetHost == sourceURL.host())
108         return;
109
110     auto targetPrimaryDomain = primaryDomain(targetURL);
111     auto mainFramePrimaryDomain = primaryDomain(mainFrameURL);
112     auto sourcePrimaryDomain = primaryDomain(sourceURL);
113     
114     if (targetPrimaryDomain == mainFramePrimaryDomain || targetPrimaryDomain == sourcePrimaryDomain)
115         return;
116
117     auto targetOrigin = SecurityOrigin::create(targetURL);
118     auto targetStatistics = m_store->ensureResourceStatisticsForPrimaryDomain(targetPrimaryDomain);
119
120     // Always fire if we have previously removed data records for this domain
121     bool shouldFireDataModificationHandler = targetStatistics.dataRecordsRemoved > 0;
122
123     if (isMainFrame)
124         targetStatistics.topFrameHasBeenNavigatedToBefore = true;
125     else {
126         targetStatistics.subframeHasBeenLoadedBefore = true;
127
128         auto mainFrameOrigin = SecurityOrigin::create(mainFrameURL);
129         auto subframeUnderTopFrameOriginsResult = targetStatistics.subframeUnderTopFrameOrigins.add(mainFramePrimaryDomain);
130         if (subframeUnderTopFrameOriginsResult.isNewEntry)
131             shouldFireDataModificationHandler = true;
132     }
133     
134     if (isRedirect) {
135         auto& redirectingOriginResourceStatistics = m_store->ensureResourceStatisticsForPrimaryDomain(sourcePrimaryDomain);
136         
137         if (m_store->isPrevalentResource(targetPrimaryDomain))
138             redirectingOriginResourceStatistics.redirectedToOtherPrevalentResourceOrigins.add(targetPrimaryDomain);
139         
140         if (isMainFrame) {
141             ++targetStatistics.topFrameHasBeenRedirectedTo;
142             ++redirectingOriginResourceStatistics.topFrameHasBeenRedirectedFrom;
143         } else {
144             ++targetStatistics.subframeHasBeenRedirectedTo;
145             ++redirectingOriginResourceStatistics.subframeHasBeenRedirectedFrom;
146             redirectingOriginResourceStatistics.subframeUniqueRedirectsTo.add(targetPrimaryDomain);
147             
148             ++targetStatistics.subframeSubResourceCount;
149         }
150     } else {
151         if (sourcePrimaryDomain.isNull() || sourcePrimaryDomain.isEmpty() || sourcePrimaryDomain == "nullOrigin") {
152             if (isMainFrame)
153                 ++targetStatistics.topFrameInitialLoadCount;
154             else
155                 ++targetStatistics.subframeSubResourceCount;
156         } else {
157             auto& sourceOriginResourceStatistics = m_store->ensureResourceStatisticsForPrimaryDomain(sourcePrimaryDomain);
158
159             if (isMainFrame) {
160                 ++sourceOriginResourceStatistics.topFrameHasBeenNavigatedFrom;
161                 ++targetStatistics.topFrameHasBeenNavigatedTo;
162             } else {
163                 ++sourceOriginResourceStatistics.subframeHasBeenNavigatedFrom;
164                 ++targetStatistics.subframeHasBeenNavigatedTo;
165             }
166         }
167     }
168
169     m_store->setResourceStatisticsForPrimaryDomain(targetPrimaryDomain, WTFMove(targetStatistics));
170     if (shouldFireDataModificationHandler)
171         m_store->fireDataModificationHandler();
172 }
173     
174 void ResourceLoadObserver::logSubresourceLoading(const Frame* frame, const ResourceRequest& newRequest, const ResourceResponse& redirectResponse)
175 {
176     ASSERT(frame->page());
177
178     if (!shouldLog(frame->page()))
179         return;
180
181     bool isRedirect = is3xxRedirect(redirectResponse);
182     const URL& sourceURL = redirectResponse.url();
183     const URL& targetURL = newRequest.url();
184     const URL& mainFrameURL = frame ? frame->mainFrame().document()->url() : URL();
185     
186     auto targetHost = targetURL.host();
187     auto mainFrameHost = mainFrameURL.host();
188
189     if (targetHost.isEmpty()
190         || mainFrameHost.isEmpty()
191         || targetHost == mainFrameHost
192         || (isRedirect && targetHost == sourceURL.host()))
193         return;
194
195     auto targetPrimaryDomain = primaryDomain(targetURL);
196     auto mainFramePrimaryDomain = primaryDomain(mainFrameURL);
197     auto sourcePrimaryDomain = primaryDomain(sourceURL);
198     
199     if (targetPrimaryDomain == mainFramePrimaryDomain || (isRedirect && targetPrimaryDomain == sourcePrimaryDomain))
200         return;
201
202     auto& targetStatistics = m_store->ensureResourceStatisticsForPrimaryDomain(targetPrimaryDomain);
203
204     // Always fire if we have previously removed data records for this domain
205     bool shouldFireDataModificationHandler = targetStatistics.dataRecordsRemoved > 0;
206
207     auto mainFrameOrigin = SecurityOrigin::create(mainFrameURL);
208     auto subresourceUnderTopFrameOriginsResult = targetStatistics.subresourceUnderTopFrameOrigins.add(mainFramePrimaryDomain);
209     if (subresourceUnderTopFrameOriginsResult.isNewEntry)
210         shouldFireDataModificationHandler = true;
211
212     if (isRedirect) {
213         auto& redirectingOriginStatistics = m_store->ensureResourceStatisticsForPrimaryDomain(sourcePrimaryDomain);
214         
215         // We just inserted to the store, so we need to reget 'targetStatistics'
216         auto& updatedTargetStatistics = m_store->ensureResourceStatisticsForPrimaryDomain(targetPrimaryDomain);
217
218         if (m_store->isPrevalentResource(targetPrimaryDomain))
219             redirectingOriginStatistics.redirectedToOtherPrevalentResourceOrigins.add(targetPrimaryDomain);
220         
221         ++redirectingOriginStatistics.subresourceHasBeenRedirectedFrom;
222         ++updatedTargetStatistics.subresourceHasBeenRedirectedTo;
223
224         auto subresourceUniqueRedirectsToResult = redirectingOriginStatistics.subresourceUniqueRedirectsTo.add(targetPrimaryDomain);
225         if (subresourceUniqueRedirectsToResult.isNewEntry)
226             shouldFireDataModificationHandler = true;
227
228         ++updatedTargetStatistics.subresourceHasBeenSubresourceCount;
229
230         auto totalVisited = std::max(m_originsVisitedMap.size(), 1U);
231         
232         updatedTargetStatistics.subresourceHasBeenSubresourceCountDividedByTotalNumberOfOriginsVisited = static_cast<double>(updatedTargetStatistics.subresourceHasBeenSubresourceCount) / totalVisited;
233     } else {
234         ++targetStatistics.subresourceHasBeenSubresourceCount;
235
236         auto totalVisited = std::max(m_originsVisitedMap.size(), 1U);
237         
238         targetStatistics.subresourceHasBeenSubresourceCountDividedByTotalNumberOfOriginsVisited = static_cast<double>(targetStatistics.subresourceHasBeenSubresourceCount) / totalVisited;
239     }
240
241     if (shouldFireDataModificationHandler)
242         m_store->fireDataModificationHandler();
243 }
244
245 void ResourceLoadObserver::logWebSocketLoading(const Frame* frame, const URL& targetURL)
246 {
247     // FIXME: Web sockets can run in detached frames. Decide how to count such connections.
248     // See LayoutTests/http/tests/websocket/construct-in-detached-frame.html
249     if (!frame)
250         return;
251
252     if (!shouldLog(frame->page()))
253         return;
254
255     const URL& mainFrameURL = frame->mainFrame().document()->url();
256
257     auto targetHost = targetURL.host();
258     auto mainFrameHost = mainFrameURL.host();
259     
260     if (targetHost.isEmpty()
261         || mainFrameHost.isEmpty()
262         || targetHost == mainFrameHost)
263         return;
264     
265     auto targetPrimaryDomain = primaryDomain(targetURL);
266     auto mainFramePrimaryDomain = primaryDomain(mainFrameURL);
267     
268     if (targetPrimaryDomain == mainFramePrimaryDomain)
269         return;
270
271     auto& targetStatistics = m_store->ensureResourceStatisticsForPrimaryDomain(targetPrimaryDomain);
272
273     // Always fire if we have previously removed data records for this domain
274     bool shouldFireDataModificationHandler = targetStatistics.dataRecordsRemoved > 0;
275     
276     auto mainFrameOrigin = SecurityOrigin::create(mainFrameURL);
277     auto subresourceUnderTopFrameOriginsResult = targetStatistics.subresourceUnderTopFrameOrigins.add(mainFramePrimaryDomain);
278     if (subresourceUnderTopFrameOriginsResult.isNewEntry)
279         shouldFireDataModificationHandler = true;
280
281     ++targetStatistics.subresourceHasBeenSubresourceCount;
282     
283     auto totalVisited = std::max(m_originsVisitedMap.size(), 1U);
284     
285     targetStatistics.subresourceHasBeenSubresourceCountDividedByTotalNumberOfOriginsVisited = static_cast<double>(targetStatistics.subresourceHasBeenSubresourceCount) / totalVisited;
286
287     if (shouldFireDataModificationHandler)
288         m_store->fireDataModificationHandler();
289 }
290
291 static double reduceTimeResolutionToOneDay(double seconds)
292 {
293     return std::floor(seconds / timestampResolution) * timestampResolution;
294 }
295
296 void ResourceLoadObserver::logUserInteractionWithReducedTimeResolution(const Document& document)
297 {
298     ASSERT(document.page());
299
300     if (!shouldLog(document.page()))
301         return;
302
303     auto& url = document.url();
304     if (url.isBlankURL() || url.isEmpty())
305         return;
306
307     auto& statistics = m_store->ensureResourceStatisticsForPrimaryDomain(primaryDomain(url));
308     double newTimestamp = reduceTimeResolutionToOneDay(WTF::currentTime());
309     if (newTimestamp == statistics.mostRecentUserInteraction)
310         return;
311
312     statistics.hadUserInteraction = true;
313     statistics.mostRecentUserInteraction = newTimestamp;
314     m_store->fireDataModificationHandler();
315 }
316
317 void ResourceLoadObserver::logUserInteraction(const URL& url)
318 {
319     if (url.isBlankURL() || url.isEmpty())
320         return;
321
322     auto& statistics = m_store->ensureResourceStatisticsForPrimaryDomain(primaryDomain(url));
323     statistics.hadUserInteraction = true;
324     statistics.mostRecentUserInteraction = WTF::currentTime();
325 }
326
327 void ResourceLoadObserver::clearUserInteraction(const URL& url)
328 {
329     if (url.isBlankURL() || url.isEmpty())
330         return;
331
332     auto& statistics = m_store->ensureResourceStatisticsForPrimaryDomain(primaryDomain(url));
333     
334     statistics.hadUserInteraction = false;
335     statistics.mostRecentUserInteraction = 0;
336 }
337
338 bool ResourceLoadObserver::hasHadUserInteraction(const URL& url)
339 {
340     if (url.isBlankURL() || url.isEmpty())
341         return false;
342
343     auto& statistics = m_store->ensureResourceStatisticsForPrimaryDomain(primaryDomain(url));
344     
345     return m_store->hasHadRecentUserInteraction(statistics);
346 }
347
348 void ResourceLoadObserver::setPrevalentResource(const URL& url)
349 {
350     if (url.isBlankURL() || url.isEmpty())
351         return;
352
353     auto& statistics = m_store->ensureResourceStatisticsForPrimaryDomain(primaryDomain(url));
354     
355     statistics.isPrevalentResource = true;
356 }
357
358 bool ResourceLoadObserver::isPrevalentResource(const URL& url)
359 {
360     if (url.isBlankURL() || url.isEmpty())
361         return false;
362
363     auto& statistics = m_store->ensureResourceStatisticsForPrimaryDomain(primaryDomain(url));
364     
365     return statistics.isPrevalentResource;
366 }
367     
368 void ResourceLoadObserver::clearPrevalentResource(const URL& url)
369 {
370     if (url.isBlankURL() || url.isEmpty())
371         return;
372
373     auto& statistics = m_store->ensureResourceStatisticsForPrimaryDomain(primaryDomain(url));
374     
375     statistics.isPrevalentResource = false;
376 }
377
378 void ResourceLoadObserver::setTimeToLiveUserInteraction(double seconds)
379 {
380     m_store->setTimeToLiveUserInteraction(seconds);
381 }
382
383 void ResourceLoadObserver::fireDataModificationHandler()
384 {
385     m_store->fireDataModificationHandler();
386 }
387
388 String ResourceLoadObserver::primaryDomain(const URL& url)
389 {
390     String primaryDomain;
391     String host = url.host();
392     if (host.isNull() || host.isEmpty())
393         primaryDomain = "nullOrigin";
394 #if ENABLE(PUBLIC_SUFFIX_LIST)
395     else {
396         primaryDomain = topPrivatelyControlledDomain(host);
397         // We will have an empty string here if there is no TLD.
398         // Use the host in such case.
399         if (primaryDomain.isEmpty())
400             primaryDomain = host;
401     }
402 #else
403     else
404         primaryDomain = host;
405 #endif
406
407     return primaryDomain;
408 }
409
410 String ResourceLoadObserver::statisticsForOrigin(const String& origin)
411 {
412     return m_store ? m_store->statisticsForOrigin(origin) : emptyString();
413 }
414
415 }