Resource Load Statistics: Classify resources as prevalent based on redirects to other...
[WebKit-https.git] / Source / WebCore / loader / ResourceLoadObserver.cpp
1 /*
2  * Copyright (C) 2016-2018 Apple Inc. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  * 1. Redistributions of source code must retain the above copyright
8  *    notice, this list of conditions and the following disclaimer.
9  * 2. Redistributions in binary form must reproduce the above copyright
10  *    notice, this list of conditions and the following disclaimer in the
11  *    documentation and/or other materials provided with the distribution.
12  *
13  * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS''
14  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
15  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS
17  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
18  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
19  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
20  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
21  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
22  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
23  * THE POSSIBILITY OF SUCH DAMAGE.
24  */
25
26 #include "config.h"
27 #include "ResourceLoadObserver.h"
28
29 #include "DeprecatedGlobalSettings.h"
30 #include "Document.h"
31 #include "Frame.h"
32 #include "FrameLoader.h"
33 #include "HTMLFrameOwnerElement.h"
34 #include "Logging.h"
35 #include "MainFrame.h"
36 #include "Page.h"
37 #include "ResourceLoadStatistics.h"
38 #include "ResourceRequest.h"
39 #include "ResourceResponse.h"
40 #include "SecurityOrigin.h"
41 #include "Settings.h"
42 #include "URL.h"
43
44 namespace WebCore {
45
46 template<typename T> static inline String primaryDomain(const T& value)
47 {
48     return ResourceLoadStatistics::primaryDomain(value);
49 }
50
51 static Seconds timestampResolution { 5_s };
52 static const Seconds minimumNotificationInterval { 5_s };
53
54 ResourceLoadObserver& ResourceLoadObserver::shared()
55 {
56     static NeverDestroyed<ResourceLoadObserver> resourceLoadObserver;
57     return resourceLoadObserver;
58 }
59
60 static bool shouldEnableSiteSpecificQuirks(Page* page)
61 {
62 #if PLATFORM(IOS)
63     UNUSED_PARAM(page);
64
65     // There is currently no way to toggle the needsSiteSpecificQuirks setting on iOS so we always enable
66     // the site-specific quirks on iOS.
67     return true;
68 #else
69     return page && page->settings().needsSiteSpecificQuirks();
70 #endif
71 }
72
73 // FIXME: Temporary fix for <rdar://problem/32343256> until content can be updated.
74 static bool areDomainsAssociated(Page* page, const String& firstDomain, const String& secondDomain)
75 {
76     static NeverDestroyed<HashMap<String, unsigned>> metaDomainIdentifiers = [] {
77         HashMap<String, unsigned> map;
78
79         // Domains owned by Dow Jones & Company, Inc.
80         const unsigned dowJonesIdentifier = 1;
81         map.add(ASCIILiteral("dowjones.com"), dowJonesIdentifier);
82         map.add(ASCIILiteral("wsj.com"), dowJonesIdentifier);
83         map.add(ASCIILiteral("barrons.com"), dowJonesIdentifier);
84         map.add(ASCIILiteral("marketwatch.com"), dowJonesIdentifier);
85         map.add(ASCIILiteral("wsjplus.com"), dowJonesIdentifier);
86
87         return map;
88     }();
89
90     if (firstDomain == secondDomain)
91         return true;
92
93     ASSERT(!equalIgnoringASCIICase(firstDomain, secondDomain));
94
95     if (!shouldEnableSiteSpecificQuirks(page))
96         return false;
97
98     unsigned firstMetaDomainIdentifier = metaDomainIdentifiers.get().get(firstDomain);
99     if (!firstMetaDomainIdentifier)
100         return false;
101
102     return firstMetaDomainIdentifier == metaDomainIdentifiers.get().get(secondDomain);
103 }
104
105 void ResourceLoadObserver::setNotificationCallback(WTF::Function<void (Vector<ResourceLoadStatistics>&&)>&& notificationCallback)
106 {
107     ASSERT(!m_notificationCallback);
108     m_notificationCallback = WTFMove(notificationCallback);
109 }
110
111 ResourceLoadObserver::ResourceLoadObserver()
112     : m_notificationTimer(*this, &ResourceLoadObserver::notifyObserver)
113 {
114 }
115
116 static inline bool is3xxRedirect(const ResourceResponse& response)
117 {
118     return response.httpStatusCode() >= 300 && response.httpStatusCode() <= 399;
119 }
120
121 bool ResourceLoadObserver::shouldLog(Page* page) const
122 {
123     // FIXME: Err on the safe side until we have sorted out what to do in worker contexts
124     if (!page)
125         return false;
126
127     return DeprecatedGlobalSettings::resourceLoadStatisticsEnabled() && !page->usesEphemeralSession() && m_notificationCallback;
128 }
129
130 static WallTime reduceTimeResolution(WallTime time)
131 {
132     return WallTime::fromRawSeconds(std::floor(time.secondsSinceEpoch() / timestampResolution) * timestampResolution.seconds());
133 }
134
135 void ResourceLoadObserver::logFrameNavigation(const Frame& frame, const Frame& topFrame, const ResourceRequest& newRequest, const URL& redirectUrl)
136 {
137     ASSERT(frame.document());
138     ASSERT(topFrame.document());
139     ASSERT(topFrame.page());
140
141     auto* page = topFrame.page();
142     if (!shouldLog(page))
143         return;
144
145     auto sourceURL = redirectUrl;
146     bool isRedirect = !redirectUrl.isNull();
147     if (!isRedirect)
148         sourceURL = nonNullOwnerURL(*frame.document());
149
150     auto& targetURL = newRequest.url();
151     auto& mainFrameURL = topFrame.document()->url();
152     
153     if (!targetURL.isValid() || !mainFrameURL.isValid())
154         return;
155
156     auto targetHost = targetURL.host();
157     auto mainFrameHost = mainFrameURL.host();
158
159     if (targetHost.isEmpty() || mainFrameHost.isEmpty() || targetHost == sourceURL.host())
160         return;
161
162     auto targetPrimaryDomain = primaryDomain(targetURL);
163     auto mainFramePrimaryDomain = primaryDomain(mainFrameURL);
164     auto sourcePrimaryDomain = primaryDomain(sourceURL);
165     bool shouldCallNotificationCallback = false;
166
167     if (targetHost != mainFrameHost
168         && !(areDomainsAssociated(page, targetPrimaryDomain, mainFramePrimaryDomain) || areDomainsAssociated(page, targetPrimaryDomain, sourcePrimaryDomain))) {
169         auto& targetStatistics = ensureResourceStatisticsForPrimaryDomain(targetPrimaryDomain);
170         targetStatistics.lastSeen = reduceTimeResolution(WallTime::now());
171         if (targetStatistics.subframeUnderTopFrameOrigins.add(mainFramePrimaryDomain).isNewEntry)
172             shouldCallNotificationCallback = true;
173     }
174
175     if (isRedirect
176         && !areDomainsAssociated(page, sourcePrimaryDomain, targetPrimaryDomain)) {
177         bool isNewRedirectToEntry = false;
178         bool isNewRedirectFromEntry = false;
179         if (frame.isMainFrame()) {
180             auto& redirectingOriginStatistics = ensureResourceStatisticsForPrimaryDomain(sourcePrimaryDomain);
181             isNewRedirectToEntry = redirectingOriginStatistics.topFrameUniqueRedirectsTo.add(targetPrimaryDomain).isNewEntry;
182             auto& targetStatistics = ensureResourceStatisticsForPrimaryDomain(targetPrimaryDomain);
183             isNewRedirectFromEntry = targetStatistics.topFrameUniqueRedirectsFrom.add(sourcePrimaryDomain).isNewEntry;
184         } else {
185             auto& redirectingOriginStatistics = ensureResourceStatisticsForPrimaryDomain(sourcePrimaryDomain);
186             isNewRedirectToEntry = redirectingOriginStatistics.subresourceUniqueRedirectsTo.add(targetPrimaryDomain).isNewEntry;
187             auto& targetStatistics = ensureResourceStatisticsForPrimaryDomain(targetPrimaryDomain);
188             isNewRedirectFromEntry = targetStatistics.subresourceUniqueRedirectsFrom.add(sourcePrimaryDomain).isNewEntry;
189         }
190
191         if (isNewRedirectToEntry || isNewRedirectFromEntry)
192             shouldCallNotificationCallback = true;
193     }
194
195     if (shouldCallNotificationCallback)
196         scheduleNotificationIfNeeded();
197 }
198
199 // FIXME: This quirk was added to address <rdar://problem/33325881> and should be removed once content is fixed.
200 static bool resourceNeedsSSOQuirk(Page* page, const URL& url)
201 {
202     if (!shouldEnableSiteSpecificQuirks(page))
203         return false;
204
205     return equalIgnoringASCIICase(url.host(), "sp.auth.adobe.com");
206 }
207
208 void ResourceLoadObserver::logSubresourceLoading(const Frame* frame, const ResourceRequest& newRequest, const ResourceResponse& redirectResponse)
209 {
210     ASSERT(frame->page());
211
212     auto* page = frame->page();
213     if (!shouldLog(page))
214         return;
215
216     bool isRedirect = is3xxRedirect(redirectResponse);
217     const URL& sourceURL = redirectResponse.url();
218     const URL& targetURL = newRequest.url();
219     const URL& mainFrameURL = frame ? frame->mainFrame().document()->url() : URL();
220     
221     auto targetHost = targetURL.host();
222     auto mainFrameHost = mainFrameURL.host();
223
224     if (targetHost.isEmpty() || mainFrameHost.isEmpty() || targetHost == mainFrameHost || (isRedirect && targetHost == sourceURL.host()))
225         return;
226
227     auto targetPrimaryDomain = primaryDomain(targetURL);
228     auto mainFramePrimaryDomain = primaryDomain(mainFrameURL);
229     auto sourcePrimaryDomain = primaryDomain(sourceURL);
230     
231     if (areDomainsAssociated(page, targetPrimaryDomain, mainFramePrimaryDomain) || (isRedirect && areDomainsAssociated(page, targetPrimaryDomain, sourcePrimaryDomain)))
232         return;
233
234     if (resourceNeedsSSOQuirk(page, targetURL))
235         return;
236
237     bool shouldCallNotificationCallback = false;
238     {
239         auto& targetStatistics = ensureResourceStatisticsForPrimaryDomain(targetPrimaryDomain);
240         targetStatistics.lastSeen = reduceTimeResolution(WallTime::now());
241         if (targetStatistics.subresourceUnderTopFrameOrigins.add(mainFramePrimaryDomain).isNewEntry)
242             shouldCallNotificationCallback = true;
243     }
244
245     if (isRedirect) {
246         auto& redirectingOriginStatistics = ensureResourceStatisticsForPrimaryDomain(sourcePrimaryDomain);
247         bool isNewRedirectToEntry = redirectingOriginStatistics.subresourceUniqueRedirectsTo.add(targetPrimaryDomain).isNewEntry;
248         auto& targetStatistics = ensureResourceStatisticsForPrimaryDomain(targetPrimaryDomain);
249         bool isNewRedirectFromEntry = targetStatistics.subresourceUniqueRedirectsFrom.add(sourcePrimaryDomain).isNewEntry;
250
251         if (isNewRedirectToEntry || isNewRedirectFromEntry)
252             shouldCallNotificationCallback = true;
253     }
254
255     if (shouldCallNotificationCallback)
256         scheduleNotificationIfNeeded();
257 }
258
259 void ResourceLoadObserver::logWebSocketLoading(const Frame* frame, const URL& targetURL)
260 {
261     // FIXME: Web sockets can run in detached frames. Decide how to count such connections.
262     // See LayoutTests/http/tests/websocket/construct-in-detached-frame.html
263     if (!frame)
264         return;
265
266     auto* page = frame->page();
267     if (!shouldLog(page))
268         return;
269
270     auto& mainFrameURL = frame->mainFrame().document()->url();
271
272     auto targetHost = targetURL.host();
273     auto mainFrameHost = mainFrameURL.host();
274     
275     if (targetHost.isEmpty() || mainFrameHost.isEmpty() || targetHost == mainFrameHost)
276         return;
277     
278     auto targetPrimaryDomain = primaryDomain(targetURL);
279     auto mainFramePrimaryDomain = primaryDomain(mainFrameURL);
280     
281     if (areDomainsAssociated(page, targetPrimaryDomain, mainFramePrimaryDomain))
282         return;
283
284     auto& targetStatistics = ensureResourceStatisticsForPrimaryDomain(targetPrimaryDomain);
285     targetStatistics.lastSeen = reduceTimeResolution(WallTime::now());
286     if (targetStatistics.subresourceUnderTopFrameOrigins.add(mainFramePrimaryDomain).isNewEntry)
287         scheduleNotificationIfNeeded();
288 }
289
290 void ResourceLoadObserver::logUserInteractionWithReducedTimeResolution(const Document& document)
291 {
292     if (!shouldLog(document.page()))
293         return;
294
295     ASSERT(document.page());
296
297     auto& url = document.url();
298     if (url.isBlankURL() || url.isEmpty())
299         return;
300
301     auto domain = primaryDomain(url);
302     auto newTime = reduceTimeResolution(WallTime::now());
303     auto lastReportedUserInteraction = m_lastReportedUserInteractionMap.get(domain);
304     if (newTime == lastReportedUserInteraction)
305         return;
306
307     m_lastReportedUserInteractionMap.set(domain, newTime);
308
309     auto& statistics = ensureResourceStatisticsForPrimaryDomain(domain);
310     statistics.hadUserInteraction = true;
311     statistics.lastSeen = newTime;
312     statistics.mostRecentUserInteractionTime = newTime;
313
314     m_notificationTimer.stop();
315     notifyObserver();
316
317 #if HAVE(CFNETWORK_STORAGE_PARTITIONING) && !RELEASE_LOG_DISABLED
318     if (shouldLogUserInteraction()) {
319         auto counter = ++m_loggingCounter;
320 #define LOCAL_LOG(str, ...) \
321         RELEASE_LOG(ResourceLoadStatistics, "ResourceLoadObserver::logUserInteraction: counter = %" PRIu64 ": " str, counter, ##__VA_ARGS__)
322
323         auto escapeForJSON = [](String s) {
324             s.replace('\\', "\\\\").replace('"', "\\\"");
325             return s;
326         };
327         auto escapedURL = escapeForJSON(url.string());
328         auto escapedDomain = escapeForJSON(domain);
329
330         LOCAL_LOG(R"({ "url": "%{public}s",)", escapedURL.utf8().data());
331         LOCAL_LOG(R"(  "domain" : "%{public}s",)", escapedDomain.utf8().data());
332         LOCAL_LOG(R"(  "until" : %f })", newTime.secondsSinceEpoch().seconds());
333
334 #undef LOCAL_LOG
335     }
336 #endif
337 }
338
339 ResourceLoadStatistics& ResourceLoadObserver::ensureResourceStatisticsForPrimaryDomain(const String& primaryDomain)
340 {
341     auto addResult = m_resourceStatisticsMap.ensure(primaryDomain, [&primaryDomain] {
342         return ResourceLoadStatistics(primaryDomain);
343     });
344     return addResult.iterator->value;
345 }
346
347 void ResourceLoadObserver::scheduleNotificationIfNeeded()
348 {
349     ASSERT(m_notificationCallback);
350     if (m_resourceStatisticsMap.isEmpty()) {
351         m_notificationTimer.stop();
352         return;
353     }
354
355     if (!m_notificationTimer.isActive())
356         m_notificationTimer.startOneShot(minimumNotificationInterval);
357 }
358
359 void ResourceLoadObserver::notifyObserver()
360 {
361     ASSERT(m_notificationCallback);
362     m_notificationTimer.stop();
363     m_notificationCallback(takeStatistics());
364 }
365
366 String ResourceLoadObserver::statisticsForOrigin(const String& origin)
367 {
368     auto iter = m_resourceStatisticsMap.find(origin);
369     if (iter == m_resourceStatisticsMap.end())
370         return emptyString();
371
372     return "Statistics for " + origin + ":\n" + iter->value.toString();
373 }
374
375 Vector<ResourceLoadStatistics> ResourceLoadObserver::takeStatistics()
376 {
377     Vector<ResourceLoadStatistics> statistics;
378     statistics.reserveInitialCapacity(m_resourceStatisticsMap.size());
379     for (auto& statistic : m_resourceStatisticsMap.values())
380         statistics.uncheckedAppend(WTFMove(statistic));
381
382     m_resourceStatisticsMap.clear();
383
384     return statistics;
385 }
386
387 void ResourceLoadObserver::clearState()
388 {
389     m_notificationTimer.stop();
390     m_resourceStatisticsMap.clear();
391     m_lastReportedUserInteractionMap.clear();
392 }
393
394 URL ResourceLoadObserver::nonNullOwnerURL(const Document& document) const
395 {
396     auto url = document.url();
397     auto* frame = document.frame();
398     auto host = document.url().host();
399
400     while ((host.isNull() || host.isEmpty()) && frame && !frame->isMainFrame()) {
401         auto* ownerElement = frame->ownerElement();
402
403         ASSERT(ownerElement != nullptr);
404         
405         auto& doc = ownerElement->document();
406         frame = doc.frame();
407         url = doc.url();
408         host = url.host();
409     }
410
411     return url;
412 }
413
414 } // namespace WebCore