#include "config.h"
#include "WebPageSerializer.h"
-#include "CSSFontFaceRule.h"
-#include "CSSFontFaceSrcValue.h"
-#include "CSSImageValue.h"
-#include "CSSImportRule.h"
-#include "CSSRule.h"
-#include "CSSRuleList.h"
-#include "CSSStyleRule.h"
-#include "CSSStyleSheet.h"
-#include "CSSValueList.h"
-#include "CachedImage.h"
#include "DocumentLoader.h"
#include "Element.h"
#include "Frame.h"
#include "HTMLAllCollection.h"
#include "HTMLFrameOwnerElement.h"
#include "HTMLInputElement.h"
-#include "HTMLLinkElement.h"
#include "HTMLNames.h"
-#include "HTMLStyleElement.h"
#include "KURL.h"
-#include "KURLHash.h"
-#include "ListHashSet.h"
-#include "StyleCachedImage.h"
-#include "StyleImage.h"
-#include "StyleSheetList.h"
#include "Vector.h"
#include "WebCString.h"
namespace {
-void retrieveResourcesForCSSStyleDeclaration(CSSStyleDeclaration*,
- ListHashSet<KURL>* resourceURLs);
-
-const QualifiedName* getResourceAttributeForElement(HTMLElement* element)
+KURL getSubResourceURLFromElement(Element* element)
{
+ ASSERT(element);
+ const QualifiedName* attributeName = 0;
if (element->hasTagName(HTMLNames::imgTag) || element->hasTagName(HTMLNames::scriptTag))
- return &HTMLNames::srcAttr;
-
- if (element->hasTagName(HTMLNames::inputTag)) {
+ attributeName = &HTMLNames::srcAttr;
+ else if (element->hasTagName(HTMLNames::inputTag)) {
HTMLInputElement* input = static_cast<HTMLInputElement*>(element);
if (input->isImageButton())
- return &HTMLNames::srcAttr;
- }
-
- if (element->hasTagName(HTMLNames::bodyTag)
- || element->hasTagName(HTMLNames::tableTag)
- || element->hasTagName(HTMLNames::trTag)
- || element->hasTagName(HTMLNames::tdTag))
- return &HTMLNames::backgroundAttr;
-
- if (element->hasTagName(HTMLNames::blockquoteTag)
- || element->hasTagName(HTMLNames::qTag)
- || element->hasTagName(HTMLNames::delTag)
- || element->hasTagName(HTMLNames::insTag))
- return &HTMLNames::citeAttr;
-
- if (element->hasTagName(HTMLNames::objectTag))
- return &HTMLNames::dataAttr;
-
- if (element->hasTagName(HTMLNames::iframeTag)
- || element->hasTagName(HTMLNames::frameTag)
- || element->hasTagName(HTMLNames::embedTag))
- return &HTMLNames::srcAttr;
-
- if (element->hasTagName(HTMLNames::linkTag)
- && equalIgnoringCase(element->getAttribute(HTMLNames::typeAttr), "text/css"))
- return &HTMLNames::hrefAttr;
-
- return 0;
-}
-
-void retrieveStyleSheetForElement(HTMLElement* element,
- ListHashSet<CSSStyleSheet*>* styleSheets)
-{
- if (element->hasTagName(HTMLNames::linkTag)) {
- HTMLLinkElement* linkElement = static_cast<HTMLLinkElement*>(element);
- // We are only interested in CSS links.
+ attributeName = &HTMLNames::srcAttr;
+ } else if (element->hasTagName(HTMLNames::bodyTag)
+ || element->hasTagName(HTMLNames::tableTag)
+ || element->hasTagName(HTMLNames::trTag)
+ || element->hasTagName(HTMLNames::tdTag))
+ attributeName = &HTMLNames::backgroundAttr;
+ else if (element->hasTagName(HTMLNames::blockquoteTag)
+ || element->hasTagName(HTMLNames::qTag)
+ || element->hasTagName(HTMLNames::delTag)
+ || element->hasTagName(HTMLNames::insTag))
+ attributeName = &HTMLNames::citeAttr;
+ else if (element->hasTagName(HTMLNames::linkTag)) {
+ // If the link element is not css, ignore it.
if (equalIgnoringCase(element->getAttribute(HTMLNames::typeAttr), "text/css")) {
- StyleSheet* sheet = linkElement->sheet();
- if (sheet && sheet->isCSSStyleSheet())
- styleSheets->add(static_cast<CSSStyleSheet*>(sheet));
+ // FIXME: Add support for extracting links of sub-resources that are
+ // referenced inside style sheets, such as @import, @font-face, url(), etc.
+ attributeName = &HTMLNames::hrefAttr;
}
- return;
- }
- if (element->hasTagName(HTMLNames::styleTag)) {
- HTMLStyleElement* styleElement = static_cast<HTMLStyleElement*>(element);
- StyleSheet* sheet = styleElement->sheet();
- if (sheet && sheet->isCSSStyleSheet())
- styleSheets->add(static_cast<CSSStyleSheet*>(sheet));
- }
+ } else if (element->hasTagName(HTMLNames::objectTag))
+ attributeName = &HTMLNames::dataAttr;
+ else if (element->hasTagName(HTMLNames::embedTag))
+ attributeName = &HTMLNames::srcAttr;
+
+ if (!attributeName)
+ return KURL();
+
+ String value = element->getAttribute(*attributeName);
+ // Ignore javascript content.
+ if (value.isEmpty() || value.stripWhiteSpace().startsWith("javascript:", false))
+ return KURL();
+
+ return element->document()->completeURL(value);
}
-void retrieveResourcesForElement(HTMLElement* element,
- const WebKit::WebVector<WebKit::WebCString>& supportedSchemes,
- ListHashSet<Frame*>* visitedFrames,
+void retrieveResourcesForElement(Element* element,
+ Vector<Frame*>* visitedFrames,
Vector<Frame*>* framesToVisit,
- ListHashSet<CSSStyleSheet*>* styleSheets,
- ListHashSet<KURL>* frameURLs,
- ListHashSet<KURL>* resourceURLs)
+ Vector<KURL>* frameURLs,
+ Vector<KURL>* resourceURLs)
{
- ASSERT(element);
+ // If the node is a frame, we'll process it later in retrieveResourcesForFrame.
if ((element->hasTagName(HTMLNames::iframeTag) || element->hasTagName(HTMLNames::frameTag)
- || element->hasTagName(HTMLNames::objectTag) || element->hasTagName(HTMLNames::embedTag))
- && element->isFrameOwnerElement()) {
- Frame* frame = static_cast<const HTMLFrameOwnerElement*>(element)->contentFrame();
+ || element->hasTagName(HTMLNames::objectTag) || element->hasTagName(HTMLNames::embedTag))
+ && element->isFrameOwnerElement()) {
+ Frame* frame = static_cast<HTMLFrameOwnerElement*>(element)->contentFrame();
if (frame) {
if (!visitedFrames->contains(frame))
framesToVisit->append(frame);
}
}
- const QualifiedName* attribute = getResourceAttributeForElement(element);
- if (attribute) {
- String value = element->getAttribute(*attribute);
- if (!value.isEmpty()) {
- KURL url = element->document()->completeURL(value);
- // Ignore URLs that have a non-standard protocols. Since the FTP protocol
- // does not have a cache mechanism, we skip it as well.
- if (url.isValid() && (url.protocolInHTTPFamily() || url.isLocalFile()))
- resourceURLs->add(url);
- }
- }
+ KURL url = getSubResourceURLFromElement(element);
+ if (url.isEmpty() || !url.isValid())
+ return; // No subresource for this node.
- retrieveStyleSheetForElement(element, styleSheets);
-
- // Process in-line style.
- if (CSSStyleDeclaration* styleDeclaration = element->style())
- retrieveResourcesForCSSStyleDeclaration(styleDeclaration, resourceURLs);
+ // Ignore URLs that use non-standard protocols. Since the FTP protocol
+ // does not have a cache mechanism, we skip it as well.
+ if (!url.protocolInHTTPFamily() && !url.isLocalFile())
+ return;
+
+ if (!resourceURLs->contains(url))
+ resourceURLs->append(url);
}
void retrieveResourcesForFrame(Frame* frame,
const WebKit::WebVector<WebKit::WebCString>& supportedSchemes,
- ListHashSet<Frame*>* visitedFrames,
+ Vector<Frame*>* visitedFrames,
Vector<Frame*>* framesToVisit,
- ListHashSet<CSSStyleSheet*>* styleSheets,
- ListHashSet<KURL>* frameURLs,
- ListHashSet<KURL>* resourceURLs)
+ Vector<KURL>* frameURLs,
+ Vector<KURL>* resourceURLs)
{
- if (!visitedFrames->add(frame).second)
- return; // We have already seen that frame.
-
KURL frameURL = frame->loader()->documentLoader()->request().url();
// If the frame's URL is invalid, ignore it, it is not retrievable.
if (!isValidScheme)
return;
- frameURLs->add(frameURL);
+ // If we have already seen that frame, ignore it.
+ if (visitedFrames->contains(frame))
+ return;
+ visitedFrames->append(frame);
+ if (!frameURLs->contains(frameURL))
+ frameURLs->append(frameURL);
// Now get the resources associated with each node of the document.
RefPtr<HTMLAllCollection> allNodes = frame->document()->all();
for (unsigned i = 0; i < allNodes->length(); ++i) {
+ Node* node = allNodes->item(i);
// We are only interested in HTML resources.
- if (HTMLElement* element = toHTMLElement(allNodes->item(i))) {
- retrieveResourcesForElement(element, supportedSchemes,
- visitedFrames, framesToVisit,
- styleSheets, frameURLs, resourceURLs);
- }
- }
-}
-
-void retrieveResourcesForCSSRule(CSSStyleRule* rule,
- ListHashSet<KURL>* resourceURLs)
-{
- if (rule->style())
- retrieveResourcesForCSSStyleDeclaration(rule->style(), resourceURLs);
-}
-
-void retrieveResourcesForCSSStyleDeclaration(CSSStyleDeclaration* styleDeclaration,
- ListHashSet<KURL>* resourceURLs)
-{
- // The background-image and list-style-image (for ul or ol) are the CSS properties
- // that make use of images. We iterate to make sure we include any other
- // image properties there might be.
- for (unsigned i = 0; i < styleDeclaration->length(); ++i) {
- // FIXME: it's kind of ridiculous to get the property name and then get
- // the value out of the name. Ideally we would get the value out of the
- // property ID, but CSSStyleDeclaration only gives access to property
- // names, not IDs.
- RefPtr<CSSValue> value = styleDeclaration->getPropertyCSSValue(styleDeclaration->item(i));
- if (value->isImageValue()) {
- CSSImageValue* imageValue = static_cast<CSSImageValue*>(value.get());
- StyleImage* styleImage = imageValue->cachedOrPendingImage();
- // Non cached-images are just place-holders and do not contain data.
- if (styleImage->isCachedImage()) {
- StyleSheet* styleSheet = styleDeclaration->stylesheet();
- if (styleSheet->isCSSStyleSheet()) {
- String url = static_cast<StyleCachedImage*>(styleImage)->cachedImage()->url();
- resourceURLs->add(static_cast<CSSStyleSheet*>(styleSheet)->document()->completeURL(url));
- }
- }
- }
- }
-}
-
-void retrieveResourcesForCSSStyleSheet(CSSStyleSheet* styleSheet,
- ListHashSet<CSSStyleSheet*>* visitedStyleSheets,
- const WebKit::WebVector<WebKit::WebCString>& supportedSchemes,
- ListHashSet<KURL>* resourceURLs)
-{
- if (!styleSheet)
- return;
-
- if (!visitedStyleSheets->add(styleSheet).second)
- return; // We have already seen that styleSheet.
-
- // Parse the styles.
- for (unsigned i = 0; i < styleSheet->length(); ++i) {
- StyleBase* item = styleSheet->item(i);
- if (!item)
+ if (!node->isElementNode())
continue;
-
- if (item->isImportRule()) {
- CSSImportRule* importRule = static_cast<CSSImportRule*>(item);
- // The imported CSS file itself is a resource.
- resourceURLs->add(styleSheet->document()->completeURL(importRule->href()));
- // And it may contain some more resources.
- retrieveResourcesForCSSStyleSheet(importRule->styleSheet(), visitedStyleSheets, supportedSchemes, resourceURLs);
- } else if (item->isFontFaceRule()) {
- CSSFontFaceRule* fontFaceRule = static_cast<CSSFontFaceRule*>(item);
- RefPtr<CSSValue> cssValue = fontFaceRule->style()->getPropertyCSSValue(CSSPropertySrc);
- if (cssValue->isValueList()) {
- CSSValueList* valueList = static_cast<CSSValueList*>(cssValue.get());
- for (unsigned j = 0; j < valueList->length(); ++j) {
- // Note that there does not seem to be a way to ensure the value in the list is a CSSFontFaceSrcValue.
- // We do trust that list only contains CSSFontFaceSrcValues as done in WebCore/css/CSSFontSelector.cpp
- CSSFontFaceSrcValue* fontFaceSrc = static_cast<CSSFontFaceSrcValue*>(valueList->item(j));
- resourceURLs->add(styleSheet->document()->completeURL(fontFaceSrc->resource()));
- }
- }
- } else if (item->isStyleRule())
- retrieveResourcesForCSSRule(static_cast<CSSStyleRule*>(item), resourceURLs);
+ retrieveResourcesForElement(static_cast<Element*>(node),
+ visitedFrames, framesToVisit,
+ frameURLs, resourceURLs);
}
}
return false;
Vector<Frame*> framesToVisit;
- ListHashSet<Frame*> visitedFrames;
- ListHashSet<CSSStyleSheet*> styleSheets;
- ListHashSet<KURL> frameKURLs;
- ListHashSet<KURL> resourceKURLs;
+ Vector<Frame*> visitedFrames;
+ Vector<KURL> frameKURLs;
+ Vector<KURL> resourceKURLs;
// Let's retrieve the resources from every frame in this page.
framesToVisit.append(mainFrame->frame());
Frame* frame = framesToVisit[0];
framesToVisit.remove(0);
retrieveResourcesForFrame(frame, supportedSchemes,
- &visitedFrames, &framesToVisit, &styleSheets,
+ &visitedFrames, &framesToVisit,
&frameKURLs, &resourceKURLs);
- }
-
- // While retrieving the frame resources, we also retrieved the CSS style-sheets,
- // we can process them now.
- ListHashSet<CSSStyleSheet*> visitedStyleSheets;
- for (ListHashSet<CSSStyleSheet*>::const_iterator iter = styleSheets.begin();
- iter != styleSheets.end(); ++iter) {
- retrieveResourcesForCSSStyleSheet(*iter, &visitedStyleSheets,
- supportedSchemes, &resourceKURLs);
}
// Converts the results to WebURLs.
- WebVector<WebURL> resultResourceURLs(static_cast<size_t>(resourceKURLs.size()));
- int i = 0;
- for (ListHashSet<KURL>::const_iterator iter = resourceKURLs.begin();
- iter != resourceKURLs.end(); ++iter, ++i) {
- KURL url = *iter;
- resultResourceURLs[i] = url;
+ WebVector<WebURL> resultResourceURLs(resourceKURLs.size());
+ for (size_t i = 0; i < resourceKURLs.size(); ++i) {
+ resultResourceURLs[i] = resourceKURLs[i];
// A frame's src can point to the same URL as another resource, keep the
// resource URL only in such cases.
- frameKURLs.remove(url);
+ size_t index = frameKURLs.find(resourceKURLs[i]);
+ if (index != notFound)
+ frameKURLs.remove(index);
}
*resourceURLs = resultResourceURLs;
- WebVector<WebURL> resultFrameURLs(static_cast<size_t>(frameKURLs.size()));
- i = 0;
- for (ListHashSet<KURL>::const_iterator iter = frameKURLs.begin();
- iter != frameKURLs.end(); ++iter, ++i)
- resultFrameURLs[i] = *iter;
-
+ WebVector<WebURL> resultFrameURLs(frameKURLs.size());
+ for (size_t i = 0; i < frameKURLs.size(); ++i)
+ resultFrameURLs[i] = frameKURLs[i];
*frameURLs = resultFrameURLs;
return true;
#include "WebFrame.h"
#include "WebFrameClient.h"
-#include "WebScriptSource.h"
#include "WebString.h"
#include "WebURL.h"
#include "WebURLRequest.h"
namespace {
class TestWebFrameClient : public WebFrameClient {
-public:
- TestWebFrameClient() : m_scriptEnabled(false) { }
- virtual bool allowScript(WebFrame*, bool /* enabledPerSettings */) { return m_scriptEnabled; }
- bool m_scriptEnabled;
};
class WebPageSerializerTest : public testing::Test {
webkit_support::ServeAsynchronousMockedRequests();
}
- void enableJS()
- {
- m_webFrameClient.m_scriptEnabled = true;
- }
-
- void runOnLoad()
- {
- m_webView->mainFrame()->executeScript(WebScriptSource(WebString::fromUTF8("onLoad()")));
- }
-
static bool webVectorContains(const WebVector<WebURL>& vector, char* url)
{
return vector.contains(WebURL(GURL(url)));
EXPECT_TRUE(webVectorContains(resources, "http://www.test.com/embed.png"));
}
-TEST_F(WebPageSerializerTest, RetrieveCSSResources)
-{
- // Register the mocked frame and load it.
- WebURL topFrameURL = GURL("http://www.test.com");
- registerMockedURLLoad(topFrameURL, WebString::fromUTF8("css_test_page.html"));
- registerMockedURLLoad(GURL("http://www.test.com/link_styles.css"),
- WebString::fromUTF8("link_styles.css"));
- registerMockedURLLoad(GURL("http://www.test.com/import_style_from_link.css"),
- WebString::fromUTF8("import_style_from_link.css"));
- registerMockedURLLoad(GURL("http://www.test.com/import_styles.css"),
- WebString::fromUTF8("import_styles.css"));
-
- enableJS();
- loadURLInTopFrame(topFrameURL);
- runOnLoad();
-
- // Retrieve all resources.
- WebVector<WebURL> frames;
- WebVector<WebURL> resources;
- ASSERT_TRUE(WebPageSerializer::retrieveAllResources(m_webView, m_supportedSchemes, &resources, &frames));
-
- // Tests that all resources from the frame have been retrieved.
- EXPECT_EQ(1, frames.size());
- EXPECT_TRUE(webVectorContains(frames, "http://www.test.com"));
-
- EXPECT_EQ(12, resources.size()); // There should be no duplicates.
- EXPECT_TRUE(webVectorContains(resources, "http://www.test.com/link_styles.css"));
- EXPECT_TRUE(webVectorContains(resources, "http://www.test.com/import_styles.css"));
- EXPECT_TRUE(webVectorContains(resources, "http://www.test.com/import_style_from_link.css"));
- EXPECT_TRUE(webVectorContains(resources, "http://www.test.com/red_background.png"));
- EXPECT_TRUE(webVectorContains(resources, "http://www.test.com/orange_background.png"));
- EXPECT_TRUE(webVectorContains(resources, "http://www.test.com/yellow_background.png"));
- EXPECT_TRUE(webVectorContains(resources, "http://www.test.com/green_background.png"));
- EXPECT_TRUE(webVectorContains(resources, "http://www.test.com/blue_background.png"));
- EXPECT_TRUE(webVectorContains(resources, "http://www.test.com/purple_background.png"));
- EXPECT_TRUE(webVectorContains(resources, "http://www.test.com/Chunkfive.otf"));
- EXPECT_TRUE(webVectorContains(resources, "http://www.test.com/ul-dot.png"));
- EXPECT_TRUE(webVectorContains(resources, "http://www.test.com/ol-dot.png"));
-}
-
}