2010-06-25 Jay Civelli <jcivelli@chromium.org>
author: commit-queue@webkit.org <commit-queue@webkit.org@268f45cc-cd09-0410-ab3c-d52691b4dbfc>
Sat, 26 Jun 2010 06:02:27 +0000 (06:02 +0000)
committer: commit-queue@webkit.org <commit-queue@webkit.org@268f45cc-cd09-0410-ab3c-d52691b4dbfc>
Sat, 26 Jun 2010 06:02:27 +0000 (06:02 +0000)
        Reviewed by Darin Fisher.

        [chromium] Made WebFrame not report the text from hidden frames.
        (some pages contain hidden frames with garbage text that
        should not be indexed or used to detect the page's language).
        https://bugs.webkit.org/show_bug.cgi?id=39456

        * DEPS:
        * WebKit.gyp:
        * src/WebFrameImpl.cpp:
        (WebKit::frameContentAsPlainText):
        * tests/RunAllTests.cpp:
        (main):
        * tests/WebFrameTest.cpp: Added.
        * tests/data: Added.
        * tests/data/iframes_test.html: Added.
        * tests/data/invisible_iframe.html: Added.
        * tests/data/visible_iframe.html: Added.
        * tests/data/zero_sized_iframe.html: Added.

git-svn-id: http://svn.webkit.org/repository/webkit/trunk@61943 268f45cc-cd09-0410-ab3c-d52691b4dbfc

WebKit/chromium/ChangeLog
WebKit/chromium/WebKit.gyp
WebKit/chromium/src/WebFrameImpl.cpp
WebKit/chromium/tests/RunAllTests.cpp
WebKit/chromium/tests/WebFrameTest.cpp [new file with mode: 0644]
WebKit/chromium/tests/data/iframes_test.html [new file with mode: 0644]
WebKit/chromium/tests/data/invisible_iframe.html [new file with mode: 0644]
WebKit/chromium/tests/data/visible_iframe.html [new file with mode: 0644]
WebKit/chromium/tests/data/zero_sized_iframe.html [new file with mode: 0644]

index 2fb3ce20c0bd350ed65e038ce2fa9fdf63e7a155..afe4481a03ee265ac472d456308068a23948260d 100644 (file)
@@ -1,3 +1,25 @@
+2010-06-25  Jay Civelli  <jcivelli@chromium.org>
+
+        Reviewed by Darin Fisher.
+
+        [chromium] Made WebFrame not report the text from hidden frames.
+        (some pages contain hidden frames with garbage text that
+        should not be indexed or used to detect the page's language).
+        https://bugs.webkit.org/show_bug.cgi?id=39456
+
+        * DEPS:
+        * WebKit.gyp:
+        * src/WebFrameImpl.cpp:
+        (WebKit::frameContentAsPlainText):
+        * tests/RunAllTests.cpp:
+        (main):
+        * tests/WebFrameTest.cpp: Added.
+        * tests/data: Added.
+        * tests/data/iframes_test.html: Added.
+        * tests/data/invisible_iframe.html: Added.
+        * tests/data/visible_iframe.html: Added.
+        * tests/data/zero_sized_iframe.html: Added.
+
 2010-06-25  Bernhard Bauer  <bauerb@chromium.org>
 
         Reviewed by Darin Fisher.
index d9d16645c4d3f1a70a05d0b8ee232f4ebc110e9d..507107ae5a1fb343f9de093b32f9782ae8cb7076 100644 (file)
                 '<(chromium_src_dir)/testing/gtest.gyp:gtest',
                 '<(chromium_src_dir)/base/base.gyp:base',
                 '<(chromium_src_dir)/base/base.gyp:base_i18n',
+                '<(chromium_src_dir)/webkit/support/webkit_support.gyp:webkit_support',
                 '<(chromium_src_dir)/gpu/gpu.gyp:gles2_c_lib',
             ],
             'include_dirs': [
                 'tests/KeyboardTest.cpp',
                 'tests/KURLTest.cpp',
                 'tests/RunAllTests.cpp',
+                'tests/WebFrameTest.cpp',
             ],
             'conditions': [
                 ['OS=="win"', {
index c5fa1fe834f18f71d6dec0cd372939031a0dfbac..89188974c3d00016e4948208475cedfe5382e13f 100644 (file)
@@ -233,6 +233,15 @@ static void frameContentAsPlainText(size_t maxChars, Frame* frame,
     // Recursively walk the children.
     FrameTree* frameTree = frame->tree();
     for (Frame* curChild = frameTree->firstChild(); curChild; curChild = curChild->tree()->nextSibling()) {
+        // Ignore the text of non-visible frames.
+        RenderView* contentRenderer = curChild->contentRenderer();
+        RenderPart* ownerRenderer = curChild->ownerRenderer();        
+        if (!contentRenderer || !contentRenderer->width() || !contentRenderer->height()
+            || (contentRenderer->x() + contentRenderer->width() <= 0) || (contentRenderer->y() + contentRenderer->height() <= 0)
+            || (ownerRenderer && ownerRenderer->style() && ownerRenderer->style()->visibility() != VISIBLE)) {
+            continue;
+        }
+
         // Make sure the frame separator won't fill up the buffer, and give up if
         // it will. The danger is if the separator will make the buffer longer than
         // maxChars. This will cause the computation above:
index 0f3f82fb7c8be3c75b6cfb5dfa928cc22cbf6a34..cfcfbeec5769e806dda512aedacde021dfe64e53 100644 (file)
 
 #include "WebKit.h"
 #include "WebKitClient.h"
-
-// WebKitClient has a protected destructor, so we need to subclass.
-class DummyWebKitClient : public WebKit::WebKitClient {
-};
+#include <webkit/support/webkit_support.h>
 
 int main(int argc, char** argv)
 {
-    DummyWebKitClient dummyClient;
-    WebKit::initialize(&dummyClient);
-
-    int result = TestSuite(argc, argv).Run();
-
-    WebKit::shutdown();
+    TestSuite testSuite(argc, argv);
+    // TestSuite must be created before SetUpTestEnvironment so it performs
+    // initializations needed by WebKit support.
+    webkit_support::SetUpTestEnvironmentForUnitTests();
+    int result = testSuite.Run();
+    webkit_support::TearDownTestEnvironment();
     return result;
 }
diff --git a/WebKit/chromium/tests/WebFrameTest.cpp b/WebKit/chromium/tests/WebFrameTest.cpp
new file mode 100644 (file)
index 0000000..f582104
--- /dev/null
@@ -0,0 +1,112 @@
+/*
+ * Copyright (C) 2010 Google Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ *     * Neither the name of Google Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <googleurl/src/gurl.h>
+#include <gtest/gtest.h>
+#include <webkit/support/webkit_support.h>
+#include "WebFrame.h"
+#include "WebFrameClient.h"
+#include "WebString.h"
+#include "WebURL.h"
+#include "WebURLRequest.h"
+#include "WebURLResponse.h"
+#include "WebView.h"
+
+using namespace WebKit;
+
+namespace {
+
+class WebFrameTest : public testing::Test {
+public:
+    WebFrameTest() {}
+
+    virtual void TearDown()
+    {
+        webkit_support::UnregisterAllMockedURLs();
+    }
+
+    void registerMockedURLLoad(const WebURL& url, const WebURLResponse& response, const WebString& fileName)
+    {
+        std::string filePath = webkit_support::GetWebKitRootDir().utf8();
+        filePath.append("/WebKit/chromium/tests/data/");
+        filePath.append(fileName.utf8());
+        webkit_support::RegisterMockedURL(url, response, WebString::fromUTF8(filePath));
+    }
+
+    void serveRequests()
+    {
+        webkit_support::ServeAsynchronousMockedRequests();
+    }
+};
+
+class TestWebFrameClient : public WebFrameClient {
+};
+
+TEST_F(WebFrameTest, ContentText)
+{
+    // Register our resources.
+    WebURLResponse response;
+    response.initialize();
+    response.setMIMEType("text/html");
+    std::string rootURL = "http://www.test.com/";
+    const char* files[] = { "iframes_test.html", "visible_iframe.html",
+                            "invisible_iframe.html", "zero_sized_iframe.html" };
+    for (int i = 0; i < (sizeof(files) / sizeof(char*)); ++i) {
+        WebURL webURL = GURL(rootURL + files[i]);
+        registerMockedURLLoad(webURL, response, WebString::fromUTF8(files[i]));
+    }
+
+    // Create and initialize the WebView.    
+    TestWebFrameClient webFrameClient;
+    WebView* webView = WebView::create(0);
+    webView->initializeMainFrame(&webFrameClient);
+
+    // Load the main frame URL.
+    WebURL testURL(GURL(rootURL + files[0]));
+    WebURLRequest urlRequest;
+    urlRequest.initialize();
+    urlRequest.setURL(testURL);
+    webView->mainFrame()->loadRequest(urlRequest);
+
+    // Load all pending asynchronous requests.
+    serveRequests();
+
+    // Now retrieve the frames text and test it only includes visible elements.
+    std::string content = webView->mainFrame()->contentAsText(1024).utf8();
+    EXPECT_NE(std::string::npos, content.find(" visible paragraph"));
+    EXPECT_NE(std::string::npos, content.find(" visible iframe"));
+    EXPECT_EQ(std::string::npos, content.find(" invisible pararaph"));
+    EXPECT_EQ(std::string::npos, content.find(" invisible iframe"));
+    EXPECT_EQ(std::string::npos, content.find("iframe with zero size"));
+
+    webView->close();
+}
+
+}
diff --git a/WebKit/chromium/tests/data/iframes_test.html b/WebKit/chromium/tests/data/iframes_test.html
new file mode 100644 (file)
index 0000000..85d229a
--- /dev/null
@@ -0,0 +1,10 @@
+<html>
+  <body>
+    <iframe src="visible_iframe.html"></iframe>
+    <iframe width=0 height=0 src="zero_sized_iframe.html"></iframe>
+    <iframe style="visibility:hidden;" src="invisible_iframe.html"></iframe>
+    <p>This is a visible paragraph.</p>
+    <p style="visibility:hidden;">This is an invisible paragraph.</p>
+  </body>
+</html>
+
diff --git a/WebKit/chromium/tests/data/invisible_iframe.html b/WebKit/chromium/tests/data/invisible_iframe.html
new file mode 100644 (file)
index 0000000..3304aec
--- /dev/null
@@ -0,0 +1,6 @@
+<html>
+  <body>
+    This is an invisible iframe.
+  </body>
+</html>
+
diff --git a/WebKit/chromium/tests/data/visible_iframe.html b/WebKit/chromium/tests/data/visible_iframe.html
new file mode 100644 (file)
index 0000000..291af3d
--- /dev/null
@@ -0,0 +1,5 @@
+<html>
+  <body>
+    This is a visible iframe.
+  </body>
+</html>
diff --git a/WebKit/chromium/tests/data/zero_sized_iframe.html b/WebKit/chromium/tests/data/zero_sized_iframe.html
new file mode 100644 (file)
index 0000000..6728cab
--- /dev/null
@@ -0,0 +1,5 @@
+<html>
+  <body>
+    This is an iframe with zero size.
+  </body>
+</html>