Reviewed by Darin.
authorap <ap@268f45cc-cd09-0410-ab3c-d52691b4dbfc>
Sat, 24 Jun 2006 15:00:16 +0000 (15:00 +0000)
committerap <ap@268f45cc-cd09-0410-ab3c-d52691b4dbfc>
Sat, 24 Jun 2006 15:00:16 +0000 (15:00 +0000)
        - fix http://bugzilla.opendarwin.org/show_bug.cgi?id=7461
          Always encode the path part of a URI as UTF-8

        Test: http/tests/uri/utf8-path.html

        * platform/KURL.cpp:
        (encodeRelativeString): Always set pathEncoding to UTF-8.

git-svn-id: https://svn.webkit.org/repository/webkit/trunk@15010 268f45cc-cd09-0410-ab3c-d52691b4dbfc

LayoutTests/ChangeLog
LayoutTests/http/tests/uri/intercept/.htaccess [new file with mode: 0644]
LayoutTests/http/tests/uri/resources/print-uri.php [new file with mode: 0644]
LayoutTests/http/tests/uri/utf8-path-expected.txt [new file with mode: 0644]
LayoutTests/http/tests/uri/utf8-path.html [new file with mode: 0644]
WebCore/ChangeLog
WebCore/platform/KURL.cpp

index 175d3ab14ba2d24950443670b03d88787f6d7ebc..8b0590c33566eb703b75ea5f7c7546ac200575bf 100644 (file)
@@ -1,3 +1,15 @@
+2006-06-24  Alexey Proskuryakov  <ap@nypop.com>
+
+        Reviewed by Darin.
+
+        - http://bugzilla.opendarwin.org/show_bug.cgi?id=7461
+          Always encode the path part of a URI as UTF-8
+
+        * http/tests/uri/intercept/.htaccess: Added.
+        * http/tests/uri/resources/print-uri.php: Added.
+        * http/tests/uri/utf8-path-expected.txt: Added.
+        * http/tests/uri/utf8-path.html: Added.
+
 2006-06-24  Graham Dennis  <Graham.Dennis@gmail.com>
 
         Reviewed by Darin, landed by ap.
diff --git a/LayoutTests/http/tests/uri/intercept/.htaccess b/LayoutTests/http/tests/uri/intercept/.htaccess
new file mode 100644 (file)
index 0000000..719eac4
--- /dev/null
@@ -0,0 +1,2 @@
+RewriteEngine on
+RewriteRule ^.* /uri/resources/print-uri.php [L,NS]
diff --git a/LayoutTests/http/tests/uri/resources/print-uri.php b/LayoutTests/http/tests/uri/resources/print-uri.php
new file mode 100644 (file)
index 0000000..8aceefe
--- /dev/null
@@ -0,0 +1,7 @@
+<?php
+    header("Content-Type: text/javascript");
+    header("Expires: Thu, 01 Dec 2003 16:00:00 GMT\n");
+    header("Cache-Control: no-store, no-cache, must-revalidate\n");
+    header("Pragma: no-cache\n");
+    echo "document.write('" . $_SERVER['REQUEST_URI'] . "');";
+?>
diff --git a/LayoutTests/http/tests/uri/utf8-path-expected.txt b/LayoutTests/http/tests/uri/utf8-path-expected.txt
new file mode 100644 (file)
index 0000000..f7d9c45
--- /dev/null
@@ -0,0 +1,12 @@
+Test for bug 7461: Always encode the path part of an URI as UTF-8
+
+Test the URL as it is passed to the server. WinIE 6 and Firefox 3.0a results are currently diffrerent: IE uses the target encoding for the query path, while Firefox uses the page encoding. Also, WinIE doesn't percent-encode the query path.
+"/uri/intercept/print/%D1%84%D0%B0%D0%B9%D0%BB.js?%F7%E5%E9=%EC%EE%E9" (no target charset specified)
+"/uri/intercept/print/%D1%84%D0%B0%D0%B9%D0%BB.js?%F7%E5%E9=%EC%EE%E9" (target charset=windows-1251, same as page)
+"/uri/intercept/print/%D1%84%D0%B0%D0%B9%D0%BB.js?%F7%E5%E9=%EC%EE%E9" (target charset=iso-8859-5)
+
+Show the source attribute of the scripts. WinIE 6 doesn't use percent encoding here, while Firefox does.
+"http://127.0.0.1:8000/uri/intercept/print/%D1%84%D0%B0%D0%B9%D0%BB.js?%F7%E5%E9=%EC%EE%E9"
+"http://127.0.0.1:8000/uri/intercept/print/%D1%84%D0%B0%D0%B9%D0%BB.js?%F7%E5%E9=%EC%EE%E9"
+"http://127.0.0.1:8000/uri/intercept/print/%D1%84%D0%B0%D0%B9%D0%BB.js?%F7%E5%E9=%EC%EE%E9"
+
diff --git a/LayoutTests/http/tests/uri/utf8-path.html b/LayoutTests/http/tests/uri/utf8-path.html
new file mode 100644 (file)
index 0000000..9c1378c
--- /dev/null
@@ -0,0 +1,36 @@
+<html>
+<head>
+<meta http-equiv="Content-Type" content="text/html; charset=windows-1251">
+</head>
+<body>
+
+<script>
+    if (window.layoutTestController)
+        layoutTestController.dumpAsText();
+</script>
+
+<p>Test for <a href='http://bugzilla.opendarwin.org/show_bug.cgi?id=7461'>bug 7461</a>: 
+Always encode the path part of an URI as UTF-8</p>
+
+Test the URL as it is passed to the server. WinIE 6 and Firefox 3.0a results are currently diffrerent:
+IE uses the target encoding for the query path, while Firefox uses the page encoding. Also, WinIE doesn't percent-encode
+the query path.<br>
+"<script id=scr1 src="intercept/print/ôàéë.js?÷åé=ìîé"></script>" (no target charset specified)<br>
+"<script id=scr2 charset="windows-1251" src="intercept/print/ôàéë.js?÷åé=ìîé"></script>" (target charset=windows-1251, same as page)<br>
+"<script id=scr3 charset="iso-8859-5" src="intercept/print/ôàéë.js?÷åé=ìîé"></script>" (target charset=iso-8859-5)<br><br>
+
+Show the source attribute of the scripts. WinIE 6 doesn't use percent encoding here, while Firefox does.<br>
+<script>
+ try {
+  document.write('"' + document.scripts[1].src + '"<br>');
+  document.write('"' + document.scripts[2].src + '"<br>');
+  document.write('"' + document.scripts[3].src + '"<br>');
+ } catch (ex) {
+  document.write('"' + document.getElementById("scr1").src + '"<br>');
+  document.write('"' + document.getElementById("scr2").src + '"<br>');
+  document.write('"' + document.getElementById("scr3").src + '"<br>');
+ }
+</script>
+
+</body>
+</html>
index 96fe14eab795a39b224e15c6878b03667cde3a67..54b7b59153bcb9e552fe486fbf0ceff26c71e8e9 100644 (file)
@@ -1,3 +1,15 @@
+2006-06-24  Alexey Proskuryakov  <ap@nypop.com>
+
+        Reviewed by Darin.
+
+        - fix http://bugzilla.opendarwin.org/show_bug.cgi?id=7461
+          Always encode the path part of a URI as UTF-8
+
+        Test: http/tests/uri/utf8-path.html
+
+        * platform/KURL.cpp:
+        (encodeRelativeString): Always set pathEncoding to UTF-8.
+
 2006-06-24  Graham Dennis  <Graham.Dennis@gmail.com>
 
         Reviewed by Darin, landed by ap.
index a456665664b1466af89791b3afbc1d5c2789db68..a5d7f54984288b6a66e9701c376422c30f61c343 100644 (file)
@@ -1396,40 +1396,8 @@ static char *encodeRelativeString(const KURL &base, const DeprecatedString &rel,
 
     char *strBuffer;
 
-    static const WebCore::TextEncoding utf8Encoding(UTF8Encoding);
-
-    WebCore::TextEncoding pathEncoding = encoding.isValid() ? encoding : utf8Encoding;
-    WebCore::TextEncoding otherEncoding = pathEncoding;
-    
-    // Always use UTF-8 for mailto URLs because that's what mail applications expect.
-    // Always use UTF-8 for paths in file and help URLs, since they are local filesystem paths,
-    // and help content is often defined with this in mind, but use native encoding for the
-    // non-path parts of the URL.
-    if (pathEncoding != utf8Encoding) {
-        DeprecatedString protocol;
-        if (rel.length() > 0 && isSchemeFirstChar(rel.at(0).latin1())) {
-            for (unsigned i = 1; i < rel.length(); i++) {
-                char p = rel.at(i).latin1();
-                if (p == ':') {
-                    protocol = rel.left(i);
-                    break;
-                }
-                if (!isSchemeChar(p)) {
-                    break;
-                }
-            }
-        }
-        if (!protocol) {
-            protocol = base.protocol();
-        }
-        protocol = protocol.lower();
-        if (protocol == "file" || protocol == "help") {
-            pathEncoding = utf8Encoding;
-        } else if (protocol == "mailto") {
-            pathEncoding = utf8Encoding;
-            otherEncoding = utf8Encoding;
-        }
-    }
+    WebCore::TextEncoding pathEncoding(UTF8Encoding);
+    WebCore::TextEncoding otherEncoding = encoding.isValid() ? encoding : WebCore::TextEncoding(UTF8Encoding);
     
     int pathEnd = -1;
     if (pathEncoding != otherEncoding) {