Reviewed by Darin Adler.
authorap@webkit.org <ap@webkit.org@268f45cc-cd09-0410-ab3c-d52691b4dbfc>
Sat, 24 Jan 2009 09:39:19 +0000 (09:39 +0000)
committerap@webkit.org <ap@webkit.org@268f45cc-cd09-0410-ab3c-d52691b4dbfc>
Sat, 24 Jan 2009 09:39:19 +0000 (09:39 +0000)
        <rdar://problem/6368059> REGRESSION: URL encoding problems on http://www.cineman.ch

        Test: http/tests/xmlhttprequest/encode-request-url-2.html

        * platform/KURL.cpp:
        (WebCore::appendASCII): Added a helper function.
        (WebCore::KURL::KURL): Explicitly encode the URL to call two-argument parse() with better
        specified behavior.
        (WebCore::KURL::init): Ditto. This avoids trying to round-trip an URL encoded into a byte
        stream, but not yet transformed to ASCII-only using percent escapes. Since different parts
        of the byte stream can use different encodings, round-tripping is not possible.
        (WebCore::KURL::parse): Reverted an earlier change that made the single-argument version of
        this function convert the string to utf-8. I think that on the remanining code paths, it is
        correct to assume that the string is all ASCII, but I'm not yet confident enough to drop a
        FIXME warning.

git-svn-id: https://svn.webkit.org/repository/webkit/trunk@40213 268f45cc-cd09-0410-ab3c-d52691b4dbfc

LayoutTests/ChangeLog
LayoutTests/fast/loader/url-parse-1-expected.txt
LayoutTests/http/tests/xmlhttprequest/encode-request-url-2-expected.txt [new file with mode: 0644]
LayoutTests/http/tests/xmlhttprequest/encode-request-url-2.html [new file with mode: 0644]
WebCore/ChangeLog
WebCore/platform/KURL.cpp
WebCore/platform/KURL.h

index 6a5bc4a..debe10b 100644 (file)
@@ -1,3 +1,15 @@
+2009-01-24  Alexey Proskuryakov  <ap@webkit.org>
+
+        Reviewed by Darin Adler.
+
+        <rdar://problem/6368059> REGRESSION: URL encoding problems on http://www.cineman.ch
+
+        * http/tests/xmlhttprequest/encode-request-url-2-expected.txt: Added.
+        * http/tests/xmlhttprequest/encode-request-url-2.html: Added.
+
+        * fast/loader/url-parse-1-expected.txt: Updated test results. Neither new nor old result
+        matches Firefox, and the change is caused by <https://bugs.webkit.org/show_bug.cgi?id=23500>.
+
 2009-01-23  Eric Carlson  <eric.carlson@apple.com>
 
         Reviewed by Adele Peterson
index 010d16c..b5fa866 100644 (file)
@@ -5,7 +5,7 @@ file:///BASE/           /BASE/
 test   file:///BASE/test               /BASE/test
 /      file:///                /
 /test  file:///test            /test
-//     file://         
+//     file:           
 //test file://test     test    
 ///    file:///                /
 ///test        file:///test            /test
diff --git a/LayoutTests/http/tests/xmlhttprequest/encode-request-url-2-expected.txt b/LayoutTests/http/tests/xmlhttprequest/encode-request-url-2-expected.txt
new file mode 100644 (file)
index 0000000..43c9695
--- /dev/null
@@ -0,0 +1,8 @@
+Test how URLs are encoded.
+
+abs-path: PASS
+rel-path: PASS
+absolute: PASS
+Frame, abs-path: PASS
+Frame, rel-path: PASS
diff --git a/LayoutTests/http/tests/xmlhttprequest/encode-request-url-2.html b/LayoutTests/http/tests/xmlhttprequest/encode-request-url-2.html
new file mode 100644 (file)
index 0000000..793d67e
--- /dev/null
@@ -0,0 +1,47 @@
+<head><meta charset="iso-8859-1"></head>
+<body>
+<p>Test how URLs are encoded.</p>
+<ol id=result></ol>
+<script>
+    function log(message)
+    {
+        var item = document.createElement("li");
+        item.appendChild(document.createTextNode(message));
+        document.getElementById("result").appendChild(item);
+    }
+
+    if (window.layoutTestController) {
+        layoutTestController.waitUntilDone();
+        layoutTestController.dumpAsText();
+    }
+
+    <!-- Firefox 3 encodes query part of XMLHttpRequest requests as UTF-8, but WebKit doesn't special case these. -->
+    var req = new XMLHttpRequest;
+    req.open("GET", "/xmlhttprequest/resources/print-query.cgi?Zürich", false);
+    req.send();
+    log("abs-path: " + (req.responseText == "Z%FCrich" ? "PASS" : "FAIL (" + req.responseText + ")"));
+
+    req.open("GET", "resources/print-query.cgi?Zürich", false);
+    req.send();
+    log("rel-path: " + (req.responseText == "Z%FCrich" ? "PASS" : "FAIL (" + req.responseText + ")"));
+
+    req.open("GET", document.URL.replace(/encode-request-url-2\.html/, "") + "resources/print-query.cgi?Zürich", false);
+    req.send();
+    log("absolute: " + (req.responseText == "Z%FCrich" ? "PASS" : "FAIL (" + req.responseText + ")"));
+
+    var framesLoaded = 0;
+    function frameLoaded()
+    {
+        ++framesLoaded;
+        if (framesLoaded == 2) {
+            log("Frame, abs-path: " + (frames[0].document.documentElement.textContent == "Z%FCrich" ? "PASS" : "FAIL (" + frames[0].document.documentElement.textContent + ")"));
+            log("Frame, rel-path: " + (frames[1].document.documentElement.textContent == "Z%FCrich" ? "PASS" : "FAIL (" + frames[0].document.documentElement.textContent + ")"));
+
+            if (window.layoutTestController)
+                layoutTestController.notifyDone();
+        }
+    }
+</script>
+<iframe src="/xmlhttprequest/resources/print-query.cgi?Zürich" onload="frameLoaded()"></iframe>
+<iframe src="resources/print-query.cgi?Zürich" onload="frameLoaded()"></iframe>
+</body>
index d58e805..5c2ffe0 100644 (file)
@@ -1,3 +1,23 @@
+2009-01-24  Alexey Proskuryakov  <ap@webkit.org>
+
+        Reviewed by Darin Adler.
+
+        <rdar://problem/6368059> REGRESSION: URL encoding problems on http://www.cineman.ch
+
+        Test: http/tests/xmlhttprequest/encode-request-url-2.html
+
+        * platform/KURL.cpp:
+        (WebCore::appendASCII): Added a helper function.
+        (WebCore::KURL::KURL): Explicitly encode the URL to call two-argument parse() with better
+        specified behavior.
+        (WebCore::KURL::init): Ditto. This avoids trying to round-trip an URL encoded into a byte
+        stream, but not yet transformed to ASCII-only using percent escapes. Since different parts
+        of the byte stream can use different encodings, round-tripping is not possible.
+        (WebCore::KURL::parse): Reverted an earlier change that made the single-argument version of
+        this function convert the string to utf-8. I think that on the remanining code paths, it is
+        correct to assume that the string is all ASCII, but I'm not yet confident enough to drop a
+        FIXME warning.
+
 2009-01-24  Jan Michael Alonzo  <jmalonzo@webkit.org>
 
         Gtk build fix after r40170
index 2d9c6c9..620cbeb 100644 (file)
@@ -243,6 +243,14 @@ static void copyASCII(const UChar* src, int length, char* dest)
         dest[i] = static_cast<char>(src[i]);
 }
 
+static void appendASCII(const String& base, const char* rel, size_t len, CharBuffer& buffer)
+{
+    buffer.resize(base.length() + len + 1);
+    copyASCII(base.characters(), base.length(), buffer.data());
+    memcpy(buffer.data() + base.length(), rel, len);
+    buffer[buffer.size() - 1] = '\0';
+}
+
 // FIXME: Move to PlatformString.h eventually.
 // Returns the index of the first index in string |s| of any of the characters
 // in |toFind|. |toFind| should be a null-terminated string, all characters up
@@ -300,7 +308,7 @@ KURL::KURL(const char* url)
 KURL::KURL(const String& url)
 {
     if (url[0] != '/') {
-        parse(url);
+        parse(url.utf8().data(), &url);
         return;
     }
 
@@ -399,15 +407,18 @@ void KURL::init(const KURL& base, const String& relative, const TextEncoding& en
         }
     }
 
+    CharBuffer parseBuffer;
+
     if (absolute) {
         parse(str, originalString);
     } else {
         // If the base is empty or opaque (e.g. data: or javascript:), then the URL is invalid
         // unless the relative URL is a single fragment.
         if (!base.isHierarchical()) {
-            if (str[0] == '#')
-                parse(base.m_string.left(base.m_queryEnd) + (allASCII ? String(str) : String::fromUTF8(str)));
-            else {
+            if (str[0] == '#') {
+                appendASCII(base.m_string.left(base.m_queryEnd), str, len, parseBuffer);
+                parse(parseBuffer.data(), 0);
+            } else {
                 m_string = relative;
                 invalidate();
             }
@@ -420,22 +431,28 @@ void KURL::init(const KURL& base, const String& relative, const TextEncoding& en
             // reference to the same document
             *this = base;
             break;
-        case '#':
+        case '#': {
             // must be fragment-only reference
-            parse(base.m_string.left(base.m_queryEnd) + (allASCII ? String(str) : String::fromUTF8(str)));
+            appendASCII(base.m_string.left(base.m_queryEnd), str, len, parseBuffer);
+            parse(parseBuffer.data(), 0);
             break;
-        case '?':
+        }
+        case '?': {
             // query-only reference, special case needed for non-URL results
-            parse(base.m_string.left(base.m_pathEnd) + (allASCII ? String(str) : String::fromUTF8(str)));
+            appendASCII(base.m_string.left(base.m_pathEnd), str, len, parseBuffer);
+            parse(parseBuffer.data(), 0);
             break;
+        }
         case '/':
             // must be net-path or absolute-path reference
             if (str[1] == '/') {
                 // net-path
-                parse(base.m_string.left(base.m_schemeEnd + 1) + (allASCII ? String(str) : String::fromUTF8(str)));
+                appendASCII(base.m_string.left(base.m_schemeEnd + 1), str, len, parseBuffer);
+                parse(parseBuffer.data(), 0);
             } else {
                 // abs-path
-                parse(base.m_string.left(base.m_portEnd) + (allASCII ? String(str) : String::fromUTF8(str)));
+                appendASCII(base.m_string.left(base.m_portEnd), str, len, parseBuffer);
+                parse(parseBuffer.data(), 0);
             }
             break;
         default:
@@ -443,16 +460,15 @@ void KURL::init(const KURL& base, const String& relative, const TextEncoding& en
                 // must be relative-path reference
 
                 // Base part plus relative part plus one possible slash added in between plus terminating \0 byte.
-                CharBuffer buffer(base.m_pathEnd + 1 + len + 1);
+                parseBuffer.resize(base.m_pathEnd + 1 + len + 1);
 
-                char* bufferPos = buffer.data();
+                char* bufferPos = parseBuffer.data();
 
                 // first copy everything before the path from the base
                 unsigned baseLength = base.m_string.length();
                 const UChar* baseCharacters = base.m_string.characters();
                 CharBuffer baseStringBuffer(baseLength);
-                for (unsigned i = 0; i < baseLength; ++i)
-                    baseStringBuffer[i] = static_cast<char>(baseCharacters[i]);
+                copyASCII(baseCharacters, baseLength, baseStringBuffer.data());
                 const char* baseString = baseStringBuffer.data();
                 const char* baseStringStart = baseString;
                 const char* pathStart = baseStringStart + base.m_portEnd;
@@ -511,9 +527,9 @@ void KURL::init(const KURL& base, const String& relative, const TextEncoding& en
                 // of the relative reference; this will also add a null terminator
                 strcpy(bufferPos, relStringPos);
 
-                parse(buffer.data(), 0);
+                parse(parseBuffer.data(), 0);
 
-                ASSERT(strlen(buffer.data()) + 1 <= buffer.size());
+                ASSERT(strlen(parseBuffer.data()) + 1 <= parseBuffer.size());
                 break;
             }
         }
@@ -966,8 +982,11 @@ static inline bool matchLetter(char c, char lowercaseLetter)
 
 void KURL::parse(const String& string)
 {
-    CharBuffer buffer;
-    encodeRelativeString(string, UTF8Encoding(), buffer);
+    // FIXME: What should this do for non-ASCII URLs?
+    // Currently it throws away the high bytes of the characters in the string in that case, matching createCFURL().
+    CharBuffer buffer(string.length() + 1);
+    copyASCII(string.characters(), string.length(), buffer.data());
+    buffer[string.length()] = '\0';
     parse(buffer.data(), &string);
 }
 
index 5378e05..0b6cb29 100644 (file)
@@ -74,8 +74,6 @@ public:
 
     // The argument is an absolute URL string. The string is assumed to be
     // already encoded.
-    // FIXME: For characters with codes other than 0000-00FF will be chopped
-    // off, so this call is currently not safe to use with arbitrary strings.
     // FIXME: This constructor has a special case for strings that start with
     // "/", prepending "file://" to such strings; it would be good to get
     // rid of that special case.