--- /dev/null
+<head><meta charset="iso-8859-1"></head>
+<body>
+<p>Test how URLs are encoded.</p>
+<ol id=result></ol>
+<script>
+ function log(message)
+ {
+ var item = document.createElement("li");
+ item.appendChild(document.createTextNode(message));
+ document.getElementById("result").appendChild(item);
+ }
+
+ if (window.layoutTestController) {
+ layoutTestController.waitUntilDone();
+ layoutTestController.dumpAsText();
+ }
+
+ <!-- Firefox 3 encodes query part of XMLHttpRequest requests as UTF-8, but WebKit doesn't special case these. -->
+ var req = new XMLHttpRequest;
+ req.open("GET", "/xmlhttprequest/resources/print-query.cgi?Zürich", false);
+ req.send();
+ log("abs-path: " + (req.responseText == "Z%FCrich" ? "PASS" : "FAIL (" + req.responseText + ")"));
+
+ req.open("GET", "resources/print-query.cgi?Zürich", false);
+ req.send();
+ log("rel-path: " + (req.responseText == "Z%FCrich" ? "PASS" : "FAIL (" + req.responseText + ")"));
+
+ req.open("GET", document.URL.replace(/encode-request-url-2\.html/, "") + "resources/print-query.cgi?Zürich", false);
+ req.send();
+ log("absolute: " + (req.responseText == "Z%FCrich" ? "PASS" : "FAIL (" + req.responseText + ")"));
+
+ var framesLoaded = 0;
+ function frameLoaded()
+ {
+ ++framesLoaded;
+ if (framesLoaded == 2) {
+ log("Frame, abs-path: " + (frames[0].document.documentElement.textContent == "Z%FCrich" ? "PASS" : "FAIL (" + frames[0].document.documentElement.textContent + ")"));
+ log("Frame, rel-path: " + (frames[1].document.documentElement.textContent == "Z%FCrich" ? "PASS" : "FAIL (" + frames[0].document.documentElement.textContent + ")"));
+
+ if (window.layoutTestController)
+ layoutTestController.notifyDone();
+ }
+ }
+</script>
+<iframe src="/xmlhttprequest/resources/print-query.cgi?Zürich" onload="frameLoaded()"></iframe>
+<iframe src="resources/print-query.cgi?Zürich" onload="frameLoaded()"></iframe>
+</body>
+2009-01-24 Alexey Proskuryakov <ap@webkit.org>
+
+ Reviewed by Darin Adler.
+
+ <rdar://problem/6368059> REGRESSION: URL encoding problems on http://www.cineman.ch
+
+ Test: http/tests/xmlhttprequest/encode-request-url-2.html
+
+ * platform/KURL.cpp:
+ (WebCore::appendASCII): Added a helper function.
+ (WebCore::KURL::KURL): Explicitly encode the URL to call two-argument parse() with better
+ specified behavior.
+ (WebCore::KURL::init): Ditto. This avoids trying to round-trip an URL encoded into a byte
+ stream, but not yet transformed to ASCII-only using percent escapes. Since different parts
+ of the byte stream can use different encodings, round-tripping is not possible.
+ (WebCore::KURL::parse): Reverted an earlier change that made the single-argument version of
+ this function convert the string to utf-8. I think that on the remanining code paths, it is
+ correct to assume that the string is all ASCII, but I'm not yet confident enough to drop a
+ FIXME warning.
+
2009-01-24 Jan Michael Alonzo <jmalonzo@webkit.org>
Gtk build fix after r40170
dest[i] = static_cast<char>(src[i]);
}
+static void appendASCII(const String& base, const char* rel, size_t len, CharBuffer& buffer)
+{
+ buffer.resize(base.length() + len + 1);
+ copyASCII(base.characters(), base.length(), buffer.data());
+ memcpy(buffer.data() + base.length(), rel, len);
+ buffer[buffer.size() - 1] = '\0';
+}
+
// FIXME: Move to PlatformString.h eventually.
// Returns the index of the first index in string |s| of any of the characters
// in |toFind|. |toFind| should be a null-terminated string, all characters up
KURL::KURL(const String& url)
{
if (url[0] != '/') {
- parse(url);
+ parse(url.utf8().data(), &url);
return;
}
}
}
+ CharBuffer parseBuffer;
+
if (absolute) {
parse(str, originalString);
} else {
// If the base is empty or opaque (e.g. data: or javascript:), then the URL is invalid
// unless the relative URL is a single fragment.
if (!base.isHierarchical()) {
- if (str[0] == '#')
- parse(base.m_string.left(base.m_queryEnd) + (allASCII ? String(str) : String::fromUTF8(str)));
- else {
+ if (str[0] == '#') {
+ appendASCII(base.m_string.left(base.m_queryEnd), str, len, parseBuffer);
+ parse(parseBuffer.data(), 0);
+ } else {
m_string = relative;
invalidate();
}
// reference to the same document
*this = base;
break;
- case '#':
+ case '#': {
// must be fragment-only reference
- parse(base.m_string.left(base.m_queryEnd) + (allASCII ? String(str) : String::fromUTF8(str)));
+ appendASCII(base.m_string.left(base.m_queryEnd), str, len, parseBuffer);
+ parse(parseBuffer.data(), 0);
break;
- case '?':
+ }
+ case '?': {
// query-only reference, special case needed for non-URL results
- parse(base.m_string.left(base.m_pathEnd) + (allASCII ? String(str) : String::fromUTF8(str)));
+ appendASCII(base.m_string.left(base.m_pathEnd), str, len, parseBuffer);
+ parse(parseBuffer.data(), 0);
break;
+ }
case '/':
// must be net-path or absolute-path reference
if (str[1] == '/') {
// net-path
- parse(base.m_string.left(base.m_schemeEnd + 1) + (allASCII ? String(str) : String::fromUTF8(str)));
+ appendASCII(base.m_string.left(base.m_schemeEnd + 1), str, len, parseBuffer);
+ parse(parseBuffer.data(), 0);
} else {
// abs-path
- parse(base.m_string.left(base.m_portEnd) + (allASCII ? String(str) : String::fromUTF8(str)));
+ appendASCII(base.m_string.left(base.m_portEnd), str, len, parseBuffer);
+ parse(parseBuffer.data(), 0);
}
break;
default:
// must be relative-path reference
// Base part plus relative part plus one possible slash added in between plus terminating \0 byte.
- CharBuffer buffer(base.m_pathEnd + 1 + len + 1);
+ parseBuffer.resize(base.m_pathEnd + 1 + len + 1);
- char* bufferPos = buffer.data();
+ char* bufferPos = parseBuffer.data();
// first copy everything before the path from the base
unsigned baseLength = base.m_string.length();
const UChar* baseCharacters = base.m_string.characters();
CharBuffer baseStringBuffer(baseLength);
- for (unsigned i = 0; i < baseLength; ++i)
- baseStringBuffer[i] = static_cast<char>(baseCharacters[i]);
+ copyASCII(baseCharacters, baseLength, baseStringBuffer.data());
const char* baseString = baseStringBuffer.data();
const char* baseStringStart = baseString;
const char* pathStart = baseStringStart + base.m_portEnd;
// of the relative reference; this will also add a null terminator
strcpy(bufferPos, relStringPos);
- parse(buffer.data(), 0);
+ parse(parseBuffer.data(), 0);
- ASSERT(strlen(buffer.data()) + 1 <= buffer.size());
+ ASSERT(strlen(parseBuffer.data()) + 1 <= parseBuffer.size());
break;
}
}
void KURL::parse(const String& string)
{
- CharBuffer buffer;
- encodeRelativeString(string, UTF8Encoding(), buffer);
+ // FIXME: What should this do for non-ASCII URLs?
+ // Currently it throws away the high bytes of the characters in the string in that case, matching createCFURL().
+ CharBuffer buffer(string.length() + 1);
+ copyASCII(string.characters(), string.length(), buffer.data());
+ buffer[string.length()] = '\0';
parse(buffer.data(), &string);
}