Punycode encode non-ascii hosts in URLParser
author: achristensen@apple.com <achristensen@apple.com@268f45cc-cd09-0410-ab3c-d52691b4dbfc>
Tue, 6 Sep 2016 23:36:36 +0000 (23:36 +0000)
committer: achristensen@apple.com <achristensen@apple.com@268f45cc-cd09-0410-ab3c-d52691b4dbfc>
Tue, 6 Sep 2016 23:36:36 +0000 (23:36 +0000)
https://bugs.webkit.org/show_bug.cgi?id=161655

Reviewed by Tim Horton.

Source/WebCore:

Covered by new API tests based on the web platform tests.

* platform/URLParser.cpp:
(WebCore::URLParser::parse):
(WebCore::containsOnlyASCII):
(WebCore::domainToASCII):

Tools:

* TestWebKitAPI/Tests/WebCore/URLParser.cpp:
(TestWebKitAPI::wideString):
(TestWebKitAPI::TEST_F):

git-svn-id: https://svn.webkit.org/repository/webkit/trunk@205521 268f45cc-cd09-0410-ab3c-d52691b4dbfc

Source/WebCore/ChangeLog
Source/WebCore/platform/URLParser.cpp
Tools/ChangeLog
Tools/TestWebKitAPI/Tests/WebCore/URLParser.cpp

index f8c1a6f..277ee93 100644 (file)
@@ -1,3 +1,17 @@
+2016-09-06  Alex Christensen  <achristensen@webkit.org>
+
+        Punycode encode non-ascii hosts in URLParser
+        https://bugs.webkit.org/show_bug.cgi?id=161655
+
+        Reviewed by Tim Horton.
+
+        Covered by new API tests based on the web platform tests.
+
+        * platform/URLParser.cpp:
+        (WebCore::URLParser::parse):
+        (WebCore::containsOnlyASCII):
+        (WebCore::domainToASCII):
+
 2016-09-06  Saam Barati  <sbarati@apple.com>
 
         Make JSMap and JSSet faster
index 14c2511..8c617db 100644 (file)
@@ -28,6 +28,7 @@
 
 #include "Logging.h"
 #include <array>
+#include <unicode/uidna.h>
 #include <wtf/HashMap.h>
 #include <wtf/NeverDestroyed.h>
 #include <wtf/text/StringBuilder.h>
@@ -450,7 +451,8 @@ URL URLParser::parse(const String& input, const URL& base, const TextEncoding& e
         case State::SchemeEndCheckForSlashes:
             LOG_STATE("SchemeEndCheckForSlashes");
             if (*c == '/') {
-                m_buffer.append('/');
+                m_buffer.append("//");
+                m_url.m_userStart = m_buffer.length();
                 state = State::PathOrAuthority;
                 ++c;
             } else {
@@ -1269,11 +1271,44 @@ static String percentDecode(const String& input)
     return output.toStringPreserveCapacity();
 }
 
+static bool containsOnlyASCII(const String& string) // Returns whether charactersAreAllASCII() accepts every character of |string|.
+{
+    if (string.is8Bit()) // Use the character accessor that matches the string's internal width.
+        return charactersAreAllASCII(string.characters8(), string.length());
+    return charactersAreAllASCII(string.characters16(), string.length());
+}
+
 static Optional<String> domainToASCII(const String& domain)
 {
-    // FIXME: Implement correctly
-    CString utf8 = domain.utf8();
-    return String(utf8.data(), utf8.length());
+    const unsigned hostnameBufferLength = 2048; // Element capacity of each fixed-size conversion buffer used below.
+
+    if (containsOnlyASCII(domain)) { // ASCII-only input is returned without calling ICU.
+        if (domain.is8Bit())
+            return domain; // Already 8-bit: hand the input back unchanged.
+        Vector<LChar, hostnameBufferLength> buffer; // 16-bit storage holding only ASCII: copy each code unit into LChar storage.
+        size_t length = domain.length();
+        buffer.reserveInitialCapacity(length);
+        for (size_t i = 0; i < length; ++i)
+            buffer.append(domain[i]);
+        return String(buffer.data(), length);
+    }
+    
+    UChar hostnameBuffer[hostnameBufferLength]; // Output buffer handed to ICU.
+    UErrorCode error = U_ZERO_ERROR;
+    
+    int32_t numCharactersConverted = uidna_IDNToASCII(StringView(domain).upconvertedCharacters(), domain.length(), hostnameBuffer, hostnameBufferLength, UIDNA_ALLOW_UNASSIGNED, nullptr, &error); // The return value is used below as the number of UChars written to hostnameBuffer.
+
+    if (error == U_ZERO_ERROR) { // Only an exact U_ZERO_ERROR counts as success; every other status falls through to Nullopt.
+        LChar buffer[hostnameBufferLength];
+        for (int32_t i = 0; i < numCharactersConverted; ++i) {
+            ASSERT(isASCII(hostnameBuffer[i])); // The copy narrows UChar to LChar, so the converted output is expected to be ASCII.
+            buffer[i] = hostnameBuffer[i];
+        }
+        return String(buffer, numCharactersConverted);
+    }
+
+    // FIXME: Check for U_BUFFER_OVERFLOW_ERROR and retry with an allocated buffer.
+    return Nullopt; // Conversion did not succeed (including the overflow case the FIXME above describes).
 }
 
 static bool hasInvalidDomainCharacter(const String& asciiDomain)
index 9cab6f8..25d00b9 100644 (file)
@@ -1,3 +1,14 @@
+2016-09-06  Alex Christensen  <achristensen@webkit.org>
+
+        Punycode encode non-ascii hosts in URLParser
+        https://bugs.webkit.org/show_bug.cgi?id=161655
+
+        Reviewed by Tim Horton.
+
+        * TestWebKitAPI/Tests/WebCore/URLParser.cpp:
+        (TestWebKitAPI::wideString):
+        (TestWebKitAPI::TEST_F):
+
 2016-09-05  Alex Christensen  <achristensen@webkit.org>
 
         Implement relative file urls and begin implementing character encoding in URLParser
index e635f98..e08dbc2 100644 (file)
@@ -84,6 +84,16 @@ static void checkURL(const String& urlString, const ExpectedParts& parts)
     EXPECT_TRUE(URLParser::allValuesEqual(url, oldURL));
 }
 
+template<size_t length> // |length| is the array extent, deduced from the argument.
+static String wideString(const wchar_t (&url)[length]) // Test helper: builds a String from the first length - 1 elements of a wchar_t array.
+{
+    StringBuilder builder;
+    builder.reserveCapacity(length - 1); // The final array element is skipped; for a string literal that is the terminating null.
+    for (size_t i = 0; i < length - 1; ++i)
+        builder.append(url[i]); // NOTE(review): confirm which StringBuilder::append overload a wchar_t selects where wchar_t is wider than UChar.
+    return builder.toString();
+}
+
 TEST_F(URLParserTest, Basic)
 {
     checkURL("http://user:pass@webkit.org:123/path?query#fragment", {"http", "user", "pass", "webkit.org", 123, "/path", "query", "fragment", "http://user:pass@webkit.org:123/path?query#fragment"});
@@ -209,6 +219,8 @@ TEST_F(URLParserTest, ParseRelative)
     checkRelativeURL("//whatwg.org/index.html", "https://www.webkit.org/path", {"https", "", "", "whatwg.org", 0, "/index.html", "", "", "https://whatwg.org/index.html"});
     checkRelativeURL("http://example\t.\norg", "http://example.org/foo/bar", {"http", "", "", "example.org", 0, "/", "", "", "http://example.org/"});
     checkRelativeURL("test", "file:///path1/path2", {"file", "", "", "", 0, "/path1/test", "", "", "file:///path1/test"});
+    checkRelativeURL(wideString(L"http://www.foo。bar.com"), "http://other.com/", {"http", "", "", "www.foo.bar.com", 0, "/", "", "", "http://www.foo.bar.com/"});
+    checkRelativeURL(wideString(L"sc://ñ.test/"), "about:blank", {"sc", "", "", "xn--ida.test", 0, "/", "", "", "sc://xn--ida.test/"});
 }
 
 static void checkURLDifferences(const String& urlString, const ExpectedParts& partsNew, const ExpectedParts& partsOld)
@@ -351,6 +363,9 @@ TEST_F(URLParserTest, ParserDifferences)
         {"http", "", "", "host%73", 0, "/", "", "", "http://host%73/"});
     
     // URLParser matches Chrome and the spec, but not URL::parse or Firefox.
+    checkURLDifferences(wideString(L"http://0Xc0.0250.01"),
+        {"http", "", "", "192.168.0.1", 0, "/", "", "", "http://192.168.0.1/"},
+        {"http", "", "", "0xc0.0250.01", 0, "/", "", "", "http://0xc0.0250.01/"});
     checkURLDifferences("http://host/path%2e.%2E",
         {"http", "", "", "host", 0, "/path...", "", "", "http://host/path..."},
         {"http", "", "", "host", 0, "/path%2e.%2E", "", "", "http://host/path%2e.%2E"});