URLParser should parse URLs with non-special schemes
authorachristensen@apple.com <achristensen@apple.com@268f45cc-cd09-0410-ab3c-d52691b4dbfc>
Fri, 9 Sep 2016 17:06:24 +0000 (17:06 +0000)
committerachristensen@apple.com <achristensen@apple.com@268f45cc-cd09-0410-ab3c-d52691b4dbfc>
Fri, 9 Sep 2016 17:06:24 +0000 (17:06 +0000)
https://bugs.webkit.org/show_bug.cgi?id=161786

Reviewed by Andy Estes.

Source/WebCore:

Covered by new API tests.

* platform/URLParser.cpp:
(WebCore::URLParser::parse):
There's no reason for a SchemeEndCheckForSlashes state now that we can copy iterators.
It's not in the spec and not needed.
Also, move things around a little so parsing special or non-special schemes
followed by one or two slashes works correctly.

Tools:

* TestWebKitAPI/Tests/WebCore/URLParser.cpp:
(TestWebKitAPI::TEST_F):

git-svn-id: https://svn.webkit.org/repository/webkit/trunk@205749 268f45cc-cd09-0410-ab3c-d52691b4dbfc

Source/WebCore/ChangeLog
Source/WebCore/platform/URLParser.cpp
Tools/ChangeLog
Tools/TestWebKitAPI/Tests/WebCore/URLParser.cpp

index e995e43..e787361 100644 (file)
@@ -1,3 +1,19 @@
+2016-09-09  Alex Christensen  <achristensen@webkit.org>
+
+        URLParser should parse URLs with non-special schemes
+        https://bugs.webkit.org/show_bug.cgi?id=161786
+
+        Reviewed by Andy Estes.
+
+        Covered by new API tests.
+
+        * platform/URLParser.cpp:
+        (WebCore::URLParser::parse):
+        There's no reason for a SchemeEndCheckForSlashes state now that we can copy iterators.
+        It's not in the spec and not needed.
+        Also, move things around a little so parsing special or non-special schemes
+        followed by one or two slashes works correctly.
+
 2016-09-09  Chris Dumez  <cdumez@apple.com>
 
         Regression(r186020): Null dereference in getStartDate()
index 7ef8500..d811e81 100644 (file)
@@ -399,7 +399,6 @@ URL URLParser::parse(const String& input, const URL& base, const TextEncoding& e
     enum class State : uint8_t {
         SchemeStart,
         Scheme,
-        SchemeEndCheckForSlashes,
         NoScheme,
         SpecialRelativeOrAuthority,
         PathOrAuthority,
@@ -454,15 +453,34 @@ URL URLParser::parse(const String& input, const URL& base, const TextEncoding& e
                     ++c;
                     break;
                 }
+                m_buffer.append(':');
                 if (isSpecialScheme(urlScheme)) {
                     m_urlIsSpecial = true;
                     if (base.protocol() == urlScheme)
                         state = State::SpecialRelativeOrAuthority;
                     else
                         state = State::SpecialAuthoritySlashes;
-                } else
-                    state = State::SchemeEndCheckForSlashes;
-                m_buffer.append(':');
+                } else {
+                    m_url.m_userStart = m_buffer.length();
+                    m_url.m_userEnd = m_url.m_userStart;
+                    m_url.m_passwordEnd = m_url.m_userStart;
+                    m_url.m_hostEnd = m_url.m_userStart;
+                    m_url.m_portEnd = m_url.m_userStart;
+                    auto maybeSlash = c;
+                    ++maybeSlash;
+                    if (maybeSlash != end && *maybeSlash == '/') {
+                        m_buffer.append('/');
+                        m_url.m_pathAfterLastSlash = m_url.m_userStart + 1;
+                        state = State::PathOrAuthority;
+                        ++c;
+                        ASSERT(*c == '/');
+                    } else {
+                        m_url.m_pathAfterLastSlash = m_url.m_userStart;
+                        state = State::CannotBeABaseURLPath;
+                    }
+                    ++c;
+                    break;
+                }
             } else {
                 m_buffer.clear();
                 state = State::NoScheme;
@@ -478,23 +496,6 @@ URL URLParser::parse(const String& input, const URL& base, const TextEncoding& e
                 c = codePoints.begin();
             }
             break;
-        case State::SchemeEndCheckForSlashes:
-            LOG_STATE("SchemeEndCheckForSlashes");
-            if (*c == '/') {
-                m_buffer.append("//");
-                m_url.m_userStart = m_buffer.length();
-                state = State::PathOrAuthority;
-                ++c;
-            } else {
-                m_url.m_userStart = m_buffer.length();
-                m_url.m_userEnd = m_url.m_userStart;
-                m_url.m_passwordEnd = m_url.m_userStart;
-                m_url.m_hostEnd = m_url.m_userStart;
-                m_url.m_portEnd = m_url.m_userStart;
-                m_url.m_pathAfterLastSlash = m_url.m_userStart,
-                state = State::CannotBeABaseURLPath;
-            }
-            break;
         case State::NoScheme:
             LOG_STATE("NoScheme");
             if (base.isNull()) {
@@ -531,6 +532,8 @@ URL URLParser::parse(const String& input, const URL& base, const TextEncoding& e
         case State::PathOrAuthority:
             LOG_STATE("PathOrAuthority");
             if (*c == '/') {
+                m_buffer.append('/');
+                m_url.m_userStart = m_buffer.length();
                 state = State::AuthorityOrHost;
                 ++c;
                 authorityOrHostBegin = c;
@@ -579,17 +582,15 @@ URL URLParser::parse(const String& input, const URL& base, const TextEncoding& e
             break;
         case State::SpecialAuthoritySlashes:
             LOG_STATE("SpecialAuthoritySlashes");
+            m_buffer.append("//");
             if (*c == '/') {
                 ++c;
                 while (c != end && isTabOrNewline(*c))
                     ++c;
                 if (c == end)
                     return failure(input);
-                m_buffer.append('/');
-                if (*c == '/') {
-                    m_buffer.append('/');
+                if (*c == '/')
                     ++c;
-                }
             }
             state = State::SpecialAuthorityIgnoreSlashes;
             break;
@@ -859,9 +860,6 @@ URL URLParser::parse(const String& input, const URL& base, const TextEncoding& e
     case State::Scheme:
         LOG_FINAL_STATE("Scheme");
         break;
-    case State::SchemeEndCheckForSlashes:
-        LOG_FINAL_STATE("SchemeEndCheckForSlashes");
-        break;
     case State::NoScheme:
         LOG_FINAL_STATE("NoScheme");
         break;
index 14ca6b7..26e77b7 100644 (file)
@@ -1,3 +1,13 @@
+2016-09-09  Alex Christensen  <achristensen@webkit.org>
+
+        URLParser should parse URLs with non-special schemes
+        https://bugs.webkit.org/show_bug.cgi?id=161786
+
+        Reviewed by Andy Estes.
+
+        * TestWebKitAPI/Tests/WebCore/URLParser.cpp:
+        (TestWebKitAPI::TEST_F):
+
 2016-09-08  Yusuke Suzuki  <utatane.tea@gmail.com>
 
         [WTF] HashTable's rehash is not compatible to Ref<T> and ASan
index e372c60..f8ab433 100644 (file)
@@ -193,6 +193,10 @@ TEST_F(URLParserTest, Basic)
     checkURL("http://host:", {"http", "", "", "host", 0, "/", "", "", "http://host/"});
     checkURL("http://hos\tt\n:\t1\n2\t3\t/\npath", {"http", "", "", "host", 123, "/path", "", "", "http://host:123/path"});
     checkURL("http://user@example.org/path3", {"http", "user", "", "example.org", 0, "/path3", "", "", "http://user@example.org/path3"});
+    checkURL("sc:/pa/pa", {"sc", "", "", "", 0, "/pa/pa", "", "", "sc:/pa/pa"});
+    checkURL("sc:/pa", {"sc", "", "", "", 0, "/pa", "", "", "sc:/pa"});
+    checkURL("sc:/pa/", {"sc", "", "", "", 0, "/pa/", "", "", "sc:/pa/"});
+    checkURL("sc://pa/", {"sc", "", "", "pa", 0, "/", "", "", "sc://pa/"});
 
     // This disagrees with the web platform test for http://:@www.example.com but agrees with Chrome and URL::parse,
     // and Firefox fails the web platform test differently. Maybe the web platform test ought to be changed.
@@ -257,6 +261,8 @@ TEST_F(URLParserTest, ParseRelative)
     checkRelativeURL("", "http://example.org/foo/bar", {"http", "", "", "example.org", 0, "/foo/bar", "", "", "http://example.org/foo/bar"});
     checkRelativeURL("  \a  \t\n", "http://example.org/foo/bar", {"http", "", "", "example.org", 0, "/foo/bar", "", "", "http://example.org/foo/bar"});
     checkRelativeURL(":foo.com\\", "http://example.org/foo/bar", {"http", "", "", "example.org", 0, "/foo/:foo.com/", "", "", "http://example.org/foo/:foo.com/"});
+    checkRelativeURL("http:/example.com/", "about:blank", {"http", "", "", "example.com", 0, "/", "", "", "http://example.com/"});
+    checkRelativeURL("http:example.com/", "about:blank", {"http", "", "", "example.com", 0, "/", "", "", "http://example.com/"});
 }
 
 static void checkURLDifferences(const String& urlString, const ExpectedParts& partsNew, const ExpectedParts& partsOld)
@@ -402,6 +408,10 @@ TEST_F(URLParserTest, ParserDifferences)
     checkRelativeURLDifferences(":foo.com\\", "notspecial://example.org/foo/bar",
         {"notspecial", "", "", "example.org", 0, "/foo/:foo.com\\", "", "", "notspecial://example.org/foo/:foo.com\\"},
         {"notspecial", "", "", "example.org", 0, "/foo/:foo.com/", "", "", "notspecial://example.org/foo/:foo.com/"});
+    checkURLDifferences("sc://pa",
+        {"sc", "", "", "pa", 0, "/", "", "", "sc://pa/"},
+        {"sc", "", "", "pa", 0, "", "", "", "sc://pa"});
+
     
     // This behavior matches Chrome and Firefox, but not WebKit using URL::parse.
     // The behavior of URL::parse is clearly wrong because reparsing file://path would make path the host.
@@ -476,8 +486,25 @@ TEST_F(URLParserTest, DefaultPort)
     checkURLDifferences("wss://host:444",
         {"wss", "", "", "host", 444, "/", "", "", "wss://host:444/"},
         {"wss", "", "", "host", 444, "", "", "", "wss://host:444"});
-    
-    // FIXME: Fix and check unknown schemes with ports, as well as ftps.
+
+    // 990 is the default ftps port in URL::parse, but it's not in the URL spec. Maybe it should be.
+    checkURL("ftps://host:990/", {"ftps", "", "", "host", 990, "/", "", "", "ftps://host:990/"});
+    checkURL("ftps://host:991/", {"ftps", "", "", "host", 991, "/", "", "", "ftps://host:991/"});
+    checkURLDifferences("ftps://host:990",
+        {"ftps", "", "", "host", 990, "/", "", "", "ftps://host:990/"},
+        {"ftps", "", "", "host", 990, "", "", "", "ftps://host:990"});
+    checkURLDifferences("ftps://host:991",
+        {"ftps", "", "", "host", 991, "/", "", "", "ftps://host:991/"},
+        {"ftps", "", "", "host", 991, "", "", "", "ftps://host:991"});
+
+    checkURL("unknown://host:80/", {"unknown", "", "", "host", 80, "/", "", "", "unknown://host:80/"});
+    checkURL("unknown://host:81/", {"unknown", "", "", "host", 81, "/", "", "", "unknown://host:81/"});
+    checkURLDifferences("unknown://host:80",
+        {"unknown", "", "", "host", 80, "/", "", "", "unknown://host:80/"},
+        {"unknown", "", "", "host", 80, "", "", "", "unknown://host:80"});
+    checkURLDifferences("unknown://host:81",
+        {"unknown", "", "", "host", 81, "/", "", "", "unknown://host:81/"},
+        {"unknown", "", "", "host", 81, "", "", "", "unknown://host:81"});
 }
     
 static void shouldFail(const String& urlString)