Make URLParser work with URLs missing URL parts
authorachristensen@apple.com <achristensen@apple.com@268f45cc-cd09-0410-ab3c-d52691b4dbfc>
Fri, 12 Aug 2016 23:12:46 +0000 (23:12 +0000)
committerachristensen@apple.com <achristensen@apple.com@268f45cc-cd09-0410-ab3c-d52691b4dbfc>
Fri, 12 Aug 2016 23:12:46 +0000 (23:12 +0000)
https://bugs.webkit.org/show_bug.cgi?id=160824

Reviewed by Brady Eidson.

Source/WebCore:

My initial implementation of URLParser didn't work correctly with URLs missing parts,
like a URL with no fragment, or a URL with no query.  This fixes and tests parsing such URLs.
Covered by new API tests.

* platform/URLParser.cpp:
(WebCore::URLParser::parse):
(WebCore::URLParser::allValuesEqual):
* platform/URLParser.h:
(WebCore::URLParser::parse):

Tools:

* TestWebKitAPI/Tests/WebCore/URLParser.cpp:
(TestWebKitAPI::s):
(TestWebKitAPI::checkURL):
(TestWebKitAPI::TEST_F):
(TestWebKitAPI::eq): Deleted.

git-svn-id: https://svn.webkit.org/repository/webkit/trunk@204431 268f45cc-cd09-0410-ab3c-d52691b4dbfc

Source/WebCore/ChangeLog
Source/WebCore/platform/URLParser.cpp
Source/WebCore/platform/URLParser.h
Tools/ChangeLog
Tools/TestWebKitAPI/Tests/WebCore/URLParser.cpp

index b39fdb1..79694b8 100644 (file)
@@ -1,3 +1,20 @@
+2016-08-12  Alex Christensen  <achristensen@webkit.org>
+
+        Make URLParser work with URLs missing URL parts
+        https://bugs.webkit.org/show_bug.cgi?id=160824
+
+        Reviewed by Brady Eidson.
+
+        My initial implementation of URLParser didn't work correctly with URLs missing parts,
+        like a URL with no fragment, or a URL with no query.  This fixes and tests parsing such URLs.
+        Covered by new API tests.
+
+        * platform/URLParser.cpp:
+        (WebCore::URLParser::parse):
+        (WebCore::URLParser::allValuesEqual):
+        * platform/URLParser.h:
+        (WebCore::URLParser::parse):
+
 2016-08-12  Johan K. Jensen  <johan_jensen@apple.com>
 
         Rename DocumentLoadTiming and ResourceLoadTiming
index 83d055f..342e20e 100644 (file)
@@ -31,7 +31,6 @@
 
 namespace WebCore {
 
-// 1. Infrastructure
 static bool isC0Control(const StringView::CodePoints::Iterator& c) { return *c <= 0x001F; } // True when the current code point is at or below U+001F.
 static bool isC0ControlOrSpace(const StringView::CodePoints::Iterator& c) { return isC0Control(c) || *c == 0x0020; } // Same as isC0Control, but also accepts U+0020.
 static bool isTabOrNewline(const StringView::CodePoints::Iterator& c) { return *c == 0x0009 || *c == 0x000A || *c == 0x000D; } // Matches U+0009, U+000A, or U+000D only.
@@ -39,7 +38,6 @@ static bool isASCIIDigit(const StringView::CodePoints::Iterator& c) { return *c
 static bool isASCIIAlpha(const StringView::CodePoints::Iterator& c) { return (*c >= 0x0041 && *c <= 0x005A) || (*c >= 0x0061 && *c <= 0x007A); }
 static bool isASCIIAlphanumeric(const StringView::CodePoints::Iterator& c) { return isASCIIDigit(c) || isASCIIAlpha(c); }
     
-// 4. URLs
 static bool isSpecialScheme(const String& scheme)
 {
     return scheme == "ftp"
@@ -65,7 +63,7 @@ Optional<URL> URLParser::parse(const String& input, const URL& base, const TextE
     enum class State : uint8_t {
         SchemeStart,
         Scheme,
-        SchemeEndCheckForSlashes, // Scheme state steps 2. 8.
+        SchemeEndCheckForSlashes,
         NoScheme,
         SpecialRelativeOrAuthority,
         PathOrAuthority,
@@ -113,6 +111,7 @@ Optional<URL> URLParser::parse(const String& input, const URL& base, const TextE
             else if (*c == ':') {
                 url.m_schemeEnd = buffer.length();
                 String urlScheme = buffer.toString(); // FIXME: Find a way to do this without shrinking the buffer.
+                url.m_protocolIsInHTTPFamily = urlScheme == "http" || urlScheme == "https";
                 if (urlScheme == "file")
                     state = State::File;
                 else if (isSpecialScheme(urlScheme)) {
@@ -271,9 +270,11 @@ Optional<URL> URLParser::parse(const String& input, const URL& base, const TextE
         case State::Path:
             LOG_STATE("Path");
             if (*c == '/') {
+                buffer.append('/');
+                url.m_pathAfterLastSlash = buffer.length();
                 ++c;
                 if (c == end)
-                    return Nullopt;
+                    break;
                 if (*c == '.') {
                     ++c;
                     if (c == end)
@@ -282,7 +283,6 @@ Optional<URL> URLParser::parse(const String& input, const URL& base, const TextE
                         notImplemented();
                     notImplemented();
                 }
-                buffer.append('/');
             } else if (*c == '?') {
                 url.m_pathEnd = buffer.length();
                 state = State::Query;
@@ -318,8 +318,78 @@ Optional<URL> URLParser::parse(const String& input, const URL& base, const TextE
             break;
         }
     }
+    
+    switch (state) {
+    case State::SchemeStart:
+    case State::Scheme:
+    case State::SchemeEndCheckForSlashes:
+    case State::NoScheme:
+    case State::SpecialRelativeOrAuthority:
+    case State::PathOrAuthority:
+    case State::Relative:
+    case State::RelativeSlash:
+    case State::SpecialAuthoritySlashes:
+    case State::SpecialAuthorityIgnoreSlashes:
+    case State::Authority:
+        break;
+    case State::Host:
+    case State::Hostname:
+        url.m_hostEnd = buffer.length();
+        url.m_portEnd = url.m_hostEnd;
+        buffer.append('/');
+        url.m_pathEnd = url.m_hostEnd + 1;
+        url.m_pathAfterLastSlash = url.m_pathEnd;
+        url.m_queryEnd = url.m_pathEnd;
+        url.m_fragmentEnd = url.m_pathEnd;
+        break;
+    case State::Port:
+        url.m_portEnd = buffer.length();
+        buffer.append('/');
+        url.m_pathEnd = url.m_portEnd + 1;
+        url.m_pathAfterLastSlash = url.m_pathEnd;
+        url.m_queryEnd = url.m_pathEnd;
+        url.m_fragmentEnd = url.m_pathEnd;
+        break;
+    case State::File:
+    case State::FileSlash:
+    case State::FileHost:
+    case State::PathStart:
+    case State::Path:
+        url.m_pathEnd = buffer.length();
+        url.m_queryEnd = url.m_pathEnd;
+        url.m_fragmentEnd = url.m_pathEnd;
+        break;
+    case State::CannotBeABaseURLPath:
+        break;
+    case State::Query:
+        url.m_queryEnd = buffer.length();
+        url.m_fragmentEnd = url.m_queryEnd;
+        break;
+    case State::Fragment:
+        url.m_fragmentEnd = buffer.length();
+        break;
+    }
+
     url.m_string = buffer.toString();
+    url.m_isValid = true;
     return url;
 }
-    
+
+bool URLParser::allValuesEqual(const URL& a, const URL& b) // Returns true only if every member compared below is equal in a and b.
+{
+    return a.m_string == b.m_string
+        && a.m_isValid == b.m_isValid
+        && a.m_protocolIsInHTTPFamily == b.m_protocolIsInHTTPFamily
+        && a.m_schemeEnd == b.m_schemeEnd // From here on: the per-component start/end positions (presumably offsets into m_string).
+        && a.m_userStart == b.m_userStart
+        && a.m_userEnd == b.m_userEnd
+        && a.m_passwordEnd == b.m_passwordEnd
+        && a.m_hostEnd == b.m_hostEnd
+        && a.m_portEnd == b.m_portEnd
+        && a.m_pathAfterLastSlash == b.m_pathAfterLastSlash
+        && a.m_pathEnd == b.m_pathEnd
+        && a.m_queryEnd == b.m_queryEnd
+        && a.m_fragmentEnd == b.m_fragmentEnd;
+}
+
 } // namespace WebCore
index 74f9334..6bcba27 100644 (file)
@@ -34,6 +34,7 @@ namespace WebCore {
 class URLParser {
 public:
     WEBCORE_EXPORT static Optional<URL> parse(const String&, const URL& = { }, const TextEncoding& = UTF8Encoding());
+    WEBCORE_EXPORT static bool allValuesEqual(const URL&, const URL&); // Member-by-member comparison of two URLs; the API test uses it to compare parse() output with a URL built by the existing constructor.
 };
 
 }
index ffe88b6..3311eb4 100644 (file)
@@ -1,3 +1,16 @@
+2016-08-12  Alex Christensen  <achristensen@webkit.org>
+
+        Make URLParser work with URLs missing URL parts
+        https://bugs.webkit.org/show_bug.cgi?id=160824
+
+        Reviewed by Brady Eidson.
+
+        * TestWebKitAPI/Tests/WebCore/URLParser.cpp:
+        (TestWebKitAPI::s):
+        (TestWebKitAPI::checkURL):
+        (TestWebKitAPI::TEST_F):
+        (TestWebKitAPI::eq): Deleted.
+
 2016-08-12  John Wilander  <wilander@apple.com>
 
         Add HashCountedSet API tests to TestWTFLibrary target
index cad4ce6..5b4478e 100644 (file)
@@ -47,25 +47,45 @@ struct ExpectedParts {
     String path;
     String query;
     String fragment;
+    String string;
 };
-    
-static void eq(const String& s1, const String& s2) { EXPECT_STREQ(s1.utf8().data(), s2.utf8().data()); }
-static void checkURL(const URL& url, const ExpectedParts& parts)
+
+static const char* s(const String& s) { return s.utf8().data(); } // NOTE(review): utf8() appears to return a temporary, so the returned pointer may dangle once this function returns - confirm and consider returning the owning object instead.
+static void checkURL(const String& urlString, const ExpectedParts& parts) // Parses urlString two ways, checks each result against parts, then checks the two results agree.
 {
-    eq(url.protocol(), parts.protocol);
-    eq(url.user(), parts.user);
-    eq(url.pass(), parts.password);
-    eq(url.host(), parts.host);
-    EXPECT_EQ(url.port(), parts.port);
-    eq(url.path(), parts.path);
-    eq(url.query(), parts.query);
-    eq(url.fragmentIdentifier(), parts.fragment);
+    auto url = URLParser::parse(urlString); // NOTE(review): parse() returns an Optional; it is dereferenced below without first checking that it holds a value.
+    EXPECT_STREQ(s(parts.protocol), s(url->protocol()));
+    EXPECT_STREQ(s(parts.user), s(url->user()));
+    EXPECT_STREQ(s(parts.password), s(url->pass()));
+    EXPECT_STREQ(s(parts.host), s(url->host()));
+    EXPECT_EQ(parts.port, url->port());
+    EXPECT_STREQ(s(parts.path), s(url->path()));
+    EXPECT_STREQ(s(parts.query), s(url->query()));
+    EXPECT_STREQ(s(parts.fragment), s(url->fragmentIdentifier()));
+    EXPECT_STREQ(s(parts.string), s(url->string()));
+    
+    auto oldURL = URL(URL(), urlString); // Same input through the existing URL constructor, checked against the same expectations.
+    EXPECT_STREQ(s(parts.protocol), s(oldURL.protocol()));
+    EXPECT_STREQ(s(parts.user), s(oldURL.user()));
+    EXPECT_STREQ(s(parts.password), s(oldURL.pass()));
+    EXPECT_STREQ(s(parts.host), s(oldURL.host()));
+    EXPECT_EQ(parts.port, oldURL.port());
+    EXPECT_STREQ(s(parts.path), s(oldURL.path()));
+    EXPECT_STREQ(s(parts.query), s(oldURL.query()));
+    EXPECT_STREQ(s(parts.fragment), s(oldURL.fragmentIdentifier()));
+    EXPECT_STREQ(s(parts.string), s(oldURL.string()));
+    
+    EXPECT_TRUE(URLParser::allValuesEqual(url.value(), oldURL)); // Beyond the accessors above, the two results must match member for member.
 }
 
 TEST_F(URLParserTest, Parse) // The cases progressively drop the fragment, query, path, and port from the same input URL.
 {
-    auto url = URLParser::parse("http://user:pass@webkit.org:123/path?query#fragment");
-    checkURL(url.value(), {"http", "user", "pass", "webkit.org", 123, "/path", "query", "fragment"});
+    checkURL("http://user:pass@webkit.org:123/path?query#fragment", {"http", "user", "pass", "webkit.org", 123, "/path", "query", "fragment", "http://user:pass@webkit.org:123/path?query#fragment"});
+    checkURL("http://user:pass@webkit.org:123/path?query", {"http", "user", "pass", "webkit.org", 123, "/path", "query", "", "http://user:pass@webkit.org:123/path?query"});
+    checkURL("http://user:pass@webkit.org:123/path", {"http", "user", "pass", "webkit.org", 123, "/path", "", "", "http://user:pass@webkit.org:123/path"});
+    checkURL("http://user:pass@webkit.org:123/", {"http", "user", "pass", "webkit.org", 123, "/", "", "", "http://user:pass@webkit.org:123/"});
+    checkURL("http://user:pass@webkit.org:123", {"http", "user", "pass", "webkit.org", 123, "/", "", "", "http://user:pass@webkit.org:123/"}); // Input has no path; the expected path and serialized string still contain "/".
+    checkURL("http://user:pass@webkit.org", {"http", "user", "pass", "webkit.org", 0, "/", "", "", "http://user:pass@webkit.org/"}); // Input has no port; the expected port is 0.
 }
 
 } // namespace TestWebKitAPI