2011-04-24 Maciej Stachowiak <mjs@apple.com>
author mjs@apple.com <mjs@apple.com@268f45cc-cd09-0410-ab3c-d52691b4dbfc>
Sun, 24 Apr 2011 22:53:59 +0000 (22:53 +0000)
committer mjs@apple.com <mjs@apple.com@268f45cc-cd09-0410-ab3c-d52691b4dbfc>
Sun, 24 Apr 2011 22:53:59 +0000 (22:53 +0000)
        Reviewed by George Staikos.

        Handling of URLs like http:/example.com/ is incorrect
        https://bugs.webkit.org/show_bug.cgi?id=59300
        <rdar://problem/9231956>

        URLs like http:/example.com/ or http:example.com/ are now correctly
        canonicalized as http://example.com/

        The code still doesn't quite match other browsers - at least some
        other browsers seem to base parsing behavior on whether they
        recognize a scheme from a fixed list, and ignore whether // is
        present in the URL or not.

        * platform/KURL.cpp:
        (WebCore::isNonFileHierarchicalScheme): New helper function.
        (WebCore::KURL::parse): For a particular list of whitelisted schemes,
        assume they are hierarchical and need an authority even if there is no //
        after the :/
2011-04-24  Maciej Stachowiak  <mjs@apple.com>

        Reviewed by George Staikos.

        Handling of URLs like http:/example.com/ is incorrect
        https://bugs.webkit.org/show_bug.cgi?id=59300
        <rdar://problem/9231956>

        Added some new test cases. Also updated results for the
        change. The changes are progressions.

        * fast/url/relative-expected.txt:
        * fast/url/script-tests/segments.js:
        * fast/url/script-tests/standard-url.js:
        * fast/url/segments-expected.txt:
        * fast/url/segments-from-data-url-expected.txt:
        * fast/url/standard-url-expected.txt:

git-svn-id: http://svn.webkit.org/repository/webkit/trunk@84762 268f45cc-cd09-0410-ab3c-d52691b4dbfc

LayoutTests/ChangeLog
LayoutTests/fast/url/relative-expected.txt
LayoutTests/fast/url/script-tests/segments.js
LayoutTests/fast/url/script-tests/standard-url.js
LayoutTests/fast/url/segments-expected.txt
LayoutTests/fast/url/segments-from-data-url-expected.txt
LayoutTests/fast/url/standard-url-expected.txt
Source/WebCore/ChangeLog
Source/WebCore/platform/KURL.cpp

index 7427b15..0d77bc0 100644 (file)
@@ -1,3 +1,21 @@
+2011-04-24  Maciej Stachowiak  <mjs@apple.com>
+
+        Reviewed by George Staikos.
+
+        Handling of URLs like http:/example.com/ is incorrect
+        https://bugs.webkit.org/show_bug.cgi?id=59300
+        <rdar://problem/9231956>
+        
+        Added some new test cases. Also updated results for the
+        change. The changes are progressions.
+
+        * fast/url/relative-expected.txt:
+        * fast/url/script-tests/segments.js:
+        * fast/url/script-tests/standard-url.js:
+        * fast/url/segments-expected.txt:
+        * fast/url/segments-from-data-url-expected.txt:
+        * fast/url/standard-url-expected.txt:
+
 2011-04-24  Adam Barth  <abarth@webkit.org>
 
         Update Chromium results for standard-url.html
index 8ee781c..7f93285 100644 (file)
@@ -13,9 +13,9 @@ PASS canonicalize('  .  ') is 'http://foo/'
 PASS canonicalize('     ') is 'http://foo/bar'
 PASS canonicalize('http:path') is 'http://host/path'
 PASS canonicalize('http:path') is 'http://host/a/path'
-FAIL canonicalize('http:/path') should be http://host/path. Was http:/path.
+FAIL canonicalize('http:/path') should be http://host/path. Was http://path/.
 FAIL canonicalize('HTTP:/path') should be http://host/path. Was http:/path.
-FAIL canonicalize('https:host2') should be https://host2/. Was https:host2.
+PASS canonicalize('https:host2') is 'https://host2/'
 PASS canonicalize('htto:/host2') is 'htto:/host2'
 PASS canonicalize('/b/c/d') is 'http://host/b/c/d'
 PASS canonicalize('\\b\\c\\d') is 'http://host/b/c/d'
@@ -42,7 +42,7 @@ FAIL canonicalize('baz.html') should be . Was baz.html.
 PASS canonicalize('data:baz') is 'data:baz'
 PASS canonicalize('data:/base') is 'data:/base'
 PASS canonicalize('http://host/') is 'http://host/'
-FAIL canonicalize('http:host') should be http://host/. Was http:host.
+PASS canonicalize('http:host') is 'http://host/'
 PASS canonicalize('./asd:fgh') is 'http://foo/asd:fgh'
 PASS canonicalize(':foo') is 'http://foo/:foo'
 PASS canonicalize(' hello world') is 'http://foo/hello%20world'
index a826703..037f243 100644 (file)
@@ -73,14 +73,25 @@ cases = [
   ["https:/example.com/",                    ["https:","example.com","0","/","",""]],
   ["madeupscheme:/example.com/",             ["madeupscheme:","","0","/example.com/","",""]],
   ["file:/example.com/",                     ["file:","","0","/example.com/","",""]],
-  ["fops:/example.com/",                     ["fops:","","0","/example.com/","",""]],
+  ["ftps:/example.com/",                     ["ftps:","","0","/example.com/","",""]],
   ["gopher:/example.com/",                   ["gopher:","example.com","0","/","",""]],
   ["ws:/example.com/",                       ["ws:","example.com","0","/","",""]],
   ["wss:/example.com/",                      ["wss:","example.com","0","/","",""]],
   ["data:/example.com/",                     ["data:","","0","/example.com/","",""]],
   ["javascript:/example.com/",               ["javascript:","","0","/example.com/","",""]],
   ["mailto:/example.com/",                   ["mailto:","","0","/example.com/","",""]],
-  
+  ["http:example.com/",                      ["http:","example.org","0","/foo/example.com/","",""]],
+  ["ftp:example.com/",                       ["ftp:","example.com","0","/","",""]],
+  ["https:example.com/",                     ["https:","example.com","0","/","",""]],
+  ["madeupscheme:example.com/",              ["madeupscheme:","","0","example.com/","",""]],
+  ["file:example.com/",                      ["file:","","0","/example.com/","",""]],
+  ["ftps:example.com/",                      ["ftps:","","0","example.com/","",""]],
+  ["gopher:example.com/",                    ["gopher:","example.com","0","/","",""]],
+  ["ws:example.com/",                        ["ws:","example.com","0","/","",""]],
+  ["wss:example.com/",                       ["wss:","example.com","0","/","",""]],
+  ["data:example.com/",                      ["data:","","0","example.com/","",""]],
+  ["javascript:example.com/",                ["javascript:","","0","example.com/","",""]],
+  ["mailto:example.com/",                    ["mailto:","","0","example.com/","",""]],  
 ];
 
 var originalBaseURL = canonicalize(".");
index ef2f7a7..27afe76 100644 (file)
@@ -40,13 +40,24 @@ cases = [
   ["https:/example.com/", "https://example.com/"],
   ["madeupscheme:/example.com/", "madeupscheme:/example.com/"],
   ["file:/example.com/", "file://localhost/example.com/"],
-  ["fops:/example.com/", "fops:/example.com/"],
+  ["ftps:/example.com/", "ftps:/example.com/"],
   ["gopher:/example.com/", "gopher://example.com/"],
   ["ws:/example.com/", "ws://example.com/"],
   ["wss:/example.com/", "wss://example.com/"],
   ["data:/example.com/", "data:/example.com/"],
   ["javascript:/example.com/", "javascript:/example.com/"],
   ["mailto:/example.com/", "mailto:/example.com/"],
+  ["http:example.com/", "http://example.com/"],
+  ["ftp:example.com/", "ftp://example.com/"],
+  ["https:example.com/", "https://example.com/"],
+  ["madeupscheme:example.com/", "madeupscheme:example.com/"],
+  ["ftps:example.com/", "ftps:example.com/"],
+  ["gopher:example.com/", "gopher://example.com/"],
+  ["ws:example.com/", "ws://example.com/"],
+  ["wss:example.com/", "wss://example.com/"],
+  ["data:example.com/", "data:example.com/"],
+  ["javascript:example.com/", "javascript:example.com/"],
+  ["mailto:example.com/", "mailto:example.com/"],
 ];
 
 for (var i = 0; i < cases.length; ++i) {
index b98c50f..38ea324 100644 (file)
@@ -69,18 +69,30 @@ PASS segments('http://2001::1]:80') is '[":","","0","","",""]'
 PASS segments('http://[2001::1]') is '["http:","[2001::1]","0","/","",""]'
 PASS segments('http://[2001::1]:80') is '["http:","[2001::1]","0","/","",""]'
 PASS segments('http://[[::]]') is '[":","","0","","",""]'
-FAIL segments('http:/example.com/') should be ["http:","example.org","0","/example.com/","",""]. Was ["http:","","0","/example.com/","",""].
-FAIL segments('ftp:/example.com/') should be ["ftp:","example.com","0","/","",""]. Was ["ftp:","","0","/example.com/","",""].
-FAIL segments('https:/example.com/') should be ["https:","example.com","0","/","",""]. Was ["https:","","0","/example.com/","",""].
+FAIL segments('http:/example.com/') should be ["http:","example.org","0","/example.com/","",""]. Was ["http:","example.com","0","/","",""].
+PASS segments('ftp:/example.com/') is '["ftp:","example.com","0","/","",""]'
+PASS segments('https:/example.com/') is '["https:","example.com","0","/","",""]'
 PASS segments('madeupscheme:/example.com/') is '["madeupscheme:","","0","/example.com/","",""]'
 PASS segments('file:/example.com/') is '["file:","","0","/example.com/","",""]'
-PASS segments('fops:/example.com/') is '["fops:","","0","/example.com/","",""]'
-FAIL segments('gopher:/example.com/') should be ["gopher:","example.com","0","/","",""]. Was ["gopher:","","0","/example.com/","",""].
-FAIL segments('ws:/example.com/') should be ["ws:","example.com","0","/","",""]. Was ["ws:","","0","/example.com/","",""].
-FAIL segments('wss:/example.com/') should be ["wss:","example.com","0","/","",""]. Was ["wss:","","0","/example.com/","",""].
+PASS segments('ftps:/example.com/') is '["ftps:","","0","/example.com/","",""]'
+PASS segments('gopher:/example.com/') is '["gopher:","example.com","0","/","",""]'
+PASS segments('ws:/example.com/') is '["ws:","example.com","0","/","",""]'
+PASS segments('wss:/example.com/') is '["wss:","example.com","0","/","",""]'
 PASS segments('data:/example.com/') is '["data:","","0","/example.com/","",""]'
 PASS segments('javascript:/example.com/') is '["javascript:","","0","/example.com/","",""]'
 PASS segments('mailto:/example.com/') is '["mailto:","","0","/example.com/","",""]'
+PASS segments('http:example.com/') is '["http:","example.org","0","/foo/example.com/","",""]'
+PASS segments('ftp:example.com/') is '["ftp:","example.com","0","/","",""]'
+PASS segments('https:example.com/') is '["https:","example.com","0","/","",""]'
+PASS segments('madeupscheme:example.com/') is '["madeupscheme:","","0","example.com/","",""]'
+FAIL segments('file:example.com/') should be ["file:","","0","/example.com/","",""]. Was ["file:","","0","example.com/","",""].
+PASS segments('ftps:example.com/') is '["ftps:","","0","example.com/","",""]'
+PASS segments('gopher:example.com/') is '["gopher:","example.com","0","/","",""]'
+PASS segments('ws:example.com/') is '["ws:","example.com","0","/","",""]'
+PASS segments('wss:example.com/') is '["wss:","example.com","0","/","",""]'
+PASS segments('data:example.com/') is '["data:","","0","example.com/","",""]'
+PASS segments('javascript:example.com/') is '["javascript:","","0","example.com/","",""]'
+PASS segments('mailto:example.com/') is '["mailto:","","0","example.com/","",""]'
 PASS successfullyParsed is true
 
 TEST COMPLETE
index b97e489..6066810 100644 (file)
@@ -4,7 +4,7 @@ On success, you will see a series of "PASS" messages, followed by "TEST COMPLETE
 
 
 PASS segments('http://user:pass@foo:21/bar;par?b#c') is '["http:","foo","21","/bar;par","?b","#c"]'
-FAIL segments('http:foo.com') should be ["http:","foo.com","0","/","",""]. Was ["http:","","0","foo.com","",""].
+PASS segments('http:foo.com') is '["http:","foo.com","0","/","",""]'
 PASS segments('\t   :foo.com   \n') is '[":","","0","","",""]'
 PASS segments(' foo.com  ') is '[":","","0","","",""]'
 PASS segments('a:\t foo.com') is '["a:","","0"," foo.com","",""]'
@@ -41,7 +41,7 @@ PASS segments('::') is '[":","","0","","",""]'
 PASS segments('::23') is '[":","","0","","",""]'
 PASS segments('foo://') is '["foo:","","0","//","",""]'
 PASS segments('http://a:b@c:29/d') is '["http:","c","29","/d","",""]'
-FAIL segments('http::@c:29') should be ["http:","c","29","/","",""]. Was ["http:","","0",":@c:29","",""].
+PASS segments('http::@c:29') is '["http:","c","29","/","",""]'
 FAIL segments('http://&a:foo(b]c@d:2/') should be ["http:","d","2","/","",""]. Was [":","","0","","",""].
 FAIL segments('http://::@c@d:2') should be ["http:","d","2","/","",""]. Was [":","","0","","",""].
 PASS segments('http://foo.com:b@d/') is '["http:","d","0","/","",""]'
@@ -60,7 +60,7 @@ PASS segments('http://foo/abcd?efgh?ijkl') is '["http:","foo","0","/abcd","?efgh
 PASS segments('http://foo/abcd#foo?bar') is '["http:","foo","0","/abcd","","#foo?bar"]'
 FAIL segments('[61:24:74]:98') should be ["data:","","0","text/[61:24:74]:98","",""]. Was [":","","0","","",""].
 FAIL segments('http://[61:27]:98') should be [":","","0","","",""]. Was ["http:","[61:27]","98","/","",""].
-FAIL segments('http:[61:27]/:foo') should be [":","","0","","",""]. Was ["http:","","0","[61:27]/:foo","",""].
+FAIL segments('http:[61:27]/:foo') should be [":","","0","","",""]. Was ["http:","[61:27]","0","/:foo","",""].
 PASS segments('http://[1::2]:3:4') is '[":","","0","","",""]'
 PASS segments('http://2001::1') is '[":","","0","","",""]'
 PASS segments('http://[2001::1') is '[":","","0","","",""]'
index 5f5c710..99ba8bb 100644 (file)
@@ -30,18 +30,29 @@ PASS canonicalize('wss://foo:80/') is 'wss://foo:80/'
 PASS canonicalize('wss://foo:81/') is 'wss://foo:81/'
 PASS canonicalize('wss://foo:443/') is 'wss://foo/'
 PASS canonicalize('wss://foo:815/') is 'wss://foo:815/'
-FAIL canonicalize('http:/example.com/') should be http://example.com/. Was http:/example.com/.
-FAIL canonicalize('ftp:/example.com/') should be ftp://example.com/. Was ftp:/example.com/.
-FAIL canonicalize('https:/example.com/') should be https://example.com/. Was https:/example.com/.
+PASS canonicalize('http:/example.com/') is 'http://example.com/'
+PASS canonicalize('ftp:/example.com/') is 'ftp://example.com/'
+PASS canonicalize('https:/example.com/') is 'https://example.com/'
 PASS canonicalize('madeupscheme:/example.com/') is 'madeupscheme:/example.com/'
 FAIL canonicalize('file:/example.com/') should be file://localhost/example.com/. Was file:///example.com/.
-PASS canonicalize('fops:/example.com/') is 'fops:/example.com/'
-FAIL canonicalize('gopher:/example.com/') should be gopher://example.com/. Was gopher:/example.com/.
-FAIL canonicalize('ws:/example.com/') should be ws://example.com/. Was ws:/example.com/.
-FAIL canonicalize('wss:/example.com/') should be wss://example.com/. Was wss:/example.com/.
+PASS canonicalize('ftps:/example.com/') is 'ftps:/example.com/'
+PASS canonicalize('gopher:/example.com/') is 'gopher://example.com/'
+PASS canonicalize('ws:/example.com/') is 'ws://example.com/'
+PASS canonicalize('wss:/example.com/') is 'wss://example.com/'
 PASS canonicalize('data:/example.com/') is 'data:/example.com/'
 PASS canonicalize('javascript:/example.com/') is 'javascript:/example.com/'
 PASS canonicalize('mailto:/example.com/') is 'mailto:/example.com/'
+PASS canonicalize('http:example.com/') is 'http://example.com/'
+PASS canonicalize('ftp:example.com/') is 'ftp://example.com/'
+PASS canonicalize('https:example.com/') is 'https://example.com/'
+PASS canonicalize('madeupscheme:example.com/') is 'madeupscheme:example.com/'
+PASS canonicalize('ftps:example.com/') is 'ftps:example.com/'
+PASS canonicalize('gopher:example.com/') is 'gopher://example.com/'
+PASS canonicalize('ws:example.com/') is 'ws://example.com/'
+PASS canonicalize('wss:example.com/') is 'wss://example.com/'
+PASS canonicalize('data:example.com/') is 'data:example.com/'
+PASS canonicalize('javascript:example.com/') is 'javascript:example.com/'
+PASS canonicalize('mailto:example.com/') is 'mailto:example.com/'
 PASS successfullyParsed is true
 
 TEST COMPLETE
index 06d7114..10b82d9 100644 (file)
@@ -1,3 +1,25 @@
+2011-04-24  Maciej Stachowiak  <mjs@apple.com>
+
+        Reviewed by George Staikos.
+
+        Handling of URLs like http:/example.com/ is incorrect
+        https://bugs.webkit.org/show_bug.cgi?id=59300
+        <rdar://problem/9231956>
+
+        URLs like http:/example.com/ or http:example.com/ are now correctly
+        canonicalized as http://example.com/
+        
+        The code still doesn't quite match other browsers - at least some
+        other browsers seem to base parsing behavior on whether they
+        recognize a scheme from a fixed list, and ignore whether // is
+        present in the URL or not.
+        
+        * platform/KURL.cpp:
+        (WebCore::isNonFileHierarchicalScheme): New helper function.
+        (WebCore::KURL::parse): For a particular list of whitelisted schemes,
+        assume they are hierarchical and need an authority even if there is no //
+        after the :/
+
 2011-04-24  Dan Bernstein  <mitz@apple.com>
 
         LLVM Compiler build fix.
index 88ad3d9..fd24e3d 100644 (file)
@@ -1137,6 +1137,23 @@ static inline bool hostPortIsEmptyButCredentialsArePresent(int hostStart, int po
     return userEndChar == '@' && hostStart == portEnd;
 }
 
+static bool isNonFileHierarchicalScheme(const char* scheme, size_t schemeLength)
+{
+    switch (schemeLength) {
+    case 2:
+        return equal("ws", 2, scheme, schemeLength);
+    case 3:
+        return equal("ftp", 3, scheme, schemeLength) || equal("wss", 3, scheme, schemeLength);
+    case 4:
+        return equal("http", 4, scheme, schemeLength);
+    case 5:
+        return equal("https", 5, scheme, schemeLength);
+    case 6:
+        return equal("gopher", 6, scheme, schemeLength);
+    }
+    return false;
+}
+
 void KURL::parse(const char* url, const String* originalString)
 {
     if (!url || url[0] == '\0') {
@@ -1173,6 +1190,7 @@ void KURL::parse(const char* url, const String* originalString)
     int portEnd;
 
     bool hierarchical = url[schemeEnd + 1] == '/';
+    bool hasSecondSlash = hierarchical && url[schemeEnd + 2] == '/';
 
     bool isFile = schemeEnd == 4
         && matchLetter(url[0], 'f')
@@ -1186,12 +1204,15 @@ void KURL::parse(const char* url, const String* originalString)
         && matchLetter(url[3], 'p')
         && (url[4] == ':' || (matchLetter(url[4], 's') && url[5] == ':'));
 
-    if (hierarchical && url[schemeEnd + 2] == '/') {
+    if ((hierarchical && hasSecondSlash) || isNonFileHierarchicalScheme(url, schemeEnd)) {
         // The part after the scheme is either a net_path or an abs_path whose first path segment is empty.
         // Attempt to find an authority.
-
         // FIXME: Authority characters may be scanned twice, and it would be nice to be faster.
-        userStart += 2;
+
+        if (hierarchical)
+            userStart++;
+        if (hasSecondSlash)
+            userStart++;
         userEnd = userStart;
 
         int colonPos = 0;
@@ -1394,8 +1415,8 @@ void KURL::parse(const char* url, const String* originalString)
         m_userStart = m_userEnd = m_passwordEnd = m_hostEnd = m_portEnd = p - buffer.data();
 
     // For canonicalization, ensure we have a '/' for no path.
-    // Do this only for hierarchical URL with protocol http or https.
-    if (m_protocolInHTTPFamily && hierarchical && pathEnd == pathStart)
+    // Do this only for URL with protocol http or https.
+    if (m_protocolInHTTPFamily && pathEnd == pathStart)
         *p++ = '/';
 
     // add path, escaping bad characters