<rdar://problem/13666412> Clean up some edge cases of URL parsing.
[WebKit-https.git] / LayoutTests / fast / url / segments-userinfo-vs-host.html
1 <!DOCTYPE html>
2 <html>
3 <head>
4 <script src="../js/resources/js-test-pre.js"></script>
5 <script src="resources/utilities.js"></script>
6 </head>
7 <body>
8 <script>
9 description("Canonicalization of URLs that start with something that may or may not be userinfo");
10
11 cases = [ // Each entry is [input URL, expected canonicalize() result, expected segments() array]; the loop after this table reads the entries by index. Entries described as invalid keep the input string unchanged and expect [":","","","","",""] as segments.
12     // These cases currently match between WebKit and Gecko, so the results should probably never change.
13     ["http:@www.apple.com", "http://www.apple.com/", ["http:","www.apple.com","","/","",""]],
14     ["http:/@www.apple.com", "http://www.apple.com/", ["http:","www.apple.com","","/","",""]],
15     ["http://@www.apple.com", "http://www.apple.com/", ["http:","www.apple.com","","/","",""]],
16     ["http:a:b@www.apple.com", "http://a:b@www.apple.com/", ["http:","www.apple.com","","/","",""]],
17     ["http:/a:b@www.apple.com", "http://a:b@www.apple.com/", ["http:","www.apple.com","","/","",""]],
18     ["http://a:b@www.apple.com", "http://a:b@www.apple.com/", ["http:","www.apple.com","","/","",""]],
19     ["http://@pple.com", "http://pple.com/", ["http:","pple.com","","/","",""]],
20     ["http::b@www.apple.com", "http://:b@www.apple.com/", ["http:","www.apple.com","","/","",""]],
21     ["http:/:b@www.apple.com", "http://:b@www.apple.com/", ["http:","www.apple.com","","/","",""]],
22     ["http://:b@www.apple.com", "http://:b@www.apple.com/", ["http:","www.apple.com","","/","",""]],
23
24     // Gecko and WebKit both treat this URL as invalid, but Gecko reports "http:" as the protocol.
25     ["http:/:@/www.apple.com", "http:/:@/www.apple.com", [":","","","","",""]],
26
27     // HTTP URLs with an empty host are invalid, because otherwise there is just too much risk that another parsing pass (possibly by a network layer with a subtly different algorithm) would treat the path as the hostname.
28     ["http://user@/www.apple.com", "http://user@/www.apple.com", [":","","","","",""]], // Regression test for <https://bugs.webkit.org/show_bug.cgi?id=57220>.
29     ["http:@/www.apple.com", "http:@/www.apple.com", [":","","","","",""]], // Regression test for <https://bugs.webkit.org/show_bug.cgi?id=104919>.
30     ["http:/@/www.apple.com", "http:/@/www.apple.com", [":","","","","",""]],
31     ["http://@/www.apple.com", "http://@/www.apple.com", [":","","","","",""]],
32     ["https:@/www.apple.com", "https:@/www.apple.com", [":","","","","",""]], // Regression test for <https://bugs.webkit.org/show_bug.cgi?id=104919>.
33     ["http:a:b@/www.apple.com", "http:a:b@/www.apple.com", [":","","","","",""]],
34     ["http:/a:b@/www.apple.com", "http:/a:b@/www.apple.com", [":","","","","",""]],
35     ["http://a:b@/www.apple.com", "http://a:b@/www.apple.com", [":","","","","",""]],
36
37     // Gecko treats this URL as relative. WebKit treats it as invalid, because of the empty host.
38     ["http::@/www.apple.com", "http::@/www.apple.com", [":","","","","",""]],
39
40     // Empty password. Gecko still adds a ':', WebKit does not.
41     ["http:a:@www.apple.com", "http://a@www.apple.com/", ["http:","www.apple.com","","/","",""]],
42     ["http:/a:@www.apple.com", "http://a@www.apple.com/", ["http:","www.apple.com","","/","",""]],
43     ["http://a:@www.apple.com", "http://a@www.apple.com/", ["http:","www.apple.com","","/","",""]],
44
45     // Some tests for multiple @-signs. WebKit treats these URLs as invalid, while Gecko allows '@' in userinfo, searching for the last instance in the authority as a delimiter.
46     ["http://a:b@www.@pple.com/p@th", "http://a:b@www.@pple.com/p@th", [":","","","","",""]],
47     ["http://www.@@pple.com", "http://www.@@pple.com", [":","","","","",""]],
48     ["http://@@pple.com", "http://@@pple.com", [":","","","","",""]],
49     ["http://@@@pple.com", "http://@@@pple.com", [":","","","","",""]],
50     ["http:@@/www.apple.com", "http:@@/www.apple.com", [":","","","","",""]],
51     ["http:/@@/www.apple.com", "http:/@@/www.apple.com", [":","","","","",""]],
52     ["http://@@/www.apple.com", "http://@@/www.apple.com", [":","","","","",""]],
53     ["http:@:b@www.apple.com", "http:@:b@www.apple.com", [":","","","","",""]],
54     ["http:/@:b@www.apple.com", "http:/@:b@www.apple.com", [":","","","","",""]],
55     ["http://@:b@www.apple.com", "http://@:b@www.apple.com", [":","","","","",""]],
56
57     // Gecko escapes '.' in the username, WebKit does not.
58     ["http://www.@pple.com", "http://www.@pple.com/", ["http:","pple.com","","/","",""]],
59
60     // Invalid in WebKit. Gecko thinks that the hostname is ":www.apple.com", which doesn't seem like a good idea.
61     ["http:@:www.apple.com", "http:@:www.apple.com", [":","","","","",""]],
62     ["http:/@:www.apple.com", "http:/@:www.apple.com", [":","","","","",""]],
63     ["http://@:www.apple.com", "http://@:www.apple.com", [":","","","","",""]],
64
65     // Invalid in Gecko. WebKit just treats this as a URL with empty username and password, which appears to make sense.
66     ["http://:@www.apple.com", "http://www.apple.com/", ["http:","www.apple.com","","/","",""]],
67
68     // Host is empty, but hostname is not, so the resulting parsed URL string is not particularly ambiguous.
69     // Gecko builds the same parsed URL string, although its components are different.
70     // FIXME: Unsure why we are getting a "0" as the port (third segment) here, but it probably doesn't matter for such an edge case.
71     ["http:@:/www.apple.com", "http://:/www.apple.com", ["http:","","0","/www.apple.com","",""]],
72     ["http:/@:/www.apple.com", "http://:/www.apple.com", ["http:","","0","/www.apple.com","",""]],
73     ["http://@:/www.apple.com", "http://:/www.apple.com", ["http:","","0","/www.apple.com","",""]],
74     ["http:@:80/www.apple.com", "http://:80/www.apple.com", ["http:","","80","/www.apple.com","",""]],
75 ];
76
77 for (var i = 0; i < cases.length; ++i) {
78     var test_string = cases[i][0];
79     var expected_canonicalized_url = cases[i][1];
80     var expected_url_segments = cases[i][2];
81     shouldBe("canonicalize('" + test_string + "')", "'" + expected_canonicalized_url + "'");
82     shouldBe("segments('" + test_string + "')", "'" + JSON.stringify(expected_url_segments) + "'");
83     debug("");
84 }
85 </script>
86 <script src="../js/resources/js-test-post.js"></script>
87 </body>
88 </html>