e88b474daffe1333dd86a17deab102f8d67a7d7a
[WebKit-https.git] / LayoutTests / fast / url / host.html
1 <!DOCTYPE html>
2 <html>
3 <head>
4 <script src="../js/resources/js-test-pre.js"></script>
5 <script src="resources/utilities.js"></script>
6 </head>
7 <body>
8 <script>
9 description("Canonicalization of host names.");
10 // Each entry is [input host, expected canonical host].
11 cases = [ 
12   // Basic canonicalization, uppercase should be converted to lowercase.
13   ["GoOgLe.CoM", "google.com"],
14   // Spaces and some other characters should be escaped.
15   ["Goo%20 goo%7C|.com", "goo%20%20goo%7C%7C.com"],
16   // Exciting different types of spaces!
17   ["GOO\u00a0\u3000goo.com", "goo%20%20goo.com"],
18   // Other types of space (no-break, zero-width, zero-width-no-break) are
19   // name-prepped away to nothing.
20   ["GOO\u200b\u2060\ufeffgoo.com", "googoo.com"],
21   // Ideographic full stop (full-width period for Chinese, etc.) should be
22   // treated as a dot.
23   ["www.foo\u3002" + "bar.com", "www.foo.bar.com"],
24   // Invalid unicode characters should fail...
25   // ...In wide input, ICU will barf and we'll end up with the input as
26   //    escaped UTF-8 (the invalid character should be replaced with the
27   //    replacement character).
28   ["\ufdd0zyx.com", "%EF%BF%BDzyx.com"],
29   // ...This is the same as the previous case but with the input escaped.
30   ["%ef%b7%90zyx.com", "%EF%BF%BDzyx.com"],
31   // Test name prepping, fullwidth input should be converted to ASCII and NOT
32   // IDN-ized. This is "Go" in fullwidth UTF-8/UTF-16.
33   ["\uff27\uff4f.com", "go.com"],
34   // Test that fullwidth escaped values are properly name-prepped,
35   // then converted or rejected.
36   // ...%41 in fullwidth = 'A' (also as escaped UTF-8 input)
37   ["\uff05\uff14\uff11.com", "a.com"],
38   ["%ef%bc%85%ef%bc%94%ef%bc%91.com", "a.com"],
39   // ...%00 in fullwidth should fail (also as escaped UTF-8 input)
40   ["\uff05\uff10\uff10.com", "%00.com"],
41   ["%ef%bc%85%ef%bc%90%ef%bc%90.com", "%00.com"],
42   // Basic IDN support, UTF-8 and UTF-16 input should be converted to IDN
43   ["\u4f60\u597d\u4f60\u597d", "xn--6qqa088eba"],
44   // Mixed UTF-8 and escaped UTF-8 (narrow case) and UTF-16 and escaped
45   // UTF-8 (wide case). The output should be equivalent to the true wide
46   // character input above.
47   ["%E4%BD%A0%E5%A5%BD\u4f60\u597d", "xn--6qqa088eba"],
48   // Invalid escaped characters should fail and the percents should be
49   // escaped.
50   ["%zz%66%a", "%25zzf%25a"],
51   // If we get an invalid character that has been escaped.
52   ["%25", "%25"],
53   ["hello%00", "hello%00"],
54   // Escaped numbers should be treated like IP addresses if they are.
55   ["%30%78%63%30%2e%30%32%35%30.01", "192.168.0.1"],
56   ["%30%78%63%30%2e%30%32%35%30.01%2e", "192.168.0.1"],
57   // Invalid escaping should trigger the regular host error handling.
58   ["%3g%78%63%30%2e%30%32%35%30%2E.01", "%253gxc0.0250..01"],
59   // Something that isn't exactly an IP should get treated as a host and
60   // spaces escaped.
61   ["192.168.0.1 hello", "192.168.0.1%20hello"],
62   // Fullwidth and escaped UTF-8 fullwidth should still be treated as IP.
63   // These are "0Xc0.0250.01" in fullwidth.
64   ["\uff10\uff38\uff43\uff10\uff0e\uff10\uff12\uff15\uff10\uff0e\uff10\uff11", "192.168.0.1"],
65   // Broken IP addresses get marked as such.
66   ["192.168.0.257", "192.168.0.257"],
67   ["[google.com]", "[google.com]"],
68   // A Cyrillic letter followed by ( should yield punycode with the ( escaped before the punycode string is created, i.e.
69   // if ( were escaped after the punycode was created we would get xn--%28-8tb (incorrect).
70   ["\u0442(", "xn--%28-7ed"],
71   ["go\\\\@ogle.com","go/@ogle.com"],
72   ["go/@ogle.com","go/@ogle.com"],
73   ["www.lookout.net::==80::==443::","www.lookout.net::%3D%3D80::%3D%3D443:"],
74   ["www.lookout.net::80::443","www.lookout.net::80::443"],
75   // From http://eaea.sirdarckcat.net/uritest.html
76   ["\\./","./"],
77   ["//:@/","/"],
78   ["\\google.com/foo","google.com/foo"],
79   ["\\\\google.com/foo","google.com/foo"],
80   ["//asdf@/","asdf@/"],
81   ["//:81",":81"],
82   ["://","//"],
83   ["c:","c"],
84   ["xxxx:","xxxx"],
85   [".:.",".:."],
86   ["////@google.com/","google.com/"],
87   ["@google.com","google.com"]
88 ];
89
90 for (var i = 0; i < cases.length; ++i) {
91   test_vector = cases[i][0];
92   expected_result = cases[i][1];
93   shouldBe("canonicalize('http://" + test_vector + "/')",
94            "'http://" + expected_result + "/'");
95 }
96 </script>
97 <script src="../js/resources/js-test-post.js"></script>
98 </body>
99 </html>