2010-04-15 Adam Barth <abarth@webkit.org>
author eric@webkit.org <eric@webkit.org@268f45cc-cd09-0410-ab3c-d52691b4dbfc>
Fri, 16 Apr 2010 02:38:45 +0000 (02:38 +0000)
committer eric@webkit.org <eric@webkit.org@268f45cc-cd09-0410-ab3c-d52691b4dbfc>
Fri, 16 Apr 2010 02:38:45 +0000 (02:38 +0000)
        Reviewed by Jeremy Orlow.

        Add URL parsing tests for host names
        https://bugs.webkit.org/show_bug.cgi?id=37666

        * fast/url/host-expected.txt: Added.
        * fast/url/host.html: Added.
        * fast/url/script-tests/host.js: Added.

git-svn-id: https://svn.webkit.org/repository/webkit/trunk@57711 268f45cc-cd09-0410-ab3c-d52691b4dbfc

LayoutTests/ChangeLog
LayoutTests/fast/url/host-expected.txt [new file with mode: 0644]
LayoutTests/fast/url/host.html [new file with mode: 0644]
LayoutTests/fast/url/script-tests/host.js [new file with mode: 0644]

index 976523f..2f1c00e 100644 (file)
@@ -2,6 +2,17 @@
 
         Reviewed by Jeremy Orlow.
 
+        Add URL parsing tests for host names
+        https://bugs.webkit.org/show_bug.cgi?id=37666
+
+        * fast/url/host-expected.txt: Added.
+        * fast/url/host.html: Added.
+        * fast/url/script-tests/host.js: Added.
+
+2010-04-15  Adam Barth  <abarth@webkit.org>
+
+        Reviewed by Jeremy Orlow.
+
         Add URL parsing tests for paths
         https://bugs.webkit.org/show_bug.cgi?id=37672
 
diff --git a/LayoutTests/fast/url/host-expected.txt b/LayoutTests/fast/url/host-expected.txt
new file mode 100644 (file)
index 0000000..afca9ed
--- /dev/null
@@ -0,0 +1,34 @@
+Canonicalization of host names.
+
+On success, you will see a series of "PASS" messages, followed by "TEST COMPLETE".
+
+
+FAIL canonicalize('http://GoOgLe.CoM/') should be http://google.com/. Was http://GoOgLe.CoM/.
+FAIL canonicalize('http://Goo%20 goo%7C|.com/') should be http://goo%20%20goo%7C%7C.com/. Was http://Goo%20 goo%7C|.com/.
+FAIL canonicalize('http://GOO  goo.com/') should be http://goo%20%20goo.com/. Was http://goo  goo.com/.
+PASS canonicalize('http://GOO​⁠goo.com/') is 'http://googoo.com/'
+PASS canonicalize('http://www.foo。bar.com/') is 'http://www.foo.bar.com/'
+FAIL canonicalize('http://﷐zyx.com/') should be http://%EF%BF%BDzyx.com/. Was http:/.
+FAIL canonicalize('http://%ef%b7%90zyx.com/') should be http://%EF%BF%BDzyx.com/. Was http://%ef%b7%90zyx.com/.
+PASS canonicalize('http://Go.com/') is 'http://go.com/'
+FAIL canonicalize('http://%41.com/') should be http://a.com/. Was http://%41.com/.
+FAIL canonicalize('http://%ef%bc%85%ef%bc%94%ef%bc%91.com/') should be http://a.com/. Was http://%ef%bc%85%ef%bc%94%ef%bc%91.com/.
+PASS canonicalize('http://%00.com/') is 'http://%00.com/'
+FAIL canonicalize('http://%ef%bc%85%ef%bc%90%ef%bc%90.com/') should be http://%00.com/. Was http://%ef%bc%85%ef%bc%90%ef%bc%90.com/.
+PASS canonicalize('http://你好你好/') is 'http://xn--6qqa088eba/'
+FAIL canonicalize('http://%E4%BD%A0%E5%A5%BD你好/') should be http://xn--6qqa088eba/. Was http://xn--%e4%bd%a0%e5%a5%bd-5g72ap39l/.
+FAIL canonicalize('http://%zz%66%a/') should be http://%25zzf%25a/. Was http://%zz%66%a/.
+PASS canonicalize('http://%25/') is 'http://%25/'
+PASS canonicalize('http://hello%00/') is 'http://hello%00/'
+FAIL canonicalize('http://%30%78%63%30%2e%30%32%35%30.01/') should be http://192.168.0.1/. Was http://%30%78%63%30%2e%30%32%35%30.01/.
+FAIL canonicalize('http://%30%78%63%30%2e%30%32%35%30.01%2e/') should be http://192.168.0.1/. Was http://%30%78%63%30%2e%30%32%35%30.01%2e/.
+FAIL canonicalize('http://%3g%78%63%30%2e%30%32%35%30%2E.01/') should be http://%253gxc0.0250..01/. Was http://%3g%78%63%30%2e%30%32%35%30%2E.01/.
+FAIL canonicalize('http://192.168.0.1 hello/') should be http://192.168.0.1%20hello/. Was http://192.168.0.1 hello/.
+FAIL canonicalize('http://0Xc0.0250.01/') should be http://192.168.0.1/. Was http://0xc0.0250.01/.
+PASS canonicalize('http://192.168.0.257/') is 'http://192.168.0.257/'
+PASS canonicalize('http://[google.com]/') is 'http://[google.com]/'
+FAIL canonicalize('http://т(/') should be http://xn--%28-7ed/. Was http://xn--(-8tb/.
+PASS successfullyParsed is true
+
+TEST COMPLETE
+
diff --git a/LayoutTests/fast/url/host.html b/LayoutTests/fast/url/host.html
new file mode 100644 (file)
index 0000000..32082c2
--- /dev/null
@@ -0,0 +1,14 @@
+<!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML//EN">
+<html>
+<head>
+<link rel="stylesheet" href="../js/resources/js-test-style.css">
+<script src="../js/resources/js-test-pre.js"></script>
+<script src="resources/utilities.js"></script>
+</head>
+<body>
+<p id="description"></p>
+<div id="console"></div>
+<script src="script-tests/host.js"></script>
+<script src="../js/resources/js-test-post.js"></script>
+</body>
+</html>
diff --git a/LayoutTests/fast/url/script-tests/host.js b/LayoutTests/fast/url/script-tests/host.js
new file mode 100644 (file)
index 0000000..2f7ee87
--- /dev/null
@@ -0,0 +1,72 @@
+description("Canonicalization of host names.");
+
+cases = [ // Each entry is [input host, expected canonical host].
+  // Basic canonicalization, uppercase should be converted to lowercase.
+  ["GoOgLe.CoM", "google.com"],
+  // Spaces and some other characters should be escaped.
+  ["Goo%20 goo%7C|.com", "goo%20%20goo%7C%7C.com"],
+  // Exciting different types of spaces!
+  ["GOO\u00a0\u3000goo.com", "goo%20%20goo.com"],
+  // Other types of space (no-break, zero-width, zero-width-no-break) are
+  // name-prepped away to nothing.
+  ["GOO\u200b\u2060\ufeffgoo.com", "googoo.com"],
+  // Ideographic full stop (full-width period for Chinese, etc.) should be
+  // treated as a dot.
+  ["www.foo\u3002" + "bar.com", "www.foo.bar.com"],
+  // Invalid unicode characters should fail...
+  // ...In wide input, ICU will barf and we'll end up with the input as
+  //    escaped UTF-8 (the invalid character should be replaced with the
+  //    replacement character).
+  ["\ufdd0zyx.com", "%EF%BF%BDzyx.com"],
+  // ...This is the same as the previous case but with the input percent-escaped.
+  ["%ef%b7%90zyx.com", "%EF%BF%BDzyx.com"],
+  // Test name prepping, fullwidth input should be converted to ASCII and NOT
+  // IDN-ized. This is "Go" in fullwidth UTF-8/UTF-16.
+  ["\uff27\uff4f.com", "go.com"],
+  // Test that fullwidth escaped values are properly name-prepped,
+  // then converted or rejected.
+  // ...%41 in fullwidth = 'A' (also as escaped UTF-8 input)
+  ["\uff05\uff14\uff11.com", "a.com"],
+  ["%ef%bc%85%ef%bc%94%ef%bc%91.com", "a.com"],
+  // ...%00 in fullwidth should fail (also as escaped UTF-8 input)
+  ["\uff05\uff10\uff10.com", "%00.com"],
+  ["%ef%bc%85%ef%bc%90%ef%bc%90.com", "%00.com"],
+  // Basic IDN support, UTF-8 and UTF-16 input should be converted to IDN
+  ["\u4f60\u597d\u4f60\u597d", "xn--6qqa088eba"],
+  // Mixed UTF-8 and escaped UTF-8 (narrow case) and UTF-16 and escaped
+  // UTF-8 (wide case). The output should be equivalent to the true wide
+  // character input above.
+  ["%E4%BD%A0%E5%A5%BD\u4f60\u597d", "xn--6qqa088eba"],
+  // Invalid escaped characters should fail and the percents should be
+  // escaped.
+  ["%zz%66%a", "%25zzf%25a"],
+  // If we get an invalid character that has been escaped.
+  ["%25", "%25"],
+  ["hello%00", "hello%00"],
+  // Escaped numbers should be treated like IP addresses if they are.
+  ["%30%78%63%30%2e%30%32%35%30.01", "192.168.0.1"],
+  ["%30%78%63%30%2e%30%32%35%30.01%2e", "192.168.0.1"],
+  // Invalid escaping should trigger the regular host error handling.
+  ["%3g%78%63%30%2e%30%32%35%30%2E.01", "%253gxc0.0250..01"],
+  // Something that isn't exactly an IP should get treated as a host and
+  // spaces escaped.
+  ["192.168.0.1 hello", "192.168.0.1%20hello"],
+  // Fullwidth and escaped UTF-8 fullwidth should still be treated as IP.
+  // These are "0Xc0.0250.01" in fullwidth.
+  ["\uff10\uff38\uff43\uff10\uff0e\uff10\uff12\uff15\uff10\uff0e\uff10\uff11", "192.168.0.1"],
+  // Broken IP addresses get marked as such.
+  ["192.168.0.257", "192.168.0.257"],
+  ["[google.com]", "[google.com]"],
+  // Cyrillic letter followed by ( should return punycode for ( escaped before the punycode string was created. I.e.
+  // if ( is escaped after the punycode is created we would get xn--%28-8tb (incorrect).
+  ["\u0442(", "xn--%28-7ed"]
+];
+
+for (var i = 0; i < cases.length; ++i) { // Wrap each host in http://.../ and compare canonicalize() output with the expected URL.
+  test_vector = cases[i][0];
+  expected_result = cases[i][1];
+  shouldBe("canonicalize('http://" + test_vector + "/')",
+           "'http://" + expected_result + "/'");
+}
+
+var successfullyParsed = true; // Shows up as "PASS successfullyParsed is true" in host-expected.txt; presumably checked by js-test-post.js.