Reviewed by Darin.
authorap <ap@268f45cc-cd09-0410-ab3c-d52691b4dbfc>
Sun, 7 May 2006 18:01:59 +0000 (18:01 +0000)
committerap <ap@268f45cc-cd09-0410-ab3c-d52691b4dbfc>
Sun, 7 May 2006 18:01:59 +0000 (18:01 +0000)
        - http://bugzilla.opendarwin.org/show_bug.cgi?id=8769
          TextEncoding::fromUnicode() - support non-BMP characters and convert to NFC

        Tests:
        * fast/forms/form-data-encoding.html
        * fast/forms/form-data-encoding-2.html

        * platform/mac/TextEncodingMac.cpp:
        (WebCore::TextEncoding::fromUnicode): Normalize the string; handle surrogate pairs.

git-svn-id: https://svn.webkit.org/repository/webkit/trunk@14217 268f45cc-cd09-0410-ab3c-d52691b4dbfc

LayoutTests/ChangeLog
LayoutTests/fast/forms/form-data-encoding-2-expected.txt [new file with mode: 0644]
LayoutTests/fast/forms/form-data-encoding-2.html [new file with mode: 0644]
LayoutTests/fast/forms/form-data-encoding.html
WebCore/ChangeLog
WebCore/platform/mac/TextEncodingMac.cpp

index b91e25c16be6461c96c5993e485d35f42f82e6ff..10efce53453455740a90098fad7c5efa742d7b49 100644 (file)
@@ -1,3 +1,14 @@
+2006-05-07  Alexey Proskuryakov  <ap@nypop.com>
+
+        Reviewed by Darin.
+
+        - Tests for http://bugzilla.opendarwin.org/show_bug.cgi?id=8769
+          TextEncoding::fromUnicode() - support non-BMP characters and convert to NFC
+
+        * fast/forms/form-data-encoding.html: Cover more cases for Latin-1 encoding.
+        * fast/forms/form-data-encoding-2.html: Added. Test conversion to UTF-8.
+        * fast/forms/form-data-encoding-2-expected.txt: Added.
+
 2006-05-05  Alexey Proskuryakov  <ap@nypop.com>
 
         Reviewed by Maciej.
diff --git a/LayoutTests/fast/forms/form-data-encoding-2-expected.txt b/LayoutTests/fast/forms/form-data-encoding-2-expected.txt
new file mode 100644 (file)
index 0000000..662eb14
--- /dev/null
@@ -0,0 +1,4 @@
+
+Success
+
+
diff --git a/LayoutTests/fast/forms/form-data-encoding-2.html b/LayoutTests/fast/forms/form-data-encoding-2.html
new file mode 100644 (file)
index 0000000..ba7a8ad
--- /dev/null
@@ -0,0 +1,38 @@
+<html>
+<head>
+<meta http-equiv="content-type" content="text/html; charset=utf-8">
+<title>Charsets and submitting forms</title>
+</head>
+<body>
+<form action="?" name=f>
+ <input type=hidden name=q value="&#1105;&#66560;&#119083;çc"><br>
+</form>
+<script>
+if (window.layoutTestController) {
+    layoutTestController.dumpAsText();
+    layoutTestController.waitUntilDone();
+}
+
+if (document.URL.substring(0, 4) == "file") {
+
+    if (document.URL.indexOf('?') == -1) {
+        document.f.q.value += "\u0327"; // cedilla for 'c'
+        document.f.submit();
+
+    } else {
+        if (unescape(document.URL.substring(document.URL.indexOf('?')+1, document.URL.length)) == unescape("q=%D1%91%F0%90%90%80%F0%9D%84%AB%C3%A7%C3%A7"))
+            document.write("<p>Success</p>");
+        else
+            document.write("<p>Failure</p>");
+        
+        if (window.layoutTestController)
+            layoutTestController.notifyDone();
+    }
+
+} else {
+
+    document.write("<p>This test doesn't work directly from bugzilla, please save it to a local file first.</p>");
+}
+</script>
+</body>
+</html>
\ No newline at end of file
index 6bc28e1cb45f90af6760e5390f7a96e779d0bdc8..47364a1676a3ed202f5e45d0ce0d8dfac688fc5d 100644 (file)
@@ -5,34 +5,33 @@
 </head>
 <body>
 <form action="?" name=f>
- <input type=hidden name=q value="&#1090;&#1077;&#1089;&#1090;"><br>
+ <input type=hidden name=q value="&#1105;&#66560;&#119083;çc"><br>
 </form>
 <script>
 if (window.layoutTestController) {
-       layoutTestController.dumpAsText();
-       layoutTestController.waitUntilDone();
+    layoutTestController.dumpAsText();
+    layoutTestController.waitUntilDone();
 }
 
 if (document.URL.substring(0, 4) == "file") {
 
-       if (document.URL.indexOf('?') == -1) {
+    if (document.URL.indexOf('?') == -1) {
+        document.f.q.value += "\u0327"; // cedilla for 'c'
+        document.f.submit();
 
-               document.f.submit();
-
-       } else {
-               
-               if (unescape(document.URL.substring(document.URL.indexOf('?')+1, document.URL.length)) == unescape("q=%26%231090%3B%26%231077%3B%26%231089%3B%26%231090%3B"))
-                       document.write("<p>Success</p>");
-               else
-                       document.write("<p>Failure</p>");
-               
-               if (window.layoutTestController)
-                       layoutTestController.notifyDone();
-       }
+    } else {
+        if (unescape(document.URL.substring(document.URL.indexOf('?')+1, document.URL.length)) == unescape("q=%26%231105%3B%26%2366560%3B%26%23119083%3B%E7%E7"))
+            document.write("<p>Success</p>");
+        else
+            document.write("<p>Failure</p>");
+        
+        if (window.layoutTestController)
+            layoutTestController.notifyDone();
+    }
 
 } else {
 
-       document.write("<p>This test doesn't work directly from bugzilla, please save it to a local file first.</p>");
+    document.write("<p>This test doesn't work directly from bugzilla, please save it to a local file first.</p>");
 }
 </script>
 </body>
index 670f0f6f6f22842a00eec0e78f2fd5d529d71b82..3ddea82dce4f13f34fdf5f5e006a17bb7a39ff08 100644 (file)
@@ -1,3 +1,17 @@
+2006-05-07  Alexey Proskuryakov  <ap@nypop.com>
+
+        Reviewed by Darin.
+
+        - http://bugzilla.opendarwin.org/show_bug.cgi?id=8769
+          TextEncoding::fromUnicode() - support non-BMP characters and convert to NFC
+
+        Tests: 
+        * fast/forms/form-data-encoding.html
+        * fast/forms/form-data-encoding-2.html
+
+        * platform/mac/TextEncodingMac.cpp:
+        (WebCore::TextEncoding::fromUnicode): Normalize the string; handle surrogate pairs.
+
 2006-05-06  David Hyatt  <hyatt@apple.com>
 
         Rename WebCoreFont to FontPlatformData and WebTextRenderer to FontData.  Merge them into the
index c30f35fce0dad0c833f61f1ae4fffaba4d1d6f92..3e6575051e626b8988fd22326d5df7c0f96467ea 100644 (file)
@@ -53,35 +53,47 @@ DeprecatedCString TextEncoding::fromUnicode(const DeprecatedString &qcs, bool al
     DeprecatedString copy = qcs;
     copy.replace(QChar('\\'), backslashAsCurrencySymbol());
     CFStringRef cfs = copy.getCFString();
+    CFMutableStringRef cfms = CFStringCreateMutableCopy(0, 0, cfs); // in rare cases, normalization can make the string longer, thus no limit on its length
+    CFStringNormalize(cfms, kCFStringNormalizationFormC);
     
     CFIndex startPos = 0;
-    CFIndex charactersLeft = CFStringGetLength(cfs);
-    DeprecatedCString result(1); // for trailng zero
+    CFIndex charactersLeft = CFStringGetLength(cfms);
+    DeprecatedCString result(1); // for trailing zero
 
     while (charactersLeft > 0) {
         CFRange range = CFRangeMake(startPos, charactersLeft);
         CFIndex bufferLength;
-        CFStringGetBytes(cfs, range, encoding, allowEntities ? 0 : '?', false, NULL, 0x7FFFFFFF, &bufferLength);
+        CFStringGetBytes(cfms, range, encoding, allowEntities ? 0 : '?', false, NULL, 0x7FFFFFFF, &bufferLength);
         
         DeprecatedCString chunk(bufferLength + 1);
         unsigned char *buffer = reinterpret_cast<unsigned char *>(chunk.data());
-        CFIndex charactersConverted = CFStringGetBytes(cfs, range, encoding, allowEntities ? 0 : '?', false, buffer, bufferLength, &bufferLength);
+        CFIndex charactersConverted = CFStringGetBytes(cfms, range, encoding, allowEntities ? 0 : '?', false, buffer, bufferLength, &bufferLength);
         buffer[bufferLength] = 0;
         result.append(chunk);
         
         if (charactersConverted != charactersLeft) {
-            // FIXME: support surrogate pairs
-            UniChar badChar = CFStringGetCharacterAtIndex(cfs, startPos + charactersConverted);
+            unsigned int badChar = CFStringGetCharacterAtIndex(cfms, startPos + charactersConverted);
+            ++charactersConverted;
+
+            if ((badChar & 0xfc00) == 0xd800 &&     // is high surrogate
+                  charactersConverted != charactersLeft) {
+                UniChar low = CFStringGetCharacterAtIndex(cfms, startPos + charactersConverted);
+                if ((low & 0xfc00) == 0xdc00) {     // is low surrogate
+                    badChar <<= 10;
+                    badChar += low;
+                    badChar += 0x10000 - (0xd800 << 10) - 0xdc00;
+                    ++charactersConverted;
+                }
+            }
             char buf[16];
             sprintf(buf, "&#%u;", badChar);
             result.append(buf);
-            
-            ++charactersConverted;
         }
         
         startPos += charactersConverted;
         charactersLeft -= charactersConverted;
     }
+    CFRelease(cfms);
     return result;
 }