Reviewed by Darin.
authorap <ap@268f45cc-cd09-0410-ab3c-d52691b4dbfc>
Tue, 9 May 2006 08:03:30 +0000 (08:03 +0000)
committerap <ap@268f45cc-cd09-0410-ab3c-d52691b4dbfc>
Tue, 9 May 2006 08:03:30 +0000 (08:03 +0000)
        - http://bugzilla.opendarwin.org/show_bug.cgi?id=8769
          TextEncoding::fromUnicode() - support non-BMP characters and convert to NFC

        Fix the ICU code path, too (currently unused on the Mac).

        * platform/TextEncoding.cpp:
        (WebCore::TextEncoding::fromUnicode): Normalize the string.

git-svn-id: https://svn.webkit.org/repository/webkit/trunk@14253 268f45cc-cd09-0410-ab3c-d52691b4dbfc

WebCore/ChangeLog
WebCore/platform/TextEncoding.cpp

index 8249988..ff71e4b 100644 (file)
@@ -1,3 +1,15 @@
+2006-05-09  Alexey Proskuryakov  <ap@nypop.com>
+
+        Reviewed by Darin.
+
+        - http://bugzilla.opendarwin.org/show_bug.cgi?id=8769
+          TextEncoding::fromUnicode() - support non-BMP characters and convert to NFC
+
+        Fix the ICU code path, too (currently unused on the Mac).
+
+        * platform/TextEncoding.cpp:
+        (WebCore::TextEncoding::fromUnicode): Normalize the string.
+
 2006-05-08  Maciej Stachowiak  <mjs@apple.com>
 
         Reviewed by Tim Hatcher.
index 022c334..33cf82e 100644 (file)
@@ -30,6 +30,7 @@
 #include <kxmlcore/Assertions.h>
 #include <kxmlcore/HashSet.h>
 #include "StreamingTextDecoder.h"
+#include <unicode/unorm.h>
 
 namespace WebCore {
 
@@ -143,7 +144,22 @@ DeprecatedCString TextEncoding::fromUnicode(const DeprecatedString &qcs, bool al
     const UChar* source = reinterpret_cast<const UChar*>(copy.unicode());
     const UChar* sourceLimit = source + copy.length();
 
-    DeprecatedCString result(1); // for trailng zero
+    DeprecatedString normalizedString;
+    if (UNORM_YES != unorm_quickCheck(source, copy.length(), UNORM_NFC, &err)) {
+        normalizedString.truncate(copy.length()); // normalization to NFC rarely increases the length, so this first attempt will usually succeed
+        
+        int32_t normalizedLength = unorm_normalize(source, copy.length(), UNORM_NFC, 0, reinterpret_cast<UChar*>(const_cast<QChar*>(normalizedString.unicode())), copy.length(), &err);
+        if (err == U_BUFFER_OVERFLOW_ERROR) {
+            err = U_ZERO_ERROR;
+            normalizedString.truncate(normalizedLength);
+            normalizedLength = unorm_normalize(source, copy.length(), UNORM_NFC, 0, reinterpret_cast<UChar*>(const_cast<QChar*>(normalizedString.unicode())), normalizedLength, &err);
+        }
+        
+        source = reinterpret_cast<const UChar*>(normalizedString.unicode());
+        sourceLimit = source + normalizedLength;
+    }
+
+    DeprecatedCString result(1); // for trailing zero
 
     if (allowEntities)
         ucnv_setFromUCallBack(conv, UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_DEC, 0, 0, &err);
@@ -152,6 +168,10 @@ DeprecatedCString TextEncoding::fromUnicode(const DeprecatedString &qcs, bool al
         ucnv_setFromUCallBack(conv, UCNV_FROM_U_CALLBACK_SUBSTITUTE, 0, 0, 0, &err);
     }
 
+    ASSERT(U_SUCCESS(err));
+    if (U_FAILURE(err))
+        return DeprecatedCString();
+
     do {
         char* target = buffer;
         char* targetLimit = target + ConversionBufferSize;