Reviewed by Darin.
author: ap@webkit.org <ap@webkit.org@268f45cc-cd09-0410-ab3c-d52691b4dbfc>
Thu, 15 Nov 2007 05:59:40 +0000 (05:59 +0000)
committer: ap@webkit.org <ap@webkit.org@268f45cc-cd09-0410-ab3c-d52691b4dbfc>
Thu, 15 Nov 2007 05:59:40 +0000 (05:59 +0000)
        http://bugs.webkit.org/show_bug.cgi?id=15982
        Improve JSString UTF-8 decoding

        * API/JSStringRef.cpp:
        (JSStringCreateWithUTF8CString): Use strict decoding, return 0 on error.

        * wtf/unicode/UTF8.cpp:
        (WTF::Unicode::convertUTF16ToUTF8):
        (WTF::Unicode::convertUTF8ToUTF16):
        * wtf/unicode/UTF8.h:
        Made these function names start with a lower case letter.

        * kjs/ustring.cpp: (KJS::UString::UTF8String): Updated for the above renaming.

        * bindings/c/c_utility.cpp:
        (KJS::Bindings::convertUTF8ToUTF16WithLatin1Fallback): Renamed to highlight the difference
        from convertUTF8ToUTF16 in wtf/unicode.
        (KJS::Bindings::convertNPStringToUTF16): Updated for the above renaming.
        (KJS::Bindings::identifierFromNPIdentifier): Ditto.
        * bindings/c/c_utility.h: Made convertUTF8ToUTF16WithLatin1Fallback() a file static.

git-svn-id: https://svn.webkit.org/repository/webkit/trunk@27810 268f45cc-cd09-0410-ab3c-d52691b4dbfc

JavaScriptCore/API/JSStringRef.cpp
JavaScriptCore/ChangeLog
JavaScriptCore/bindings/c/c_utility.cpp
JavaScriptCore/bindings/c/c_utility.h
JavaScriptCore/kjs/ustring.cpp
JavaScriptCore/wtf/unicode/UTF8.cpp
JavaScriptCore/wtf/unicode/UTF8.h

index e9bca87627aada951a12df9cdf6298596a3020da..9a307d045b36665df9c0e0a0377fedc3f411da66 100644 (file)
@@ -54,7 +54,8 @@ JSStringRef JSStringCreateWithUTF8CString(const char* string)
     size_t length = strlen(string);
     Vector< ::UChar, 1024> buffer(length);
     ::UChar* p = buffer.data();
-    ConvertUTF8ToUTF16(&string, string + length, &p, p + length, false);
+    if (conversionOK != convertUTF8ToUTF16(&string, string + length, &p, p + length))
+        return 0;
 
     return toRef(UString(reinterpret_cast<KJS::UChar*>(buffer.data()), p - buffer.data()).rep()->ref());
 }
index 27f39bc33c836a0c90d1fa4a49aa05ba6bc8fc23..56c8262f28c3f26978e4bdff500d983edebaa562 100644 (file)
@@ -1,3 +1,28 @@
+2007-11-14  Alexey Proskuryakov  <ap@webkit.org>
+
+        Reviewed by Darin.
+
+        http://bugs.webkit.org/show_bug.cgi?id=15982
+        Improve JSString UTF-8 decoding
+
+        * API/JSStringRef.cpp:
+        (JSStringCreateWithUTF8CString): Use strict decoding, return 0 on error.
+
+        * wtf/unicode/UTF8.cpp:
+        (WTF::Unicode::convertUTF16ToUTF8):
+        (WTF::Unicode::convertUTF8ToUTF16):
+        * wtf/unicode/UTF8.h:
+        Made these function names start with a lower case letter.
+
+        * kjs/ustring.cpp: (KJS::UString::UTF8String): Updated for the above renaming.
+
+        * bindings/c/c_utility.cpp:
+        (KJS::Bindings::convertUTF8ToUTF16WithLatin1Fallback): Renamed to highlight the difference
+        from convertUTF8ToUTF16 in wtf/unicode.
+        (KJS::Bindings::convertNPStringToUTF16): Updated for the above renaming.
+        (KJS::Bindings::identifierFromNPIdentifier): Ditto.
+        * bindings/c/c_utility.h: Made convertUTF8ToUTF16WithLatin1Fallback() a file static.
+
 2007-11-14  Sam Weinig  <sam@webkit.org>
 
         Rubber-stamped by Anders.
index 81ec7dfe3cac53eac59a32edee72e75d9b11547f..3831bed1fa479783d5589e10cc76d427c094d5e2 100644 (file)
@@ -46,13 +46,7 @@ using namespace WTF::Unicode;
 namespace KJS { namespace Bindings {
 
 // Requires free() of returned UTF16Chars.
-void convertNPStringToUTF16(const NPString *string, NPUTF16 **UTF16Chars, unsigned int *UTF16Length)
-{
-    convertUTF8ToUTF16(string->UTF8Characters, string->UTF8Length, UTF16Chars, UTF16Length);
-}
-
-// Requires free() of returned UTF16Chars.
-void convertUTF8ToUTF16(const NPUTF8* UTF8Chars, int UTF8Length, NPUTF16** UTF16Chars, unsigned int* UTF16Length)
+static void convertUTF8ToUTF16WithLatin1Fallback(const NPUTF8* UTF8Chars, int UTF8Length, NPUTF16** UTF16Chars, unsigned int* UTF16Length)
 {
     ASSERT(UTF8Chars || UTF8Length == 0);
     ASSERT(UTF16Chars);
@@ -69,7 +63,7 @@ void convertUTF8ToUTF16(const NPUTF8* UTF8Chars, int UTF8Length, NPUTF16** UTF16
     ::UChar* targetstart = reinterpret_cast< ::UChar*>(*UTF16Chars);
     ::UChar* targetend = targetstart + UTF8Length;
     
-    ConversionResult result = ConvertUTF8ToUTF16(&sourcestart, sourceend, &targetstart, targetend, true);
+    ConversionResult result = convertUTF8ToUTF16(&sourcestart, sourceend, &targetstart, targetend);
     
     *UTF16Length = targetstart - reinterpret_cast< ::UChar*>(*UTF16Chars);
 
@@ -178,11 +172,17 @@ JSValue *convertNPVariantToValue(ExecState*, const NPVariant* variant, RootObjec
     return jsUndefined();
 }
 
+// Requires free() of returned UTF16Chars.
+void convertNPStringToUTF16(const NPString *string, NPUTF16 **UTF16Chars, unsigned int *UTF16Length)
+{
+    convertUTF8ToUTF16WithLatin1Fallback(string->UTF8Characters, string->UTF8Length, UTF16Chars, UTF16Length);
+}
+
 Identifier identifierFromNPIdentifier(const NPUTF8* name)
 {
     NPUTF16 *methodName;
     unsigned UTF16Length;
-    convertUTF8ToUTF16(name, -1, &methodName, &UTF16Length); // requires free() of returned memory.
+    convertUTF8ToUTF16WithLatin1Fallback(name, -1, &methodName, &UTF16Length); // requires free() of returned memory.
     Identifier identifier((const KJS::UChar*)methodName, UTF16Length);
     free(methodName);
     return identifier;
index ca9110f58f9abdf7ce3a75765d5c9298b43a9547..05ffce096033ddee5699ebf8008da0f278d1645d 100644 (file)
@@ -53,7 +53,6 @@ enum NP_ValueType {
 };
 
 void convertNPStringToUTF16(const NPString*, NPUTF16** UTF16Chars, unsigned int* UTF16Length);
-void convertUTF8ToUTF16(const NPUTF8* UTF8Chars, int UTF8Length, NPUTF16** UTF16Chars, unsigned int* UTF16Length);
 void convertValueToNPVariant(ExecState*, JSValue*, NPVariant* result);
 JSValue* convertNPVariantToValue(ExecState*, const NPVariant*, RootObject*);
 Identifier identifierFromNPIdentifier(const NPUTF8* name);
index 936f5c2c759807456b02dc67ee3f65a19cd29d23..5544aeae2e09005409cc6268574d831939b3b469 100644 (file)
@@ -1282,7 +1282,7 @@ CString UString::UTF8String(bool strict) const
   // Convert to runs of 8-bit characters.
   char* p = buffer.data();
   const ::UChar* d = reinterpret_cast<const ::UChar*>(&data()->uc);
-  ConversionResult result = ConvertUTF16ToUTF8(&d, d + length, &p, p + buffer.size(), strict);
+  ConversionResult result = convertUTF16ToUTF8(&d, d + length, &p, p + buffer.size(), strict);
   if (result != conversionOK)
     return CString();
 
index be7b8776a95ba543310e33cd18846098b78ee8c2..9e713fe36489fa8f46b724b4b1d838f94b28d72f 100644 (file)
@@ -119,7 +119,7 @@ int decodeUTF8Sequence(const char* sequence)
 // for *legal* UTF-8 will be 4 or fewer bytes total.
 static const unsigned char firstByteMark[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
 
-ConversionResult ConvertUTF16ToUTF8(
+ConversionResult convertUTF16ToUTF8(
     const UChar** sourceStart, const UChar* sourceEnd, 
     char** targetStart, char* targetEnd, bool strict)
 {
@@ -230,7 +230,7 @@ static bool isLegalUTF8(const unsigned char* source, int length)
 static const UChar32 offsetsFromUTF8[6] = { 0x00000000UL, 0x00003080UL, 0x000E2080UL, 
             0x03C82080UL, 0xFA082080UL, 0x82082080UL };
 
-ConversionResult ConvertUTF8ToUTF16(
+ConversionResult convertUTF8ToUTF16(
     const char** sourceStart, const char* sourceEnd, 
     UChar** targetStart, UChar* targetEnd, bool strict)
 {
index a09bc41212ea3dda5225bde8867fca66cc977254..a5ed93e94ddb871ad2ad419e7237619c3fd9624e 100644 (file)
@@ -62,11 +62,11 @@ namespace WTF {
     // to the replacement character; otherwise (when the flag is set to strict)
     // they constitute an error.
 
-    ConversionResult ConvertUTF8ToUTF16(
+    ConversionResult convertUTF8ToUTF16(
                     const char** sourceStart, const char* sourceEnd, 
                     UChar** targetStart, UChar* targetEnd, bool strict = true);
 
-    ConversionResult ConvertUTF16ToUTF8(
+    ConversionResult convertUTF16ToUTF8(
                     const UChar** sourceStart, const UChar* sourceEnd, 
                     char** targetStart, char* targetEnd, bool strict = true);
   }