2010-10-21 Carlos Garcia Campos <cgarcia@igalia.com>
author commit-queue@webkit.org <commit-queue@webkit.org@268f45cc-cd09-0410-ab3c-d52691b4dbfc>
Thu, 21 Oct 2010 19:33:10 +0000 (19:33 +0000)
committer commit-queue@webkit.org <commit-queue@webkit.org@268f45cc-cd09-0410-ab3c-d52691b4dbfc>
Thu, 21 Oct 2010 19:33:10 +0000 (19:33 +0000)
        Reviewed by Martin Robinson.

        [GTK] Use GCharsetConverter instead of g_iconv in TextCodecGtk
        https://bugs.webkit.org/show_bug.cgi?id=47896

        * wtf/gobject/GTypedefs.h:
2010-10-21  Carlos Garcia Campos  <cgarcia@igalia.com>

        Reviewed by Martin Robinson.

        [GTK] Use GCharsetConverter instead of g_iconv in TextCodecGtk
        https://bugs.webkit.org/show_bug.cgi?id=47896

        It makes error handling easier. Fixes tests:
         fast/encoding/invalid-multi-byte-over-consumption.html
         fast/encoding/invalid-xml.html
         fast/encoding/japanese-encoding-mix.html
        when building with glib unicode

        * platform/text/gtk/TextCodecGtk.cpp:
        (WebCore::TextCodecGtk::TextCodecGtk):
        (WebCore::TextCodecGtk::~TextCodecGtk):
        (WebCore::TextCodecGtk::createIConvDecoder):
        (WebCore::TextCodecGtk::createIConvEncoder):
        (WebCore::TextCodecGtk::decode):
        (WebCore::TextCodecGtk::encode):
        * platform/text/gtk/TextCodecGtk.h:

git-svn-id: https://svn.webkit.org/repository/webkit/trunk@70257 268f45cc-cd09-0410-ab3c-d52691b4dbfc

JavaScriptCore/ChangeLog
JavaScriptCore/wtf/gobject/GTypedefs.h
WebCore/ChangeLog
WebCore/platform/text/gtk/TextCodecGtk.cpp
WebCore/platform/text/gtk/TextCodecGtk.h

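diff --git a/JavaScriptCore/ChangeLog b/JavaScriptCore/ChangeLog
index 332761d..def1abf 100644
--- a/JavaScriptCore/ChangeLog
+++ b/JavaScriptCore/ChangeLog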
@@ -1,3 +1,12 @@
+2010-10-21  Carlos Garcia Campos  <cgarcia@igalia.com>
+
+        Reviewed by Martin Robinson.
+
+        [GTK] Use GCharsetConverter instead of g_iconv in TextCodecGtk
+        https://bugs.webkit.org/show_bug.cgi?id=47896
+
+        * wtf/gobject/GTypedefs.h:
+
 2010-10-21  Adam Barth  <abarth@webkit.org>
 
         Unreviewed, rolling out r70174.
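diff --git a/JavaScriptCore/wtf/gobject/GTypedefs.h b/JavaScriptCore/wtf/gobject/GTypedefs.h
index b1600c2..76d1b1a 100644
--- a/JavaScriptCore/wtf/gobject/GTypedefs.h
+++ b/JavaScriptCore/wtf/gobject/GTypedefs.h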
@@ -38,6 +38,7 @@ typedef void* gpointer;
 
 typedef struct _GAsyncResult GAsyncResult;
 typedef struct _GCancellable GCancellable;
+typedef struct _GCharsetConverter GCharsetConverter;
 typedef struct _GCond GCond;
 typedef struct _GDir GDir;
 typedef struct _GdkAtom* GdkAtom;
diff --git a/WebCore/ChangeLog b/WebCore/ChangeLog
index d113ae4..010aa4a 100644
--- a/WebCore/ChangeLog
+++ b/WebCore/ChangeLog
@@ -1,3 +1,25 @@
+2010-10-21  Carlos Garcia Campos  <cgarcia@igalia.com>
+
+        Reviewed by Martin Robinson.
+
+        [GTK] Use GCharsetConverter instead of g_iconv in TextCodecGtk
+        https://bugs.webkit.org/show_bug.cgi?id=47896
+
+        It makes error handling easier. Fixes tests:
+         fast/encoding/invalid-multi-byte-over-consumption.html
+         fast/encoding/invalid-xml.html
+         fast/encoding/japanese-encoding-mix.html
+        when building with glib unicode
+
+        * platform/text/gtk/TextCodecGtk.cpp:
+        (WebCore::TextCodecGtk::TextCodecGtk):
+        (WebCore::TextCodecGtk::~TextCodecGtk):
+        (WebCore::TextCodecGtk::createIConvDecoder):
+        (WebCore::TextCodecGtk::createIConvEncoder):
+        (WebCore::TextCodecGtk::decode):
+        (WebCore::TextCodecGtk::encode):
+        * platform/text/gtk/TextCodecGtk.h:
+
 2010-10-21  Chris Fleizach  <cfleizach@apple.com>
 
         Reviewed by Beth Dakin.
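diff --git a/WebCore/platform/text/gtk/TextCodecGtk.cpp b/WebCore/platform/text/gtk/TextCodecGtk.cpp
index 5fa38e2..bf6afcd 100644
--- a/WebCore/platform/text/gtk/TextCodecGtk.cpp
+++ b/WebCore/platform/text/gtk/TextCodecGtk.cpp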
@@ -29,6 +29,7 @@
 #include "config.h"
 #include "TextCodecGtk.h"
 
+#include <gio/gio.h>
 #include "GOwnPtr.h"
 #include "Logging.h"
 #include "PlatformString.h"
@@ -410,106 +411,104 @@ void TextCodecGtk::registerExtendedCodecs(TextCodecRegistrar registrar)
 TextCodecGtk::TextCodecGtk(const TextEncoding& encoding)
     : m_encoding(encoding)
     , m_numBufferedBytes(0)
-    , m_iconvDecoder(reinterpret_cast<GIConv>(-1))
-    , m_iconvEncoder(reinterpret_cast<GIConv>(-1))
 {
 }
 
 TextCodecGtk::~TextCodecGtk()
 {
-    if (m_iconvDecoder != reinterpret_cast<GIConv>(-1)) {
-        g_iconv_close(m_iconvDecoder);
-        m_iconvDecoder = reinterpret_cast<GIConv>(-1);
-    }
-    if (m_iconvEncoder != reinterpret_cast<GIConv>(-1)) {
-        g_iconv_close(m_iconvEncoder);
-        m_iconvEncoder = reinterpret_cast<GIConv>(-1);
-    }
 }
 
 void TextCodecGtk::createIConvDecoder() const
 {
-    ASSERT(m_iconvDecoder == reinterpret_cast<GIConv>(-1));
+    ASSERT(!m_iconvDecoder);
 
-    m_iconvDecoder = g_iconv_open(internalEncodingName, m_encoding.name());
+    m_iconvDecoder = adoptPlatformRef(g_charset_converter_new(internalEncodingName, m_encoding.name(), 0));
 }
 
 void TextCodecGtk::createIConvEncoder() const
 {
-    ASSERT(m_iconvDecoder == reinterpret_cast<GIConv>(-1));
+    ASSERT(!m_iconvEncoder);
 
-    m_iconvEncoder = g_iconv_open(m_encoding.name(), internalEncodingName);
+    m_iconvEncoder = adoptPlatformRef(g_charset_converter_new(m_encoding.name(), internalEncodingName, 0));
 }
 
 String TextCodecGtk::decode(const char* bytes, size_t length, bool flush, bool stopOnError, bool& sawError)
 {
     // Get a converter for the passed-in encoding.
-    if (m_iconvDecoder == reinterpret_cast<GIConv>(-1)) {
+    if (!m_iconvDecoder)
         createIConvDecoder();
-        ASSERT(m_iconvDecoder != reinterpret_cast<GIConv>(-1));
-        if (m_iconvDecoder == reinterpret_cast<GIConv>(-1)) {
-            LOG_ERROR("Error creating IConv encoder even though encoding was in table.");
-            return String();
-        }
+    if (!m_iconvDecoder) {
+        LOG_ERROR("Error creating IConv encoder even though encoding was in table.");
+        return String();
     }
 
-    size_t countWritten, countRead, conversionLength;
-    const char* conversionBytes;
+    Vector<UChar> result;
+
+    gsize bytesRead = 0;
+    gsize bytesWritten = 0;
+    const gchar* input = bytes;
+    gsize inputLength = length;
+    gchar buffer[ConversionBufferSize];
+    int flags = !length ? G_CONVERTER_INPUT_AT_END : G_CONVERTER_NO_FLAGS;
+    if (flush)
+        flags |= G_CONVERTER_FLUSH;
+
+    bool bufferWasFull = false;
     char* prefixedBytes = 0;
 
     if (m_numBufferedBytes) {
-        conversionLength = length + m_numBufferedBytes;
-        prefixedBytes = static_cast<char*>(fastMalloc(conversionLength));
+        inputLength = length + m_numBufferedBytes;
+        prefixedBytes = static_cast<char*>(fastMalloc(inputLength));
         memcpy(prefixedBytes, m_bufferedBytes, m_numBufferedBytes);
         memcpy(prefixedBytes + m_numBufferedBytes, bytes, length);
-        
-        conversionBytes = prefixedBytes;
-        
+
+        input = prefixedBytes;
+
         // all buffered bytes are consumed now
         m_numBufferedBytes = 0;
-    } else {
-        // no previously buffered partial data, 
-        // just convert the data that was passed in
-        conversionBytes = bytes;
-        conversionLength = length;
     }
 
-    GOwnPtr<GError> err;
-    GOwnPtr<UChar> buffer;
-
-    buffer.outPtr() = reinterpret_cast<UChar*>(g_convert_with_iconv(conversionBytes, conversionLength, m_iconvDecoder, &countRead, &countWritten, &err.outPtr())); 
-
-
-    if (err) {
-        LOG_ERROR("GIConv conversion error, Code %d: \"%s\"", err->code, err->message);
-        m_numBufferedBytes = 0; // reset state for subsequent calls to decode
-        fastFree(prefixedBytes);
-        sawError = true;
-        return String();
-    }
-    
-    // Partial input at the end of the string may not result in an error being raised. 
-    // From the gnome library documentation on g_convert_with_iconv:
-    // "Even if the conversion was successful, this may be less than len if there were partial characters at the end of the input."
-    // That's why we need to compare conversionLength against countRead 
-
-    m_numBufferedBytes = conversionLength - countRead;
-    if (m_numBufferedBytes > 0) {
-        if (flush) {
-            LOG_ERROR("Partial bytes at end of input while flush requested.");
-            m_numBufferedBytes = 0; // reset state for subsequent calls to decode
-            fastFree(prefixedBytes);
-            sawError = true;
-            return String();
+    do {
+        GOwnPtr<GError> error;
+        GConverterResult res = g_converter_convert(G_CONVERTER(m_iconvDecoder.get()),
+                                                   input, inputLength,
+                                                   buffer, sizeof(buffer),
+                                                   static_cast<GConverterFlags>(flags),
+                                                   &bytesRead, &bytesWritten,
+                                                   &error.outPtr());
+        input += bytesRead;
+        inputLength -= bytesRead;
+
+        if (res == G_CONVERTER_ERROR) {
+            if (g_error_matches(error.get(), G_IO_ERROR, G_IO_ERROR_PARTIAL_INPUT)) {
+                // There is not enough input to fully determine what the conversion should produce,
+                // save it to a buffer to prepend it to the next input.
+                memcpy(m_bufferedBytes, input, inputLength);
+                m_numBufferedBytes = inputLength;
+                inputLength = 0;
+            } else if (g_error_matches(error.get(), G_IO_ERROR, G_IO_ERROR_NO_SPACE))
+                bufferWasFull = true;
+            else if (g_error_matches(error.get(), G_IO_ERROR, G_IO_ERROR_INVALID_DATA)) {
+                if (stopOnError)
+                    sawError = true;
+                if (inputLength) {
+                    // Ignore invalid character.
+                    input += 1;
+                    inputLength -= 1;
+                }
+            } else {
+                sawError = true;
+                LOG_ERROR("GIConv conversion error, Code %d: \"%s\"", error->code, error->message);
+                m_numBufferedBytes = 0; // Reset state for subsequent calls to decode.
+                fastFree(prefixedBytes);
+                return String();
+            }
         }
-        memcpy(m_bufferedBytes, conversionBytes + countRead, m_numBufferedBytes);
-    }
 
-    fastFree(prefixedBytes);
-    
-    Vector<UChar> result;
+        result.append(reinterpret_cast<UChar*>(buffer), bytesWritten / sizeof(UChar));
+    } while ((inputLength || bufferWasFull) && !sawError);
 
-    result.append(buffer.get(), countWritten / sizeof(UChar));
+    fastFree(prefixedBytes);
 
     return String::adopt(result);
 }
@@ -519,23 +518,42 @@ CString TextCodecGtk::encode(const UChar* characters, size_t length, Unencodable
     if (!length)
         return "";
 
-    if (m_iconvEncoder == reinterpret_cast<GIConv>(-1))
+    if (!m_iconvEncoder)
         createIConvEncoder();
-    if (m_iconvEncoder == reinterpret_cast<GIConv>(-1))
+    if (!m_iconvEncoder) {
+        LOG_ERROR("Error creating IConv encoder even though encoding was in table.");
         return CString();
+    }
 
-    size_t count;
-
-    GOwnPtr<GError> err;
-    GOwnPtr<char> buffer;
-
-    buffer.outPtr() = g_convert_with_iconv(reinterpret_cast<const char*>(characters), length * sizeof(UChar), m_iconvEncoder, 0, &count, &err.outPtr());
-    if (err) {
-        LOG_ERROR("GIConv conversion error, Code %d: \"%s\"", err->code, err->message);
+    gsize bytesRead = 0;
+    gsize bytesWritten = 0;
+    const gchar* input = reinterpret_cast<const char*>(characters);
+    gsize inputLength = length * sizeof(UChar);
+    gchar buffer[ConversionBufferSize];
+    Vector<char> result;
+    GOwnPtr<GError> error;
+
+    size_t size = 0;
+    do {
+        g_converter_convert(G_CONVERTER(m_iconvEncoder.get()),
+                            input, inputLength,
+                            buffer, sizeof(buffer),
+                            G_CONVERTER_INPUT_AT_END,
+                            &bytesRead, &bytesWritten,
+                            &error.outPtr());
+        input += bytesRead;
+        inputLength -= bytesRead;
+        result.grow(size + bytesWritten);
+        memcpy(result.data() + size, buffer, bytesWritten);
+        size += bytesWritten;
+    } while (inputLength && !error.get());
+
+    if (error) {
+        LOG_ERROR("GIConv conversion error, Code %d: \"%s\"", error->code, error->message);
         return CString();
     }
 
-    return CString(buffer.get(), count);
+    return CString(result.data(), size);
 }
 
 } // namespace WebCore
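diff --git a/WebCore/platform/text/gtk/TextCodecGtk.h b/WebCore/platform/text/gtk/TextCodecGtk.h
index 65ddfb8..1fb8df9 100644
--- a/WebCore/platform/text/gtk/TextCodecGtk.h
+++ b/WebCore/platform/text/gtk/TextCodecGtk.h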
@@ -29,6 +29,7 @@
 #ifndef TextCodecGTK_h
 #define TextCodecGTK_h
 
+#include "GRefPtr.h"
 #include <glib.h>
 #include "TextCodec.h"
 #include "TextEncoding.h"
@@ -56,8 +57,8 @@ namespace WebCore {
         TextEncoding m_encoding;
         size_t m_numBufferedBytes;
         unsigned char m_bufferedBytes[16]; // bigger than any single multi-byte character        
-        mutable GIConv m_iconvDecoder;
-        mutable GIConv m_iconvEncoder;
+        mutable PlatformRefPtr<GCharsetConverter> m_iconvDecoder;
+        mutable PlatformRefPtr<GCharsetConverter> m_iconvEncoder;
     };
 
 } // namespace WebCore