xmlDocPtrForString shouldn't upconvert 8-bit string
author rniwa@webkit.org <rniwa@webkit.org@268f45cc-cd09-0410-ab3c-d52691b4dbfc>
Mon, 15 Jul 2013 23:27:35 +0000 (23:27 +0000)
committer rniwa@webkit.org <rniwa@webkit.org@268f45cc-cd09-0410-ab3c-d52691b4dbfc>
Mon, 15 Jul 2013 23:27:35 +0000 (23:27 +0000)
https://bugs.webkit.org/show_bug.cgi?id=118693

Reviewed by Michael Saboff.

Merge https://chromium.googlesource.com/chromium/blink/+/5d9b240db74591c3689415c7b7c5180fadb379e9

Prior to this changeset, we were upconverting the entire XML document.
Let libxml2 parse a document as iso-8859-1 when it's an 8-bit string.

* xml/parser/XMLDocumentParserLibxml2.cpp:
(WebCore::nativeEndianUTF16Encoding):
(WebCore::xmlDocPtrForString):

git-svn-id: https://svn.webkit.org/repository/webkit/trunk@152667 268f45cc-cd09-0410-ab3c-d52691b4dbfc

Source/WebCore/ChangeLog
Source/WebCore/xml/parser/XMLDocumentParserLibxml2.cpp

index b16c6081e40df84dc1997e619c0209ce02b9198b..36ab95782807a1fd341cd873a4200b622510ff56 100644 (file)
@@ -1,3 +1,19 @@
+2013-07-15  Ryosuke Niwa  <rniwa@webkit.org>
+
+        xmlDocPtrForString shouldn't upconvert 8-bit string
+        https://bugs.webkit.org/show_bug.cgi?id=118693
+
+        Reviewed by Michael Saboff.
+
+        Merge https://chromium.googlesource.com/chromium/blink/+/5d9b240db74591c3689415c7b7c5180fadb379e9
+
+        Prior to this changeset, we were upconverting the entire XML document.
+        Let libxml2 parse a document as iso-8859-1 when it's an 8-bit string.
+
+        * xml/parser/XMLDocumentParserLibxml2.cpp:
+        (WebCore::nativeEndianUTF16Encoding):
+        (WebCore::xmlDocPtrForString):
+
 2013-07-15  Roger Fong  <roger_fong@apple.com>
 
         Replace WKCACFTypes include with d3d9 include.
index 6eeab3ef99e910a3f8253664cb1c15d2a52a9b6f..8b8786b7a85b363560ba6222b4378fad7c63c991 100644 (file)
@@ -1388,6 +1388,13 @@ void XMLDocumentParser::doEnd()
 }
 
 #if ENABLE(XSLT)
+static inline const char* nativeEndianUTF16Encoding()
+{
+    const UChar BOM = 0xFEFF;
+    const unsigned char BOMHighByte = *reinterpret_cast<const unsigned char*>(&BOM);
+    return BOMHighByte == 0xFF ? "UTF-16LE" : "UTF-16BE";
+}
+
 void* xmlDocPtrForString(CachedResourceLoader* cachedResourceLoader, const String& source, const String& url)
 {
     if (source.isEmpty())
@@ -1396,16 +1403,14 @@ void* xmlDocPtrForString(CachedResourceLoader* cachedResourceLoader, const Strin
     // Parse in a single chunk into an xmlDocPtr
     // FIXME: Hook up error handlers so that a failure to parse the main document results in
     // good error messages.
-    const UChar BOM = 0xFEFF;
-    const unsigned char BOMHighByte = *reinterpret_cast<const unsigned char*>(&BOM);
+
+    const bool is8Bit = source.is8Bit();
+    const char* characters = is8Bit ? reinterpret_cast<const char*>(source.characters8()) : reinterpret_cast<const char*>(source.characters16());
+    size_t sizeInBytes = source.length() * (is8Bit ? sizeof(LChar) : sizeof(UChar));
+    const char* encoding = is8Bit ? "iso-8859-1" : nativeEndianUTF16Encoding();
 
     XMLDocumentParserScope scope(cachedResourceLoader, errorFunc, 0);
-    xmlDocPtr sourceDoc = xmlReadMemory(reinterpret_cast<const char*>(source.characters()),
-                                        source.length() * sizeof(UChar),
-                                        url.latin1().data(),
-                                        BOMHighByte == 0xFF ? "UTF-16LE" : "UTF-16BE",
-                                        XSLT_PARSE_OPTIONS);
-    return sourceDoc;
+    return xmlReadMemory(characters, sizeInBytes, url.latin1().data(), encoding, XSLT_PARSE_OPTIONS);
 }
 #endif