WebCore:
authorap <ap@268f45cc-cd09-0410-ab3c-d52691b4dbfc>
Thu, 13 Apr 2006 17:03:16 +0000 (17:03 +0000)
committerap <ap@268f45cc-cd09-0410-ab3c-d52691b4dbfc>
Thu, 13 Apr 2006 17:03:16 +0000 (17:03 +0000)
        Reviewed by Darin.

        - fix http://bugzilla.opendarwin.org/show_bug.cgi?id=7602
        Only use fixUpChar() for entities

        * html/HTMLTokenizer.cpp: Only use fixUpChar() when handling entities.
        (WebCore::HTMLTokenizer::parseSpecial):
        (WebCore::HTMLTokenizer::parseText):
        (WebCore::HTMLTokenizer::parseTag):
        (WebCore::HTMLTokenizer::write):
        * platform/StreamingTextDecoder.cpp:
        (WebCore::StreamingTextDecoder::convert): Remove the special case for Latin-1, because it is already handled
        via effectiveEncoding().
        * platform/StreamingTextDecoder.h: Remove convertLatin1().

LayoutTests:
        Tests that Latin-1 is handled as windows-1252
        (see http://bugzilla.opendarwin.org/show_bug.cgi?id=7602). Some existing tests
        already covered this, but only indirectly.

        * fast/encoding/latin1-winlatin-expected.txt: Added.
        * fast/encoding/latin1-winlatin.html: Added.

git-svn-id: https://svn.webkit.org/repository/webkit/trunk@13863 268f45cc-cd09-0410-ab3c-d52691b4dbfc

LayoutTests/ChangeLog
LayoutTests/fast/encoding/latin1-winlatin-expected.txt [new file with mode: 0644]
LayoutTests/fast/encoding/latin1-winlatin.html [new file with mode: 0644]
WebCore/ChangeLog
WebCore/html/HTMLTokenizer.cpp
WebCore/platform/StreamingTextDecoder.cpp
WebCore/platform/StreamingTextDecoder.h

index 88b83a0ca07ac9f12b5769f182564bc6fd39b624..08e29bd6cf112dbef68f6d496ea39dcca430f604 100644 (file)
@@ -1,3 +1,14 @@
+2006-04-13  Alexey Proskuryakov  <ap@nypop.com>
+
+        Test created by Maciej.
+
+        Tests that Latin-1 is handled as windows-1252
+        (see http://bugzilla.opendarwin.org/show_bug.cgi?id=7602). Some existing tests
+        already covered this, but only indirectly.
+
+        * fast/encoding/latin1-winlatin-expected.txt: Added.
+        * fast/encoding/latin1-winlatin.html: Added.
+
 2006-04-12  Justin Garcia  <justin.garcia@apple.com>
 
         Reviewed by harrison
         * editing/deleting/merge-unrendered-space-expected.txt: Added.
         * editing/deleting/merge-unrendered-space.html: Added.
 
+2006-04-10  Alexey Proskuryakov  <ap@nypop.com>
+
+        Reviewed by Darin.
+
+        - fix http://bugzilla.opendarwin.org/show_bug.cgi?id=8157
+          Make HTTP tests using Perl use .pl extension
+
+        * http/tests/incremental/slow-utf8-text.pl: Added (renamed from .text).
+        * http/tests/incremental/slow-utf8-text.text: Removed.
+        * http/tests/incremental/split-hex-entities.pl: Added (renamed from .html).
+        * http/tests/incremental/split-hex-entities.html: Removed.
+        * http/tests/incremental/.htaccess: Removed.
+
 2006-04-10  Darin Adler  <darin@apple.com>
 
         * fast/dom/gc-4-expected.txt: Add one blank line (test result was failing).
diff --git a/LayoutTests/fast/encoding/latin1-winlatin-expected.txt b/LayoutTests/fast/encoding/latin1-winlatin-expected.txt
new file mode 100644 (file)
index 0000000..aa7f47a
--- /dev/null
@@ -0,0 +1,19 @@
+We need to treat Latin-1 as if it were windows-1252, see bug 7602.
+
+As entities: €\81ƒ†‡
+
+(escaped): %u20AC%81%u0192%u2020%u2021
+
+Inline: €\81ƒ†‡
+
+(escaped): %u20AC%81%u0192%u2020%u2021
+
+document.write of unicode escapes: \80\81\83\86\87
+
+(escaped): %80%81%83%86%87
+
+innerHTML assignment of unicode escapes: \80\81\83\86\87
+
+(escaped): %80%81%83%86%87
+
+
diff --git a/LayoutTests/fast/encoding/latin1-winlatin.html b/LayoutTests/fast/encoding/latin1-winlatin.html
new file mode 100644 (file)
index 0000000..6df98d8
--- /dev/null
@@ -0,0 +1,15 @@
+<html>
+<head>
+<meta http-equiv='Content-Type' content='text/html; charset=latin1'>
+</head>
+<body>
+<script>
+  if (window.layoutTestController)
+    layoutTestController.dumpAsText();
+</script>
+<p>We need to treat Latin-1 as if it were windows-1252, see <a href="http://bugzilla.opendarwin.org/show_bug.cgi?id=7602">bug 7602</a>.</p>
+<p>As entities: <span id='entities'>&#x80;&#x81;&#x83;&#x86;&#x87;</span></p>
+<script>document.write('<p>(escaped): ' + escape(document.getElementById('entities').innerHTML) + '</p>');</script><p>Inline: <span id='inline'>\80\81\83\86\87</span></p>
+<script>document.write('<p>(escaped): ' + escape(document.getElementById('inline').innerHTML) + '</p>');</script><p>document.write of unicode escapes: <script>document.write('<span id="write">\x80\x81\u0083\u0086\u0087</span>');</script></p><script>document.write('<p>(escaped): ' + escape(document.getElementById('write').innerHTML) + '</p>');</script><p>innerHTML assignment of unicode escapes: <span id='inner'></span></p><script>document.getElementById('inner').innerHTML = '\x80\x81\u0083\u0086\u0087';</script><script>document.write('<p>(escaped): ' + escape(document.getElementById('inner').innerHTML) + '</p>');</script>
+</body>
+</html>
index e424a311f9782d2ccd96b8933c873578f17a6aef..4df123395cd89f4c0485466662af9f984edcad7d 100644 (file)
@@ -1,3 +1,20 @@
+2006-04-13  Alexey Proskuryakov  <ap@nypop.com>
+
+        Reviewed by Darin.
+
+        - fix http://bugzilla.opendarwin.org/show_bug.cgi?id=7602
+        Only use fixUpChar() for entities
+
+        * html/HTMLTokenizer.cpp: Only use fixUpChar() when handling entities.
+        (WebCore::HTMLTokenizer::parseSpecial):
+        (WebCore::HTMLTokenizer::parseText):
+        (WebCore::HTMLTokenizer::parseTag):
+        (WebCore::HTMLTokenizer::write):
+        * platform/StreamingTextDecoder.cpp:
+        (WebCore::StreamingTextDecoder::convert): Remove the special case for Latin-1, because it is already handled 
+        via effectiveEncoding().
+        * platform/StreamingTextDecoder.h: Remove convertLatin1().
+
 2006-04-13  Darin Adler  <darin@apple.com>
 
         * platform/mac/GraphicsContextMac.mm: Fix one no-SVG compile problem by adding
index 27453c4e517ca472d2cdc0776984d4ba3a9ed334..8bdda264498707cd196e36f25cdcfe83b2b649a6 100644 (file)
@@ -8,6 +8,7 @@
               (C) 1999 Antti Koivisto (koivisto@kde.org)
               (C) 2001 Dirk Mueller (mueller@kde.org)
     Copyright (C) 2004, 2005, 2006 Apple Computer, Inc.
+    Copyright (C) 2005, 2006 Alexey Proskuryakov (ap@nypop.com)
 
     This library is free software; you can redistribute it and/or
     modify it under the terms of the GNU Library General Public
@@ -81,11 +82,7 @@ static const char titleEnd [] = "</title";
 //
 // There may be better equivalents
 
-// We need this for entities at least. For non-entity text, we could
-// handle this in the text encoding.
-
-// To cover non-entity text, I think this function would need to be called
-// in more places. There seem to be some places that don't call fixUpChar.
+// We only need this for entities. For non-entity text, we handle this in the text encoding.
 
 static const unsigned short windowsLatin1ExtensionArray[32] = {
     0x20AC, 0x0081, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021, // 80-87
@@ -328,7 +325,7 @@ HTMLTokenizer::State HTMLTokenizer::parseSpecial(SegmentedString &src, State sta
             scriptCodeSize = scriptCodeDest-scriptCode;
         }
         else {
-            scriptCode[scriptCodeSize++] = fixUpChar(*src);
+            scriptCode[scriptCodeSize++] = *src;
             ++src;
         }
     }
@@ -638,7 +635,7 @@ HTMLTokenizer::State HTMLTokenizer::parseText(SegmentedString &src, State state)
             state.setSkipLF(true);
             *dest++ = '\n';
         } else
-            *dest++ = fixUpChar(cc);
+            *dest++ = cc;
         ++src;
     }
 
@@ -1062,7 +1059,7 @@ HTMLTokenizer::State HTMLTokenizer::parseTag(SegmentedString &src, State state)
                         break;
                     }
                 }
-                *dest++ = fixUpChar(*src);
+                *dest++ = *src;
                 ++src;
             }
             break;
@@ -1093,7 +1090,7 @@ HTMLTokenizer::State HTMLTokenizer::parseTag(SegmentedString &src, State state)
                     }
                 }
 
-                *dest++ = fixUpChar(*src);
+                *dest++ = *src;
                 ++src;
             }
             break;
@@ -1455,7 +1452,7 @@ bool HTMLTokenizer::write(const SegmentedString &str, bool appendData)
             ++src;
         } else {
             state.setDiscardLF(false);
-            *dest++ = fixUpChar(*src);
+            *dest++ = *src;
             ++src;
         }
     }
index f60a761f6b69cc0e91335bd097741b0e2ffd5652..0d7a256f7c549bbc5eac6db5564624553ff8928e 100644 (file)
@@ -42,12 +42,6 @@ StreamingTextDecoder::StreamingTextDecoder(const TextEncoding& encoding)
 {
 }
 
-DeprecatedString StreamingTextDecoder::convertLatin1(const unsigned char* s, int length)
-{
-    ASSERT(m_numBufferedBytes == 0);
-    return DeprecatedString(reinterpret_cast<const char *>(s), length);
-}
-
 static const UChar replacementCharacter = 0xFFFD;
 static const UChar BOM = 0xFEFF;
 static const int ConversionBufferSize = 16384;
@@ -230,10 +224,6 @@ DeprecatedString StreamingTextDecoder::convert(const unsigned char *chs, int len
     //#define PARTIAL_CHARACTER_HANDLING_TEST_CHUNK_SIZE 1000
 
     switch (m_encoding.encodingID()) {
-    case Latin1Encoding:
-    case WinLatin1Encoding:
-        return convertLatin1(chs, len);
-
     case UTF16Encoding:
         return convertUTF16(chs, len);
 
index 6f81737aadd2f3d7f18507345a580e5e2fb5e459..5727b38f7919d3b615823f6570ff4c6abf69f15f 100644 (file)
@@ -44,7 +44,6 @@ namespace WebCore {
         DeprecatedString convert(const char*chs, int len, bool flush)
             { return convert(reinterpret_cast<const unsigned char*>(chs), len, flush); }
         DeprecatedString convert(const unsigned char* chs, int len, bool flush);
-        DeprecatedString convertLatin1(const unsigned char* chs, int len);
         DeprecatedString convertUTF16(const unsigned char* chs, int len);
         
         // ICU decoding.