From 19c158bc717f9fc1794788a1cb272627b053df51 Mon Sep 17 00:00:00 2001 From: ap Date: Sun, 7 May 2006 18:01:59 +0000 Subject: [PATCH] Reviewed by Darin. - http://bugzilla.opendarwin.org/show_bug.cgi?id=8769 TextEncoding::fromUnicode() - support non-BMP characters and convert to NFC Tests: * fast/forms/form-data-encoding.html * fast/forms/form-data-encoding-2.html * platform/mac/TextEncodingMac.cpp: (WebCore::TextEncoding::fromUnicode): Normalize the string; handle surrogate pairs. git-svn-id: https://svn.webkit.org/repository/webkit/trunk@14217 268f45cc-cd09-0410-ab3c-d52691b4dbfc --- LayoutTests/ChangeLog | 11 ++++++ .../forms/form-data-encoding-2-expected.txt | 4 ++ .../fast/forms/form-data-encoding-2.html | 38 +++++++++++++++++++ .../fast/forms/form-data-encoding.html | 33 ++++++++-------- WebCore/ChangeLog | 14 +++++++ WebCore/platform/mac/TextEncodingMac.cpp | 28 ++++++++++---- 6 files changed, 103 insertions(+), 25 deletions(-) create mode 100644 LayoutTests/fast/forms/form-data-encoding-2-expected.txt create mode 100644 LayoutTests/fast/forms/form-data-encoding-2.html diff --git a/LayoutTests/ChangeLog b/LayoutTests/ChangeLog index b91e25c16be6..10efce534534 100644 --- a/LayoutTests/ChangeLog +++ b/LayoutTests/ChangeLog @@ -1,3 +1,14 @@ +2006-05-07 Alexey Proskuryakov + + Reviewed by Darin. + + - Tests for http://bugzilla.opendarwin.org/show_bug.cgi?id=8769 + TextEncoding::fromUnicode() - support non-BMP characters and convert to NFC + + * fast/forms/form-data-encoding.html: Cover more cases for Latin-1 encoding. + * fast/forms/form-data-encoding-2.html: Added. Test conversion to UTF-8. + * fast/forms/form-data-encoding-2-expected.txt: Added. + 2006-05-05 Alexey Proskuryakov Reviewed by Maciej. 
diff --git a/LayoutTests/fast/forms/form-data-encoding-2-expected.txt b/LayoutTests/fast/forms/form-data-encoding-2-expected.txt new file mode 100644 index 000000000000..662eb144284e --- /dev/null +++ b/LayoutTests/fast/forms/form-data-encoding-2-expected.txt @@ -0,0 +1,4 @@ + +Success + + diff --git a/LayoutTests/fast/forms/form-data-encoding-2.html b/LayoutTests/fast/forms/form-data-encoding-2.html new file mode 100644 index 000000000000..ba7a8ad72505 --- /dev/null +++ b/LayoutTests/fast/forms/form-data-encoding-2.html @@ -0,0 +1,38 @@ + + + +Charsets and submitting forms + + +
+
+
+ + + \ No newline at end of file diff --git a/LayoutTests/fast/forms/form-data-encoding.html b/LayoutTests/fast/forms/form-data-encoding.html index 6bc28e1cb45f..47364a1676a3 100644 --- a/LayoutTests/fast/forms/form-data-encoding.html +++ b/LayoutTests/fast/forms/form-data-encoding.html @@ -5,34 +5,33 @@
-
+
diff --git a/WebCore/ChangeLog b/WebCore/ChangeLog index 670f0f6f6f22..3ddea82dce4f 100644 --- a/WebCore/ChangeLog +++ b/WebCore/ChangeLog @@ -1,3 +1,17 @@ +2006-05-07 Alexey Proskuryakov + + Reviewed by Darin. + + - http://bugzilla.opendarwin.org/show_bug.cgi?id=8769 + TextEncoding::fromUnicode() - support non-BMP characters and convert to NFC + + Tests: + * fast/forms/form-data-encoding.html + * fast/forms/form-data-encoding-2.html + + * platform/mac/TextEncodingMac.cpp: + (WebCore::TextEncoding::fromUnicode): Normalize the string; handle surrogate pairs. + 2006-05-06 David Hyatt Rename WebCoreFont to FontPlatformData and WebTextRenderer to FontData. Merge them into the diff --git a/WebCore/platform/mac/TextEncodingMac.cpp b/WebCore/platform/mac/TextEncodingMac.cpp index c30f35fce0da..3e6575051e62 100644 --- a/WebCore/platform/mac/TextEncodingMac.cpp +++ b/WebCore/platform/mac/TextEncodingMac.cpp @@ -53,35 +53,47 @@ DeprecatedCString TextEncoding::fromUnicode(const DeprecatedString &qcs, bool al DeprecatedString copy = qcs; copy.replace(QChar('\\'), backslashAsCurrencySymbol()); CFStringRef cfs = copy.getCFString(); + CFMutableStringRef cfms = CFStringCreateMutableCopy(0, 0, cfs); // in rare cases, normalization can make the string longer, thus no limit on its length + CFStringNormalize(cfms, kCFStringNormalizationFormC); CFIndex startPos = 0; - CFIndex charactersLeft = CFStringGetLength(cfs); - DeprecatedCString result(1); // for trailng zero + CFIndex charactersLeft = CFStringGetLength(cfms); + DeprecatedCString result(1); // for trailing zero while (charactersLeft > 0) { CFRange range = CFRangeMake(startPos, charactersLeft); CFIndex bufferLength; - CFStringGetBytes(cfs, range, encoding, allowEntities ? 
0 : '?', false, NULL, 0x7FFFFFFF, &bufferLength); DeprecatedCString chunk(bufferLength + 1); unsigned char *buffer = reinterpret_cast<unsigned char *>(chunk.data()); - CFIndex charactersConverted = CFStringGetBytes(cfs, range, encoding, allowEntities ? 0 : '?', false, buffer, bufferLength, &bufferLength); + CFIndex charactersConverted = CFStringGetBytes(cfms, range, encoding, allowEntities ? 0 : '?', false, buffer, bufferLength, &bufferLength); buffer[bufferLength] = 0; result.append(chunk); if (charactersConverted != charactersLeft) { - // FIXME: support surrogate pairs - UniChar badChar = CFStringGetCharacterAtIndex(cfs, startPos + charactersConverted); + unsigned int badChar = CFStringGetCharacterAtIndex(cfms, startPos + charactersConverted); + ++charactersConverted; + + if ((badChar & 0xfc00) == 0xd800 && // is high surrogate + charactersConverted != charactersLeft) { + UniChar low = CFStringGetCharacterAtIndex(cfms, startPos + charactersConverted); + if ((low & 0xfc00) == 0xdc00) { // is low surrogate + badChar <<= 10; + badChar += low; + badChar += 0x10000 - (0xd800 << 10) - 0xdc00; + ++charactersConverted; + } + } char buf[16]; sprintf(buf, "&#%u;", badChar); result.append(buf); - - ++charactersConverted; } startPos += charactersConverted; charactersLeft -= charactersConverted; } + CFRelease(cfms); return result; } -- 2.36.0