+2006-05-07 Alexey Proskuryakov <ap@nypop.com>
+
+ Reviewed by Darin.
+
+ - Tests for http://bugzilla.opendarwin.org/show_bug.cgi?id=8769
+ TextEncoding::fromUnicode() - support non-BMP characters and convert to NFC
+
+ * fast/forms/form-data-encoding.html: Cover more cases for Latin-1 encoding.
+ * fast/forms/form-data-encoding-2.html: Added. Test conversion to UTF-8.
+ * fast/forms/form-data-encoding-2-expected.txt: Added.
+
2006-05-05 Alexey Proskuryakov <ap@nypop.com>
Reviewed by Maciej.
--- /dev/null
+
+Success
+
+
--- /dev/null
+<html>
+<head>
+<meta http-equiv="content-type" content="text/html; charset=utf-8">
+<title>Charsets and submitting forms</title>
+</head>
+<body>
+<form action="?" name=f>
+ <input type=hidden name=q value="ё𐐀𝄫çc"><br>
+</form>
+<script>
+if (window.layoutTestController) {
+ layoutTestController.dumpAsText();
+ layoutTestController.waitUntilDone();
+}
+
+if (document.URL.substring(0, 4) == "file") {
+
+ if (document.URL.indexOf('?') == -1) {
+ document.f.q.value += "\u0327"; // cedilla for 'c'
+ document.f.submit();
+
+ } else {
+ if (unescape(document.URL.substring(document.URL.indexOf('?')+1, document.URL.length)) == unescape("q=%D1%91%F0%90%90%80%F0%9D%84%AB%C3%A7%C3%A7"))
+ document.write("<p>Success</p>");
+ else
+ document.write("<p>Failure</p>");
+
+ if (window.layoutTestController)
+ layoutTestController.notifyDone();
+ }
+
+} else {
+
+ document.write("<p>This test doesn't work directly from bugzilla, please save it to a local file first.</p>");
+}
+</script>
+</body>
+</html>
\ No newline at end of file
</head>
<body>
<form action="?" name=f>
- <input type=hidden name=q value="тест"><br>
+ <input type=hidden name=q value="ё𐐀𝄫çc"><br>
</form>
<script>
if (window.layoutTestController) {
- layoutTestController.dumpAsText();
- layoutTestController.waitUntilDone();
+ layoutTestController.dumpAsText();
+ layoutTestController.waitUntilDone();
}
if (document.URL.substring(0, 4) == "file") {
- if (document.URL.indexOf('?') == -1) {
+ if (document.URL.indexOf('?') == -1) {
+ document.f.q.value += "\u0327"; // cedilla for 'c'
+ document.f.submit();
- document.f.submit();
-
- } else {
-
- if (unescape(document.URL.substring(document.URL.indexOf('?')+1, document.URL.length)) == unescape("q=%26%231090%3B%26%231077%3B%26%231089%3B%26%231090%3B"))
- document.write("<p>Success</p>");
- else
- document.write("<p>Failure</p>");
-
- if (window.layoutTestController)
- layoutTestController.notifyDone();
- }
+ } else {
+ if (unescape(document.URL.substring(document.URL.indexOf('?')+1, document.URL.length)) == unescape("q=%26%231105%3B%26%2366560%3B%26%23119083%3B%E7%E7"))
+ document.write("<p>Success</p>");
+ else
+ document.write("<p>Failure</p>");
+
+ if (window.layoutTestController)
+ layoutTestController.notifyDone();
+ }
} else {
- document.write("<p>This test doesn't work directly from bugzilla, please save it to a local file first.</p>");
+ document.write("<p>This test doesn't work directly from bugzilla, please save it to a local file first.</p>");
}
</script>
</body>
+2006-05-07 Alexey Proskuryakov <ap@nypop.com>
+
+ Reviewed by Darin.
+
+ - http://bugzilla.opendarwin.org/show_bug.cgi?id=8769
+ TextEncoding::fromUnicode() - support non-BMP characters and convert to NFC
+
+ Tests:
+ * fast/forms/form-data-encoding.html
+ * fast/forms/form-data-encoding-2.html
+
+ * platform/mac/TextEncodingMac.cpp:
+ (WebCore::TextEncoding::fromUnicode): Normalize the string; handle surrogate pairs.
+
2006-05-06 David Hyatt <hyatt@apple.com>
Rename WebCoreFont to FontPlatformData and WebTextRenderer to FontData. Merge them into the
DeprecatedString copy = qcs;
copy.replace(QChar('\\'), backslashAsCurrencySymbol());
CFStringRef cfs = copy.getCFString();
+ CFMutableStringRef cfms = CFStringCreateMutableCopy(0, 0, cfs); // in rare cases, normalization can make the string longer, thus no limit on its length
+ CFStringNormalize(cfms, kCFStringNormalizationFormC);
CFIndex startPos = 0;
- CFIndex charactersLeft = CFStringGetLength(cfs);
- DeprecatedCString result(1); // for trailng zero
+ CFIndex charactersLeft = CFStringGetLength(cfms);
+ DeprecatedCString result(1); // for trailing zero
while (charactersLeft > 0) {
CFRange range = CFRangeMake(startPos, charactersLeft);
CFIndex bufferLength;
- CFStringGetBytes(cfs, range, encoding, allowEntities ? 0 : '?', false, NULL, 0x7FFFFFFF, &bufferLength);
+ CFStringGetBytes(cfms, range, encoding, allowEntities ? 0 : '?', false, NULL, 0x7FFFFFFF, &bufferLength);
DeprecatedCString chunk(bufferLength + 1);
unsigned char *buffer = reinterpret_cast<unsigned char *>(chunk.data());
- CFIndex charactersConverted = CFStringGetBytes(cfs, range, encoding, allowEntities ? 0 : '?', false, buffer, bufferLength, &bufferLength);
+ CFIndex charactersConverted = CFStringGetBytes(cfms, range, encoding, allowEntities ? 0 : '?', false, buffer, bufferLength, &bufferLength);
buffer[bufferLength] = 0;
result.append(chunk);
if (charactersConverted != charactersLeft) {
- // FIXME: support surrogate pairs
- UniChar badChar = CFStringGetCharacterAtIndex(cfs, startPos + charactersConverted);
+ unsigned int badChar = CFStringGetCharacterAtIndex(cfms, startPos + charactersConverted);
+ ++charactersConverted;
+
+ if ((badChar & 0xfc00) == 0xd800 && // is high surrogate
+ charactersConverted != charactersLeft) {
+ UniChar low = CFStringGetCharacterAtIndex(cfms, startPos + charactersConverted);
+ if ((low & 0xfc00) == 0xdc00) { // is low surrogate
+ badChar <<= 10;
+ badChar += low;
+ badChar += 0x10000 - (0xd800 << 10) - 0xdc00;
+ ++charactersConverted;
+ }
+ }
char buf[16];
sprintf(buf, "&#%u;", badChar);
result.append(buf);
-
- ++charactersConverted;
}
startPos += charactersConverted;
charactersLeft -= charactersConverted;
}
+ CFRelease(cfms);
return result;
}