+2006-03-14 Beth Dakin <bdakin@apple.com>
+
+ Reviewed by Maciej
+
+ These expected results change for
+ http://bugzilla.opendarwin.org/show_bug.cgi?id=4171
+
+ * css1/text_properties/text-transCapitalize-expected.txt:
+ * css1/text_properties/text_transform-expected.checksum:
+ * css1/text_properties/text_transform-expected.png:
+ * css1/text_properties/text_transform-expected.txt:
+ * css2.1/t1605-c545-txttrans-00-b-ag-expected.checksum:
+ * css2.1/t1605-c545-txttrans-00-b-ag-expected.png:
+ * css2.1/t1605-c545-txttrans-00-b-ag-expected.txt:
+
2006-03-14 Darin Adler <darin@apple.com>
Rubber stamped by Hyatt.
RenderText {TEXT} at (21,2) size 11x18
text run at (21,2) width 11: "el"
RenderTableCell {TD} at (57,86) size 710x22 [border: (1px solid #EEEEEE)] [r=2 c=1 rs=1 cs=1]
- RenderText {TEXT} at (2,2) size 235x18
- text run at (2,2) width 235: "\x{393}\x{3B5}\x{3C9}\x{3B3}\x{3C1}\x{3B1}\x{3C6}\x{3B9}\x{3BA}\x{3AC}\x{2010}\x{3C3}\x{3C5}\x{3C3}\x{3C7}\x{3B5}\x{3C4}\x{3B9}\x{3C3}\x{3BC}\x{3AD}\x{3BD}\x{3B5}\x{3C2} \x{389}\x{3C4}\x{3B1}"
+ RenderText {TEXT} at (2,2) size 236x18
+ text run at (2,2) width 236: "\x{393}\x{3B5}\x{3C9}\x{3B3}\x{3C1}\x{3B1}\x{3C6}\x{3B9}\x{3BA}\x{3AC}\x{2010}\x{3A3}\x{3C5}\x{3C3}\x{3C7}\x{3B5}\x{3C4}\x{3B9}\x{3C3}\x{3BC}\x{3AD}\x{3BD}\x{3B5}\x{3C2} \x{389}\x{3C4}\x{3B1}"
RenderTableRow {TR} at (0,0) size 0x0
RenderTableCell {TH} at (2,128) size 53x22 [bgcolor=#F8F8F8] [border: (1px solid #EEEEEE)] [r=3 c=0 rs=1 cs=1]
RenderText {TEXT} at (18,2) size 16x18
text run at (18,2) width 16: "en"
RenderTableCell {TD} at (57,110) size 710x58 [border: (1px solid #EEEEEE)] [r=3 c=1 rs=1 cs=1]
- RenderText {TEXT} at (2,2) size 459x18
- text run at (2,2) width 459: "'Cept Nut'in Safari\x{2019}s \x{2018}Sure\x{2019} Nai\x{308}ve R\x{E9}sum\x{E9}\x{2014}h\x{E1}c\x{30C}ek Full\x{2010}time One-to-one"
+ RenderText {TEXT} at (2,2) size 479x18
+ text run at (2,2) width 479: "'Cept Nut'in Safari\x{2019}s \x{2018}Sure\x{2019} Nai\x{308}ve R\x{E9}sum\x{E9}\x{2014}H\x{E1}c\x{30C}ek Full\x{2010}Time One-To-One"
RenderBR {BR} at (0,0) size 0x0
- RenderText {TEXT} at (2,20) size 322x18
- text run at (2,20) width 322: "\"Newcastle\x{2011}upon\x{2011}tyne\" Washington\x{2011}on\x{2011}the\x{2011}brazos"
+ RenderText {TEXT} at (2,20) size 345x18
+ text run at (2,20) width 345: "\"Newcastle\x{2011}Upon\x{2011}Tyne\" Washington\x{2011}On\x{2011}The\x{2011}Brazos"
RenderBR {BR} at (0,0) size 0x0
RenderInline {SPAN} at (0,0) size 34x18
RenderText {TEXT} at (2,38) size 34x18
RenderText {TEXT} at (20,2) size 12x18
text run at (20,2) width 12: "fr"
RenderTableCell {TD} at (57,194) size 710x26 [border: (1px solid #EEEEEE)] [r=5 c=1 rs=1 cs=1]
- RenderText {TEXT} at (2,6) size 223x18
- text run at (2,6) width 223: "Quelqu'un L\x{2019}amour T'appelles\x{2011}tu 3"
+ RenderText {TEXT} at (2,6) size 229x18
+ text run at (2,6) width 229: "Quelqu'un L\x{2019}amour T'appelles\x{2011}Tu 3"
RenderInline {SUP} at (0,0) size 22x15
- RenderText {TEXT} at (225,2) size 22x15
- text run at (225,2) width 22: "eme"
+ RenderText {TEXT} at (231,2) size 22x15
+ text run at (231,2) width 22: "eme"
RenderTableRow {TR} at (0,0) size 0x0
RenderTableCell {TH} at (2,222) size 53x22 [bgcolor=#F8F8F8] [border: (1px solid #EEEEEE)] [r=6 c=0 rs=1 cs=1]
RenderText {TEXT} at (17,2) size 18x18
text run at (17,2) width 18: "hu"
RenderTableCell {TD} at (57,222) size 710x22 [border: (1px solid #EEEEEE)] [r=6 c=1 rs=1 cs=1]
- RenderText {TEXT} at (2,2) size 115x18
- text run at (2,2) width 115: "11-ei London\x{2011}ban"
+ RenderText {TEXT} at (2,2) size 121x18
+ text run at (2,2) width 121: "11-Ei London\x{2011}Ban"
RenderTableRow {TR} at (0,0) size 0x0
RenderTableCell {TH} at (2,246) size 53x22 [bgcolor=#F8F8F8] [border: (1px solid #EEEEEE)] [r=7 c=0 rs=1 cs=1]
RenderText {TEXT} at (20,2) size 13x18
RenderText {TEXT} at (18,2) size 16x18
text run at (18,2) width 16: "ru"
RenderTableCell {TD} at (57,294) size 710x22 [border: (1px solid #EEEEEE)] [r=9 c=1 rs=1 cs=1]
- RenderText {TEXT} at (2,2) size 195x18
- text run at (2,2) width 195: "\x{41D}\x{44C}\x{44E}-\x{439}\x{43E}\x{440}\x{43A} 1990-\x{445} 14-vii-1789"
+ RenderText {TEXT} at (2,2) size 207x18
+ text run at (2,2) width 207: "\x{41D}\x{44C}\x{44E}-\x{419}\x{43E}\x{440}\x{43A} 1990-\x{425} 14-Vii-1789"
RenderTableRow {TR} at (0,0) size 0x0
RenderTableCell {TH} at (2,318) size 53x22 [bgcolor=#F8F8F8] [border: (1px solid #EEEEEE)] [r=10 c=0 rs=1 cs=1]
RenderText {TEXT} at (17,2) size 18x18
text run at (17,2) width 18: "tlh"
RenderTableCell {TD} at (57,318) size 710x22 [border: (1px solid #EEEEEE)] [r=10 c=1 rs=1 cs=1]
- RenderText {TEXT} at (2,2) size 202x18
- text run at (2,2) width 202: "TlhIngan Hol Wa''uy' Loghqam"
+ RenderText {TEXT} at (2,2) size 206x18
+ text run at (2,2) width 206: "TlhIngan Hol Wa''Uy' Loghqam"
-011f34abc2c3539f1482ba0b4249b977
\ No newline at end of file
+5ff8ed6ac8e26f1a9ceb6de5fbb2fa34
\ No newline at end of file
RenderBlock {P} at (0,192) size 769x54
RenderText {TEXT} at (0,0) size 750x54
text run at (0,0) width 750: "This Paragraph Is Capitalized And The First Letter In Each Word Should Therefore Appear In Uppercase. Words That"
- text run at (0,18) width 718: "Are In Uppercase In The Source (E.g. USA) Should Remain So. There Should Be A Capital Letter After A Non-"
- text run at (0,36) width 532: "breaking space (&Nbsp;). Both Those Characters Appear In The Previous Sentence."
+ text run at (0,18) width 722: "Are In Uppercase In The Source (E.G. USA) Should Remain So. There Should Be A Capital Letter After A Non-"
+ text run at (0,36) width 538: "Breaking Space (&Nbsp;). Both Those Characters Appear In The Previous Sentence."
RenderBlock {P} at (0,262) size 769x36
RenderText {TEXT} at (0,0) size 753x36
text run at (0,0) width 753: "Words with inline elements inside them should only capitalize the first letter of the word. Therefore, the last word in this"
text run at (0,0) width 718: "This page tests the 'text-transform' property of CSS1. This paragraph has no text transformation and should appear"
text run at (0,18) width 48: "normal."
RenderBlock {P} at (4,56) size 747x54
- RenderText {TEXT} at (0,0) size 717x54
+ RenderText {TEXT} at (0,0) size 718x54
text run at (0,0) width 717: "This Paragraph Is Capitalized And The First Letter In Each Word Should Therefore Appear In Uppercase. Words"
- text run at (0,18) width 714: "That Are In Uppercase In The Source (E.g. USA) Should Remain So. There Should Be A Capital Letter After A"
- text run at (0,36) width 565: "Non-breaking space (&Nbsp;). Both Those Characters Appear In The Previous Sentence."
+ text run at (0,18) width 718: "That Are In Uppercase In The Source (E.G. USA) Should Remain So. There Should Be A Capital Letter After A"
+ text run at (0,36) width 571: "Non-Breaking Space (&Nbsp;). Both Those Characters Appear In The Previous Sentence."
RenderBlock {P} at (4,126) size 747x36
RenderText {TEXT} at (0,0) size 727x36
text run at (0,0) width 727: "Words with inline elements inside them should only capitalize the first letter of the word. Therefore, the last word in"
-91f8f2e40e8fc13026ce8ee21d4e5783
\ No newline at end of file
+06e6bb8613e241a34eac9e6a867ead58
\ No newline at end of file
RenderText {TEXT} at (0,0) size 0x0
RenderBlock {P} at (3,63) size 320x30
RenderText {TEXT} at (0,0) size 230x10
- text run at (0,0) width 230: "Xx Xx X. (X.x. XX) X x "
+ text run at (0,0) width 230: "Xx Xx X. (X.X. XX) X X "
RenderInline {SPAN} at (0,0) size 20x10
RenderText {TEXT} at (230,0) size 20x10
text run at (230,0) width 20: "Xx"
RenderText {TEXT} at (290,0) size 30x10
text run at (290,0) width 30: "xxx"
RenderText {TEXT} at (0,10) size 230x10
- text run at (0,10) width 230: "Pp Pp P. (P.p. PP) P p "
+ text run at (0,10) width 230: "Pp Pp P. (P.P. PP) P P "
RenderInline {SPAN} at (0,0) size 20x10
RenderText {TEXT} at (230,10) size 20x10
text run at (230,10) width 20: "Pp"
RenderText {TEXT} at (290,10) size 30x10
text run at (290,10) width 30: "ppp"
RenderText {TEXT} at (0,20) size 230x10
- text run at (0,20) width 230: "\x{C9}\x{E9} \x{C9}\x{E9} \x{C9}. (\x{C9}.\x{E9}. \x{C9}\x{C9}) \x{C9} \x{E9} "
+ text run at (0,20) width 230: "\x{C9}\x{E9} \x{C9}\x{E9} \x{C9}. (\x{C9}.\x{C9}. \x{C9}\x{C9}) \x{C9} \x{C9} "
RenderInline {SPAN} at (0,0) size 20x10
RenderText {TEXT} at (230,20) size 20x10
text run at (230,20) width 20: "\x{C9}\x{E9}"
+2006-03-14 Beth Dakin <bdakin@apple.com>
+
+ Reviewed by Maciej
+
+ Fix for http://bugzilla.opendarwin.org/show_bug.cgi?id=4171
+ This patch changes the word-break algorithm used to apply
+ text-transform:capitalize to use a UBreakIterator. This fixes some
+ existing edge cases we have in our text-transform:capitalize
+ support and generally makes our results more consistent.
+
+ * platform/StringImpl.cpp:
+ (WebCore::getWordBreakIterator): Returns a UBreakIterator for a
+ given string of a given length.
+ (WebCore::StringImpl::capitalize): Now uses the UBreakIterator to
+ step between words and requires the previous character as input.
+ * platform/StringImpl.h: Change the declaration of capitalize() to
+ take the previous character as a parameter.
+ * rendering/RenderText.cpp:
+ (WebCore::RenderText::setText): Find the previous character and
+ send it to StringImpl::capitalize()
+
2006-03-14 Justin Garcia <justin.garcia@apple.com>
Reviewed by adele
#include "Length.h"
#include <kxmlcore/Assertions.h>
#include <string.h>
+#include <unicode/ubrk.h>
using namespace KXMLCore;
return c;
}
-StringImpl* StringImpl::capitalize(bool runOnString) const
+// Returns an ICU word-break iterator (UBRK_WORD) set to the given text, or 0
+// if the iterator could not be created or the text could not be attached.
+// A single iterator is created on first use and shared by every caller, so
+// each call re-targets the same object and callers must not close it.
+//
+// string: the UTF-16 text to analyse (reinterpreted as UChar for ICU).
+// length: number of code units in |string|.
+static UBreakIterator* getWordBreakIterator(const QChar* string, int length)
{
- StringImpl* c = new StringImpl;
- bool haveCapped = runOnString;
- if(!l) return c;
+    // The locale is currently ignored when determining word breaks.
+    // This may change in the future, according to Deborah Goldsmith.
+    static bool createdIterator = false;
+    static UBreakIterator* iterator;
+    UErrorCode status = U_ZERO_ERROR;
+    if (!createdIterator) {
+        // Creation is attempted only once, whether or not it succeeds.
+        iterator = ubrk_open(UBRK_WORD, "en_us", 0, 0, &status);
+        createdIterator = true;
+    }
+    if (!iterator)
+        return 0;
- c->s = newQCharVector(l);
- c->l = l;
+    status = U_ZERO_ERROR;
+    ubrk_setText(iterator, reinterpret_cast<const UChar*>(string), length, &status);
+    // Only real errors abort; ICU warning codes are not failures.
+    if (U_FAILURE(status))
+        return 0;
+
+    return iterator;
+}
- if ( l ) c->s[0] = s[0].upper();
+// Returns a newly allocated StringImpl holding this string with the character
+// that follows each word boundary upper-cased. Boundaries are taken from the
+// word-break iterator returned by getWordBreakIterator().
+//
+// previous: the character that precedes this string in the surrounding text.
+//     It is prepended before break analysis so that the start of this string
+//     is only capitalized when a boundary actually falls there.
+//
+// The result is empty when this string is empty or when no break iterator is
+// available.
+StringImpl* StringImpl::capitalize(QChar previous) const
+{
+    StringImpl* capitalizedString = new StringImpl;
+    if (!l)
+        return capitalizedString;
+
+    // Scratch buffer: |previous| at index 0, followed by this string's text.
+    QChar* stringWithPrevious = newQCharVector(l + 1);
+    stringWithPrevious[0] = previous;
+    for (unsigned i = 1; i < l + 1; i++)
+        stringWithPrevious[i] = s[i - 1];
+
+    UBreakIterator* boundary = getWordBreakIterator(stringWithPrevious, l + 1);
+    if (!boundary) {
+        // NOTE(review): assumes deleteQCharVector() is this file's counterpart
+        // to newQCharVector() -- confirm before landing.
+        deleteQCharVector(stringWithPrevious);
+        return capitalizedString;
+    }
- // This patch takes care of a lot of the text_transform: capitalize problems, particularly
- // with the apostrophe. But it is just a temporary fix until we implement UBreakIterator as a
- // way to determine when to break for words.
- for (unsigned int i = 0; i < l; i++) {
- if (haveCapped) {
- if (s[i].isSpace())
- haveCapped = false;
- c->s[i] = s[i];
- } else if (s[i].isLetterOrNumber()) {
- c->s[i] = s[i].upper();
- haveCapped = true;
- } else
- c->s[i] = s[i];
+
+    capitalizedString->s = newQCharVector(l);
+    capitalizedString->l = l;
+
+    // Break offsets index stringWithPrevious, which is shifted by one relative
+    // to the output, so offset k maps to capitalizedString->s[k - 1].
+    int32_t end;
+    int32_t start = ubrk_first(boundary);
+    for (end = ubrk_next(boundary); end != UBRK_DONE; start = end, end = ubrk_next(boundary)) {
+        // Offset 0 is |previous| itself, which is not part of the output.
+        if (start != 0)
+            capitalizedString->s[start - 1] = stringWithPrevious[start].upper();
+        // Copy the rest of the segment unchanged; stopping at |end| keeps both
+        // buffers in bounds on the final segment (end == l + 1).
+        for (int i = start + 1; i < end; i++)
+            capitalizedString->s[i - 1] = stringWithPrevious[i];
}
- return c;
+
+    deleteQCharVector(stringWithPrevious);
+    return capitalizedString;
}
int StringImpl::toInt(bool* ok) const
bool isLower() const;
StringImpl* lower() const;
StringImpl* upper() const;
- StringImpl* capitalize(bool runOnString) const;
+ StringImpl* capitalize(QChar previous) const;
int find(const char*, int index = 0, bool caseSensitive = true) const;
int find(QChar, int index = 0) const;
{
// find previous text renderer if one exists
RenderObject* o;
- bool runOnString = false;
+ QChar previous = ' ';
for (o = previousRenderer(); o && o->isInlineFlow(); o = o->previousRenderer())
;
if (o && o->isText()) {
DOMStringImpl* prevStr = static_cast<RenderText*>(o)->string();
- QChar c = (*prevStr)[prevStr->length() - 1];
- if (!c.isSpace())
- runOnString = true;
+ previous = (*prevStr)[prevStr->length() - 1];
}
- str = str->capitalize(runOnString);
+ str = str->capitalize(previous);
}
break;
case UPPERCASE: str = str->upper(); break;