2009-12-08 Dominik Röttsches <dominik.roettsches@access-company.com>
author eric@webkit.org <eric@webkit.org@268f45cc-cd09-0410-ab3c-d52691b4dbfc>
Tue, 8 Dec 2009 14:58:50 +0000 (14:58 +0000)
committer eric@webkit.org <eric@webkit.org@268f45cc-cd09-0410-ab3c-d52691b4dbfc>
Tue, 8 Dec 2009 14:58:50 +0000 (14:58 +0000)
        Reviewed by Gustavo Noronha Silva.

        [Gtk] Create a TextBreakIterator implementation based on GLib (without ICU)
        https://bugs.webkit.org/show_bug.cgi?id=31469

        Removing hybrid configuration for --with-unicode-backend=glib
        ICU not required anymore.

        * autotools/webkit.m4:
2009-12-08  Dominik Röttsches  <dominik.roettsches@access-company.com>

        Reviewed by Gustavo Noronha Silva.

        [Gtk] Create a TextBreakIterator implementation based on GLib (without ICU)
        https://bugs.webkit.org/show_bug.cgi?id=31469

        Added a TextBreakIterator implementation based on GLib and pango,
        which allows compiling WebCore without ICU.

        * GNUmakefile.am:
        * platform/text/gtk/TextBreakIteratorGtk.cpp: Added.
        (WebCore::):
        (WebCore::setUpIterator):
        (WebCore::characterBreakIterator):
        (WebCore::cursorMovementIterator):
        (WebCore::wordBreakIterator):
        (WebCore::lineBreakIterator):
        (WebCore::sentenceBreakIterator):
        (WebCore::textBreakFirst):
        (WebCore::textBreakLast):
        (WebCore::textBreakNext):
        (WebCore::textBreakPrevious):
        (WebCore::textBreakPreceding):
        (WebCore::textBreakFollowing):
        (WebCore::textBreakCurrent):
        (WebCore::isTextBreak):

git-svn-id: https://svn.webkit.org/repository/webkit/trunk@51848 268f45cc-cd09-0410-ab3c-d52691b4dbfc

ChangeLog
WebCore/ChangeLog
WebCore/GNUmakefile.am
WebCore/platform/text/gtk/TextBreakIteratorGtk.cpp [new file with mode: 0644]
autotools/webkit.m4

index 261164a..92cf95e 100644 (file)
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,15 @@
+2009-12-08  Dominik Röttsches  <dominik.roettsches@access-company.com>
+
+        Reviewed by Gustavo Noronha Silva.
+
+        [Gtk] Create a TextBreakIterator implementation based on GLib (without ICU)
+        https://bugs.webkit.org/show_bug.cgi?id=31469
+
+        Removing hybrid configuration for --with-unicode-backend=glib
+        ICU not required anymore.
+
+        * autotools/webkit.m4:
+
 2009-12-08  Nikolas Zimmermann  <nzimmermann@rim.com>
 
         Rubber-stamped by Maciej Stachowiak.
 2009-12-08  Nikolas Zimmermann  <nzimmermann@rim.com>
 
         Rubber-stamped by Maciej Stachowiak.
index 1c24c6d..668351e 100644 (file)
@@ -1,3 +1,31 @@
+2009-12-08  Dominik Röttsches  <dominik.roettsches@access-company.com>
+
+        Reviewed by Gustavo Noronha Silva.
+
+        [Gtk] Create a TextBreakIterator implementation based on GLib (without ICU)
+        https://bugs.webkit.org/show_bug.cgi?id=31469
+
+        Added a TextBreakIterator implementation based on GLib and pango,
+        which allows compiling WebCore without ICU.
+
+        * GNUmakefile.am:
+        * platform/text/gtk/TextBreakIteratorGtk.cpp: Added.
+        (WebCore::):
+        (WebCore::setUpIterator):
+        (WebCore::characterBreakIterator):
+        (WebCore::cursorMovementIterator):
+        (WebCore::wordBreakIterator):
+        (WebCore::lineBreakIterator):
+        (WebCore::sentenceBreakIterator):
+        (WebCore::textBreakFirst):
+        (WebCore::textBreakLast):
+        (WebCore::textBreakNext):
+        (WebCore::textBreakPrevious):
+        (WebCore::textBreakPreceding):
+        (WebCore::textBreakFollowing):
+        (WebCore::textBreakCurrent):
+        (WebCore::isTextBreak):
+
 2009-12-08  Alexander Pavlov  <apavlov@chromium.org>
 
         Reviewed by Pavel Feldman.
 2009-12-08  Alexander Pavlov  <apavlov@chromium.org>
 
         Reviewed by Pavel Feldman.
index 2f5a5ee..6278e35 100644 (file)
@@ -1654,8 +1654,6 @@ webcore_sources += \
        WebCore/platform/text/TextBoundaries.h \
        WebCore/platform/text/TextBoundaries.cpp \
        WebCore/platform/text/TextBreakIterator.h \
        WebCore/platform/text/TextBoundaries.h \
        WebCore/platform/text/TextBoundaries.cpp \
        WebCore/platform/text/TextBreakIterator.h \
-       WebCore/platform/text/TextBreakIteratorICU.cpp \
-       WebCore/platform/text/TextBreakIteratorInternalICU.h \
        WebCore/platform/text/TextCodec.cpp \
        WebCore/platform/text/TextCodec.h \
        WebCore/platform/text/TextCodecLatin1.cpp \
        WebCore/platform/text/TextCodec.cpp \
        WebCore/platform/text/TextCodec.h \
        WebCore/platform/text/TextCodecLatin1.cpp \
@@ -2035,7 +2033,6 @@ webcoregtk_sources += \
        WebCore/platform/network/soup/ResourceResponseSoup.cpp \
        WebCore/platform/network/soup/ResourceRequest.h \
        WebCore/platform/network/soup/ResourceResponse.h \
        WebCore/platform/network/soup/ResourceResponseSoup.cpp \
        WebCore/platform/network/soup/ResourceRequest.h \
        WebCore/platform/network/soup/ResourceResponse.h \
-       WebCore/platform/text/gtk/TextBreakIteratorInternalICUGtk.cpp \
        WebCore/workers/SharedWorkerRepository.h
 
 # ----
        WebCore/workers/SharedWorkerRepository.h
 
 # ----
@@ -2044,7 +2041,10 @@ webcoregtk_sources += \
 if USE_ICU_UNICODE
 webcoregtk_sources += \
        WebCore/platform/text/TextCodecICU.cpp \
 if USE_ICU_UNICODE
 webcoregtk_sources += \
        WebCore/platform/text/TextCodecICU.cpp \
-       WebCore/platform/text/TextCodecICU.h
+       WebCore/platform/text/TextCodecICU.h \
+       WebCore/platform/text/TextBreakIteratorICU.cpp \
+       WebCore/platform/text/TextBreakIteratorInternalICU.h \
+       WebCore/platform/text/gtk/TextBreakIteratorInternalICUGtk.cpp
 endif
 
 # ----
 endif
 
 # ----
@@ -2053,7 +2053,8 @@ endif
 if USE_GLIB_UNICODE
 webcoregtk_sources += \
        WebCore/platform/text/gtk/TextCodecGtk.cpp \
 if USE_GLIB_UNICODE
 webcoregtk_sources += \
        WebCore/platform/text/gtk/TextCodecGtk.cpp \
-       WebCore/platform/text/gtk/TextCodecGtk.h
+       WebCore/platform/text/gtk/TextCodecGtk.h \
+       WebCore/platform/text/gtk/TextBreakIteratorGtk.cpp
 endif
 
 
 endif
 
 
diff --git a/WebCore/platform/text/gtk/TextBreakIteratorGtk.cpp b/WebCore/platform/text/gtk/TextBreakIteratorGtk.cpp
new file mode 100644 (file)
index 0000000..7a10b41
--- /dev/null
@@ -0,0 +1,217 @@
+/*
+ * Copyright (C) 2006 Lars Knoll <lars@trolltech.com>
+ * Copyright (C) 2007 Apple Inc. All rights reserved.
+ * Copyright (C) 2008 Jürg Billeter <j@bitron.ch>
+ * Copyright (C) 2008 Dominik Röttsches <dominik.roettsches@access-company.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public License
+ * along with this library; see the file COPYING.LIB.  If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+#include "config.h"
+#include "TextBreakIterator.h"
+
+#include <pango/pango.h>
+#include <wtf/gtk/GOwnPtr.h>
+
+namespace WebCore {
+
+enum UBreakIteratorType { // Kind of boundary an iterator reports; selects which PangoLogAttr flags textBreakNext/textBreakPrevious test.
+    UBRK_CHARACTER, // positions flagged is_cursor_position
+    UBRK_WORD, // positions flagged is_word_start or is_word_end
+    UBRK_LINE, // positions flagged is_line_break
+    UBRK_SENTENCE // positions flagged is_sentence_start or is_sentence_end
+};
+
+class TextBreakIterator { // State of one pango-backed break iterator; filled in by setUpIterator and read directly by the textBreak* functions.
+public:
+    UBreakIteratorType m_type; // which boundary kind this iterator reports
+    int m_length; // length of the analysed text as passed to setUpIterator (UTF-16 code units)
+    PangoLogAttr* m_logAttrs; // array of m_length + 1 attributes allocated with g_new0 in setUpIterator
+    int m_index; // current position; setUpIterator resets it to -1
+};
+
+static TextBreakIterator* setUpIterator(bool& createdIterator, TextBreakIterator*& iterator,
+    UBreakIteratorType type, const UChar* string, int length)
+{
+    if (!string)
+        return 0;
+
+    if (!createdIterator) {
+        iterator = new TextBreakIterator();
+        createdIterator = true;
+    }
+    if (!iterator)
+        return 0;
+
+    long utf8len;
+    GOwnPtr<char> utf8;
+    utf8.set(g_utf16_to_utf8(string, length, 0, &utf8len, 0));
+
+    // FIXME: assumes no surrogate pairs
+
+    iterator->m_type = type;
+    iterator->m_length = length;
+    if (createdIterator)
+        g_free(iterator->m_logAttrs);
+    iterator->m_logAttrs = g_new0(PangoLogAttr, length + 1);
+    iterator->m_index = -1;
+    pango_get_log_attrs(utf8.get(), utf8len, -1, 0, iterator->m_logAttrs, length + 1);
+
+    return iterator;
+}
+
+// Returns the shared character-boundary iterator, re-targeted at |string|
+// (|length| UTF-16 code units). The result is whatever setUpIterator returns.
+TextBreakIterator* characterBreakIterator(const UChar* string, int length)
+{
+    static bool isAllocated = false;
+    static TextBreakIterator* sharedIterator = 0;
+    TextBreakIterator* result = setUpIterator(isAllocated, sharedIterator, UBRK_CHARACTER, string, length);
+    return result;
+}
+
+TextBreakIterator* cursorMovementIterator(const UChar* string, int length) // Iterator for cursor movement; currently identical to the character-break iterator.
+{
+    // FIXME: This needs closer inspection to achieve behaviour identical to the ICU version.
+    return characterBreakIterator(string, length); // reuses characterBreakIterator's static iterator
+}
+
+// Returns the shared word-boundary iterator, re-targeted at |string|
+// (|length| UTF-16 code units). The result is whatever setUpIterator returns.
+TextBreakIterator* wordBreakIterator(const UChar* string, int length)
+{
+    static bool isAllocated = false;
+    static TextBreakIterator* sharedIterator = 0;
+    TextBreakIterator* result = setUpIterator(isAllocated, sharedIterator, UBRK_WORD, string, length);
+    return result;
+}
+
+// Returns the shared line-break iterator, re-targeted at |string|
+// (|length| UTF-16 code units). The result is whatever setUpIterator returns.
+TextBreakIterator* lineBreakIterator(const UChar* string, int length)
+{
+    static bool isAllocated = false;
+    static TextBreakIterator* sharedIterator = 0;
+    TextBreakIterator* result = setUpIterator(isAllocated, sharedIterator, UBRK_LINE, string, length);
+    return result;
+}
+
+// Returns the shared sentence-boundary iterator, re-targeted at |string|
+// (|length| UTF-16 code units). The result is whatever setUpIterator returns.
+TextBreakIterator* sentenceBreakIterator(const UChar* string, int length)
+{
+    static bool isAllocated = false;
+    static TextBreakIterator* sharedIterator = 0;
+    TextBreakIterator* result = setUpIterator(isAllocated, sharedIterator, UBRK_SENTENCE, string, length);
+    return result;
+}
+
+// Moves |bi| to the first cursor position of its text and returns that index.
+// Returns -1 (and stores -1 in m_index) if no position in [0, m_length] is
+// flagged as a cursor position.
+int textBreakFirst(TextBreakIterator* bi)
+{
+    // see textBreakLast
+
+    int firstCursorPosition = -1;
+    // The scan has to advance on every iteration; otherwise a text whose
+    // position 0 is not a cursor position would never terminate.
+    for (int pos = 0; pos <= bi->m_length; ++pos) {
+        if (bi->m_logAttrs[pos].is_cursor_position) {
+            firstCursorPosition = pos;
+            break;
+        }
+    }
+    bi->m_index = firstCursorPosition;
+    return firstCursorPosition;
+}
+
+int textBreakLast(TextBreakIterator* bi)
+{
+    // TextBreakLast is not meant to find just any break according to bi->m_type 
+    // but really the one near the last character.
+    // (cmp ICU documentation for ubrk_first and ubrk_last)
+    // From ICU docs for ubrk_last:
+    // "Determine the index immediately beyond the last character in the text being scanned." 
+
+    // So we should advance or traverse back based on bi->m_logAttrs cursor positions.
+    // If last character position in the original string is a whitespace,
+    // traverse to the left until the first non-white character position is found
+    // and return the position of the first white-space char after this one.
+    // Otherwise return m_length, as "the first character beyond the last" is outside our string.
+    
+    bool whiteSpaceAtTheEnd = true;
+    int nextWhiteSpacePos = bi->m_length;
+    
+    int pos = bi->m_length;
+    while (pos >= 0 && whiteSpaceAtTheEnd) {
+        if (bi->m_logAttrs[pos].is_cursor_position) {
+            if (whiteSpaceAtTheEnd = bi->m_logAttrs[pos].is_white)
+                nextWhiteSpacePos = pos;
+        }
+        pos--;
+    }
+    bi->m_index = nextWhiteSpacePos;
+    return nextWhiteSpacePos;
+}
+
+int textBreakNext(TextBreakIterator* bi)
+{
+    for (int i = bi->m_index + 1; i <= bi->m_length; i++) {
+
+        // FIXME: UBRK_WORD case: Single multibyte characters (i.e. white space around them), such as the euro symbol €, 
+        // are not marked as word_start & word_end as opposed to the way ICU does it.
+        // This leads to - for example - different word selection behaviour when right clicking.
+
+        if ((bi->m_type == UBRK_LINE && bi->m_logAttrs[i].is_line_break)
+            || (bi->m_type == UBRK_WORD && (bi->m_logAttrs[i].is_word_start || bi->m_logAttrs[i].is_word_end))
+            || (bi->m_type == UBRK_CHARACTER && bi->m_logAttrs[i].is_cursor_position)
+            || (bi->m_type == UBRK_SENTENCE && (bi->m_logAttrs[i].is_sentence_start || bi->m_logAttrs[i].is_sentence_end)) ) {
+            bi->m_index = i;
+            return i;
+        }
+    }
+    return TextBreakDone;
+}
+
+int textBreakPrevious(TextBreakIterator* bi)
+{
+    for (int i = bi->m_index - 1; i >= 0; i--) {
+        if ((bi->m_type == UBRK_LINE && bi->m_logAttrs[i].is_line_break)
+            || (bi->m_type == UBRK_WORD && (bi->m_logAttrs[i].is_word_start || bi->m_logAttrs[i].is_word_end))
+            || (bi->m_type == UBRK_CHARACTER && bi->m_logAttrs[i].is_cursor_position)
+            || (bi->m_type == UBRK_SENTENCE && (bi->m_logAttrs[i].is_sentence_start || bi->m_logAttrs[i].is_sentence_end)) ) {
+            bi->m_index = i;
+            return i;
+        }
+    }
+    return textBreakFirst(bi);
+}
+
+// Positions |bi| at |pos| and returns the closest boundary before it, as
+// found by textBreakPrevious.
+int textBreakPreceding(TextBreakIterator* bi, int pos)
+{
+    // textBreakPrevious starts reading at m_logAttrs[pos - 1], and the array
+    // holds m_length + 1 entries, so cap pos to keep that first read in bounds.
+    if (pos > bi->m_length + 1)
+        pos = bi->m_length + 1;
+    bi->m_index = pos;
+    return textBreakPrevious(bi);
+}
+
+// Positions |bi| at |pos| and returns the next boundary after it, as found
+// by textBreakNext. Any negative |pos| is treated as -1, so the search then
+// starts at index 0.
+int textBreakFollowing(TextBreakIterator* bi, int pos)
+{
+    bi->m_index = pos < 0 ? -1 : pos;
+    return textBreakNext(bi);
+}
+
+int textBreakCurrent(TextBreakIterator* bi) // Reports the iterator's current position without moving it.
+{
+    return bi->m_index; // -1 right after setUpIterator, before any textBreak* call has positioned the iterator
+}
+
+bool isTextBreak(TextBreakIterator* bi, int pos)
+{
+    if (bi->m_index < 0)
+        return false;
+
+    return ((bi->m_type == UBRK_LINE && bi->m_logAttrs[bi->m_index].is_line_break)
+        || (bi->m_type == UBRK_WORD && bi->m_logAttrs[bi->m_index].is_word_end)
+        || (bi->m_type == UBRK_CHARACTER && bi->m_logAttrs[bi->m_index].is_char_break)
+        || (bi->m_type == UBRK_SENTENCE && bi->m_logAttrs[bi->m_index].is_sentence_end) );
+}
+
+}
index 14dd0a5..670cec3 100644 (file)
@@ -139,11 +139,7 @@ esac
 
 AC_MSG_RESULT([$with_unicode_backend])
 
 
 AC_MSG_RESULT([$with_unicode_backend])
 
-# https://bugs.webkit.org/show_bug.cgi?id=15914
-# Splitting ICU removal patch into smaller portions. We compile a hybrid version
-# with the WTF Unicode backend being based on GLib while text codecs and TextBreakIterator
-# keep the ICU dependency. That's why we temporarily add icu headers and libs for glib config case as well.
-if test "$with_unicode_backend" = "icu" -o "$with_unicode_backend" = "glib"; then
+if test "$with_unicode_backend" = "icu"; then
         case "$host" in
             *-*-darwin*)
                UNICODE_CFLAGS="-I$srcdir/JavaScriptCore/icu -I$srcdir/WebCore/icu"
         case "$host" in
             *-*-darwin*)
                UNICODE_CFLAGS="-I$srcdir/JavaScriptCore/icu -I$srcdir/WebCore/icu"
@@ -169,14 +165,7 @@ if test "$with_unicode_backend" = "icu" -o "$with_unicode_backend" = "glib"; the
 fi
 
 if test "$with_unicode_backend" = "glib"; then
 fi
 
 if test "$with_unicode_backend" = "glib"; then
-       # https://bugs.webkit.org/show_bug.cgi?id=15914
-       # Splitting ICU removal patch into smaller portions, that's why we
-       # temporarily retrieve flags & libs info for glib into UNICODEGLIB
-       # instead of UNICODE variable, then concatenate.
-       # Patch 3/4 of the above issue will rename the variable back to UNICODE.
-       PKG_CHECK_MODULES([UNICODEGLIB], [glib-2.0 pango >= 1.21.0])
-       UNICODE_CFLAGS="$UNICODE_CFLAGS $UNICODEGLIB_CFLAGS"
-       UNICODE_LIBS="$UNICODE_LIBS $UNICODEGLIB_LIBS"
+       PKG_CHECK_MODULES([UNICODE], [glib-2.0 pango >= 1.21.0])
 fi
 
 AC_SUBST([UNICODE_CFLAGS])
 fi
 
 AC_SUBST([UNICODE_CFLAGS])