TextBreakIterator's should support Latin-1 for all iterator types (Part 2)
author weinig@apple.com <weinig@apple.com@268f45cc-cd09-0410-ab3c-d52691b4dbfc>
Thu, 16 Jan 2014 03:59:42 +0000 (03:59 +0000)
committer weinig@apple.com <weinig@apple.com@268f45cc-cd09-0410-ab3c-d52691b4dbfc>
Thu, 16 Jan 2014 03:59:42 +0000 (03:59 +0000)
https://bugs.webkit.org/show_bug.cgi?id=126856

Reviewed by Ryosuke Niwa.

Move the contents of TextBreakIteratorICU.cpp to TextBreakIterator.cpp and remove TextBreakIteratorICU.cpp.

* CMakeLists.txt:
* GNUmakefile.list.am:
* PlatformGTK.cmake:
* WebCore.vcxproj/WebCore.vcxproj:
* WebCore.vcxproj/WebCore.vcxproj.filters:
* WebCore.xcodeproj/project.pbxproj:
* platform/text/TextBreakIterator.cpp:
* platform/text/TextBreakIteratorICU.cpp: Removed.

git-svn-id: https://svn.webkit.org/repository/webkit/trunk@162109 268f45cc-cd09-0410-ab3c-d52691b4dbfc

Source/WebCore/CMakeLists.txt
Source/WebCore/ChangeLog
Source/WebCore/GNUmakefile.list.am
Source/WebCore/PlatformGTK.cmake
Source/WebCore/WebCore.vcxproj/WebCore.vcxproj
Source/WebCore/WebCore.vcxproj/WebCore.vcxproj.filters
Source/WebCore/WebCore.xcodeproj/project.pbxproj
Source/WebCore/platform/text/TextBreakIterator.cpp
Source/WebCore/platform/text/TextBreakIteratorICU.cpp [deleted file]

index 2cbe5b8..c36b719 100644 (file)
@@ -2805,7 +2805,6 @@ if (WTF_USE_ICU_UNICODE)
         platform/text/icu/UTextProvider.cpp
         platform/text/icu/UTextProviderLatin1.cpp
         platform/text/icu/UTextProviderUTF16.cpp
-        platform/text/TextBreakIteratorICU.cpp
         platform/text/TextCodecICU.cpp
         platform/text/TextEncodingDetectorICU.cpp
     )
index 675fcb7..a7da22b 100644 (file)
@@ -1,3 +1,21 @@
+2014-01-15  Sam Weinig  <sam@webkit.org>
+
+        TextBreakIterator's should support Latin-1 for all iterator types (Part 2)
+        https://bugs.webkit.org/show_bug.cgi?id=126856
+
+        Reviewed by Ryosuke Niwa.
+
+        Move the contents of TextBreakIteratorICU.cpp to TextBreakIterator.cpp and remove TextBreakIteratorICU.cpp.
+
+        * CMakeLists.txt:
+        * GNUmakefile.list.am:
+        * PlatformGTK.cmake:
+        * WebCore.vcxproj/WebCore.vcxproj:
+        * WebCore.vcxproj/WebCore.vcxproj.filters:
+        * WebCore.xcodeproj/project.pbxproj:
+        * platform/text/TextBreakIterator.cpp:
+        * platform/text/TextBreakIteratorICU.cpp: Removed.
+
 2014-01-15  Eric Carlson  <eric.carlson@apple.com>
 
         MediaSessionManager shouldn't use std::map
index f61380d..6a82bd0 100644 (file)
@@ -6017,7 +6017,6 @@ platform_sources += \
        Source/WebCore/platform/text/TextBoundaries.h \
        Source/WebCore/platform/text/TextBreakIterator.cpp \
        Source/WebCore/platform/text/TextBreakIterator.h \
-       Source/WebCore/platform/text/TextBreakIteratorICU.cpp \
        Source/WebCore/platform/text/TextBreakIteratorInternalICU.h \
        Source/WebCore/platform/text/TextCheckerClient.h \
        Source/WebCore/platform/text/TextChecking.h \
index abca898..0148b36 100644 (file)
@@ -134,7 +134,6 @@ list(APPEND WebCore_SOURCES
     platform/text/icu/UTextProviderLatin1.cpp
     platform/text/icu/UTextProviderUTF16.cpp
     platform/text/LocaleICU.cpp
-    platform/text/TextBreakIteratorICU.cpp
     platform/text/TextCodecICU.cpp
     platform/text/TextEncodingDetectorICU.cpp
 
@@ -264,7 +263,6 @@ list(APPEND WebCorePlatformGTK_SOURCES
     platform/text/icu/UTextProviderLatin1.cpp
     platform/text/icu/UTextProviderUTF16.cpp
     platform/text/LocaleICU.cpp
-    platform/text/TextBreakIteratorICU.cpp
     platform/text/TextCodecICU.cpp
     platform/text/TextEncodingDetectorICU.cpp
 
index 30b2446..3a2baea 100644 (file)
       <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Production|Win32'">true</ExcludedFromBuild>
       <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Production|x64'">true</ExcludedFromBuild>
     </ClCompile>
-
-    <ClCompile Include="..\platform\text\TextBreakIteratorICU.cpp">
-      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</ExcludedFromBuild>
-      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
-      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release_WinCairo|Win32'">true</ExcludedFromBuild>
-      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release_WinCairo|x64'">true</ExcludedFromBuild>
-      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Production|Win32'">true</ExcludedFromBuild>
-      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Production|x64'">true</ExcludedFromBuild>
-    </ClCompile>
     <ClCompile Include="..\platform\text\TextCodec.cpp">
       <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</ExcludedFromBuild>
       <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
index 2e806d8..515fd9b 100644 (file)
     <ClCompile Include="..\platform\text\TextBreakIterator.cpp">
       <Filter>platform\text</Filter>
     </ClCompile>
-    <ClCompile Include="..\platform\text\TextBreakIteratorICU.cpp">
-      <Filter>platform\text</Filter>
-    </ClCompile>
     <ClCompile Include="..\platform\text\TextCodec.cpp">
       <Filter>platform\text</Filter>
     </ClCompile>
index 64a1145..b82051e 100644 (file)
                B2C3DA2F0D006C1D00EF6F26 /* SegmentedString.h in Headers */ = {isa = PBXBuildFile; fileRef = B2C3D9FF0D006C1D00EF6F26 /* SegmentedString.h */; settings = {ATTRIBUTES = (Private, ); }; };
                B2C3DA340D006C1D00EF6F26 /* TextBoundaries.h in Headers */ = {isa = PBXBuildFile; fileRef = B2C3DA040D006C1D00EF6F26 /* TextBoundaries.h */; settings = {ATTRIBUTES = (Private, ); }; };
                B2C3DA360D006C1D00EF6F26 /* TextBreakIterator.h in Headers */ = {isa = PBXBuildFile; fileRef = B2C3DA060D006C1D00EF6F26 /* TextBreakIterator.h */; settings = {ATTRIBUTES = (Private, ); }; };
-               B2C3DA370D006C1D00EF6F26 /* TextBreakIteratorICU.cpp in Sources */ = {isa = PBXBuildFile; fileRef = B2C3DA070D006C1D00EF6F26 /* TextBreakIteratorICU.cpp */; };
                B2C3DA380D006C1D00EF6F26 /* TextBreakIteratorInternalICU.h in Headers */ = {isa = PBXBuildFile; fileRef = B2C3DA080D006C1D00EF6F26 /* TextBreakIteratorInternalICU.h */; settings = {ATTRIBUTES = (Private, ); }; };
                B2C3DA390D006C1D00EF6F26 /* TextCodec.cpp in Sources */ = {isa = PBXBuildFile; fileRef = B2C3DA090D006C1D00EF6F26 /* TextCodec.cpp */; };
                B2C3DA3A0D006C1D00EF6F26 /* TextCodec.h in Headers */ = {isa = PBXBuildFile; fileRef = B2C3DA0A0D006C1D00EF6F26 /* TextCodec.h */; settings = {ATTRIBUTES = (Private, ); }; };
                B2C3D9FF0D006C1D00EF6F26 /* SegmentedString.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; path = SegmentedString.h; sourceTree = "<group>"; };
                B2C3DA040D006C1D00EF6F26 /* TextBoundaries.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; path = TextBoundaries.h; sourceTree = "<group>"; };
                B2C3DA060D006C1D00EF6F26 /* TextBreakIterator.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; path = TextBreakIterator.h; sourceTree = "<group>"; };
-               B2C3DA070D006C1D00EF6F26 /* TextBreakIteratorICU.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; path = TextBreakIteratorICU.cpp; sourceTree = "<group>"; };
                B2C3DA080D006C1D00EF6F26 /* TextBreakIteratorInternalICU.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; path = TextBreakIteratorInternalICU.h; sourceTree = "<group>"; };
                B2C3DA090D006C1D00EF6F26 /* TextCodec.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; path = TextCodec.cpp; sourceTree = "<group>"; };
                B2C3DA0A0D006C1D00EF6F26 /* TextCodec.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; path = TextCodec.h; sourceTree = "<group>"; };
                                B2C3DA040D006C1D00EF6F26 /* TextBoundaries.h */,
                                BCD8A5EF15F56F2C0098D071 /* TextBreakIterator.cpp */,
                                B2C3DA060D006C1D00EF6F26 /* TextBreakIterator.h */,
-                               B2C3DA070D006C1D00EF6F26 /* TextBreakIteratorICU.cpp */,
                                B2C3DA080D006C1D00EF6F26 /* TextBreakIteratorInternalICU.h */,
                                A7151BD712F1558F005A0F64 /* TextCheckerClient.h */,
                                A77D0011133B0AEB00D6658C /* TextChecking.h */,
                                372C00C4129611F1005C9575 /* TextBoundaries.cpp in Sources */,
                                B2AFFC970D00A5DF0030074D /* TextBoundaries.mm in Sources */,
                                BCD8A5F015F56F2C0098D071 /* TextBreakIterator.cpp in Sources */,
-                               B2C3DA370D006C1D00EF6F26 /* TextBreakIteratorICU.cpp in Sources */,
                                B2AFFC980D00A5DF0030074D /* TextBreakIteratorInternalICUMac.mm in Sources */,
                                A7DBF8DD1276919C006B6008 /* TextCheckingHelper.cpp in Sources */,
                                078E093117D1684200420AA1 /* MediaStreamCenterMac.cpp in Sources */,
index 8414c7b..5399c37 100644 (file)
 #include "config.h"
 #include "TextBreakIterator.h"
 
+#include "LineBreakIteratorPoolICU.h"
+#include "UTextProviderLatin1.h"
+#include "UTextProviderUTF16.h"
+#include <wtf/Atomics.h>
+#include <wtf/text/StringView.h>
+#include <wtf/text/WTFString.h>
+
 namespace WebCore {
 
+// Iterator initialization
+
+// Opens an ICU break iterator of the requested type for |locale| (by default the value of
+// currentTextBreakLocaleID()). No text is attached yet. Whatever ubrk_open() produced is
+// returned unchanged; a failing status is only reported through the assertion.
+static TextBreakIterator* initializeIterator(UBreakIteratorType type, const char* locale = currentTextBreakLocaleID())
+{
+    UErrorCode status = U_ZERO_ERROR;
+    UBreakIterator* opened = ubrk_open(type, locale, 0, 0, &status);
+    ASSERT_WITH_MESSAGE(U_SUCCESS(status), "ICU could not open a break iterator: %s (%d)", u_errorName(status), status);
+    return reinterpret_cast<TextBreakIterator*>(opened);
+}
+
+#if !PLATFORM(IOS)
+// Opens an ICU break iterator from a custom rule set supplied as a C string. The rules are
+// converted to a String whose characters and length are passed to ubrk_openRules().
+// Whatever ubrk_openRules() produced is returned unchanged; a failing status is only
+// reported through the assertion.
+static TextBreakIterator* initializeIteratorWithRules(const char* breakRules)
+{
+    String ruleText(breakRules);
+    UParseError parseError;
+    UErrorCode status = U_ZERO_ERROR;
+    UBreakIterator* opened = ubrk_openRules(ruleText.deprecatedCharacters(), ruleText.length(), 0, 0, &parseError, &status);
+    ASSERT_WITH_MESSAGE(U_SUCCESS(status), "ICU could not open a break iterator: %s (%d)", u_errorName(status), status);
+    return reinterpret_cast<TextBreakIterator*>(opened);
+}
+#endif // !PLATFORM(IOS)
+
+
+// Iterator text setting
+
+// Points |iterator| at |string| and returns &iterator, or nullptr if ICU reports a failure.
+// 8-bit strings are wrapped in a Latin-1 UText provider; 16-bit strings are handed to
+// ubrk_setText() directly. The characters behind |string| are not copied here, so they
+// presumably have to outlive the iterator's use of them -- confirm against callers.
+static TextBreakIterator* setTextForIterator(TextBreakIterator& iterator, StringView string)
+{
+    UBreakIterator* icuIterator = reinterpret_cast<UBreakIterator*>(&iterator);
+
+    if (string.is8Bit()) {
+        // The UText and its extra buffer live on the stack; pExtra/extraSize tell the
+        // provider where that buffer is and how large it is.
+        UTextWithBuffer textLocal;
+        textLocal.text = UTEXT_INITIALIZER;
+        textLocal.text.extraSize = sizeof(textLocal.buffer);
+        textLocal.text.pExtra = textLocal.buffer;
+
+        UErrorCode openStatus = U_ZERO_ERROR;
+        UText* text = openLatin1UTextProvider(&textLocal, string.characters8(), string.length(), &openStatus);
+        if (U_FAILURE(openStatus)) {
+            LOG_ERROR("openLatin1UTextProvider failed with status %d", openStatus);
+            return nullptr;
+        }
+
+        UErrorCode setTextStatus = U_ZERO_ERROR;
+        ubrk_setUText(icuIterator, text, &setTextStatus);
+        // ubrk_setUText() works on its own shallow clone of the UText, so the local one is
+        // closed on the failure path as well as on the success path.
+        utext_close(text);
+        if (U_FAILURE(setTextStatus)) {
+            LOG_ERROR("ubrk_setUText failed with status %d", setTextStatus);
+            return nullptr;
+        }
+    } else {
+        UErrorCode setTextStatus = U_ZERO_ERROR;
+        ubrk_setText(icuIterator, string.characters16(), string.length(), &setTextStatus);
+        if (U_FAILURE(setTextStatus)) {
+            LOG_ERROR("ubrk_setText failed with status %d", setTextStatus);
+            return nullptr;
+        }
+    }
+
+    return &iterator;
+}
+
+// Points |iterator| at |string| together with |priorContextLength| UChars of |priorContext|
+// and returns &iterator, or nullptr if ICU reports a failure. 8-bit strings use the Latin-1
+// context-aware UText provider, 16-bit strings the UTF-16 one. The characters behind
+// |string| and |priorContext| are not copied here, so they presumably have to outlive the
+// iterator's use of them -- confirm against callers.
+static TextBreakIterator* setContextAwareTextForIterator(TextBreakIterator& iterator, StringView string, const UChar* priorContext, unsigned priorContextLength)
+{
+    UBreakIterator* icuIterator = reinterpret_cast<UBreakIterator*>(&iterator);
+
+    if (string.is8Bit()) {
+        // The UText and its extra buffer live on the stack; pExtra/extraSize tell the
+        // provider where that buffer is and how large it is.
+        UTextWithBuffer textLocal;
+        textLocal.text = UTEXT_INITIALIZER;
+        textLocal.text.extraSize = sizeof(textLocal.buffer);
+        textLocal.text.pExtra = textLocal.buffer;
+
+        UErrorCode openStatus = U_ZERO_ERROR;
+        UText* text = openLatin1ContextAwareUTextProvider(&textLocal, string.characters8(), string.length(), priorContext, priorContextLength, &openStatus);
+        if (U_FAILURE(openStatus)) {
+            LOG_ERROR("openLatin1ContextAwareUTextProvider failed with status %d", openStatus);
+            return nullptr;
+        }
+
+        UErrorCode setTextStatus = U_ZERO_ERROR;
+        ubrk_setUText(icuIterator, text, &setTextStatus);
+        // ubrk_setUText() works on its own shallow clone of the UText, so the local one is
+        // closed on the failure path as well as on the success path.
+        utext_close(text);
+        if (U_FAILURE(setTextStatus)) {
+            LOG_ERROR("ubrk_setUText failed with status %d", setTextStatus);
+            return nullptr;
+        }
+    } else {
+        UText textLocal = UTEXT_INITIALIZER;
+
+        UErrorCode openStatus = U_ZERO_ERROR;
+        UText* text = openUTF16ContextAwareUTextProvider(&textLocal, string.characters16(), string.length(), priorContext, priorContextLength, &openStatus);
+        if (U_FAILURE(openStatus)) {
+            LOG_ERROR("openUTF16ContextAwareUTextProvider failed with status %d", openStatus);
+            return nullptr;
+        }
+
+        UErrorCode setTextStatus = U_ZERO_ERROR;
+        ubrk_setUText(icuIterator, text, &setTextStatus);
+        // Same as above: close the local UText regardless of the outcome.
+        utext_close(text);
+        if (U_FAILURE(setTextStatus)) {
+            LOG_ERROR("ubrk_setUText failed with status %d", setTextStatus);
+            return nullptr;
+        }
+    }
+
+    return &iterator;
+}
+
+
+// Static iterators
+
+// Returns the function-local static UBRK_WORD iterator after pointing it at the |length|
+// UChars starting at |buffer|. Yields nullptr when the iterator could not be opened or the
+// text could not be set. Every call reuses the same iterator object.
+TextBreakIterator* wordBreakIterator(const UChar* buffer, int length)
+{
+    static TextBreakIterator* sharedIterator = initializeIterator(UBRK_WORD);
+    return sharedIterator ? setTextForIterator(*sharedIterator, StringView(buffer, length)) : nullptr;
+}
+
+// Returns the function-local static UBRK_SENTENCE iterator after pointing it at the
+// |length| UChars starting at |buffer|. Yields nullptr when the iterator could not be
+// opened or the text could not be set. Every call reuses the same iterator object.
+TextBreakIterator* sentenceBreakIterator(const UChar* buffer, int length)
+{
+    static TextBreakIterator* sharedIterator = initializeIterator(UBRK_SENTENCE);
+    return sharedIterator ? setTextForIterator(*sharedIterator, StringView(buffer, length)) : nullptr;
+}
+
+// Returns the function-local static cursor-movement iterator after pointing it at the
+// |length| UChars starting at |buffer|, or nullptr if the iterator could not be created or
+// the text could not be set. Outside iOS the iterator is built from the custom rule set
+// below; on iOS it is a UBRK_CHARACTER iterator opened with the "th" locale.
+TextBreakIterator* cursorMovementIterator(const UChar* buffer, int length)
+{
+#if !PLATFORM(IOS)
+    // This rule set is based on character-break iterator rules of ICU 4.0
+    // <http://source.icu-project.org/repos/icu/icu/tags/release-4-0/source/data/brkitr/char.txt>.
+    // The major differences from the original ones are listed below:
+    // * Replaced '[\p{Grapheme_Cluster_Break = SpacingMark}]' with '[\p{General_Category = Spacing Mark} - $Extend]' for ICU 3.8 or earlier;
+    // * Removed rules that prevent a cursor from moving after prepend characters (Bug 24342);
+    // * Added rules that prevent a cursor from moving after virama signs of Indic languages except Tamil (Bug 15790), and;
+    // * Added rules that prevent a cursor from moving before Japanese half-width katakana voiced marks.
+    // * Added rules for regional indicator symbols.
+    static const char* kRules =
+        "$CR      = [\\p{Grapheme_Cluster_Break = CR}];"
+        "$LF      = [\\p{Grapheme_Cluster_Break = LF}];"
+        "$Control = [\\p{Grapheme_Cluster_Break = Control}];"
+        "$VoiceMarks = [\\uFF9E\\uFF9F];"  // Japanese half-width katakana voiced marks
+        "$Extend  = [\\p{Grapheme_Cluster_Break = Extend} $VoiceMarks - [\\u0E30 \\u0E32 \\u0E45 \\u0EB0 \\u0EB2]];"
+        "$SpacingMark = [[\\p{General_Category = Spacing Mark}] - $Extend];"
+        "$L       = [\\p{Grapheme_Cluster_Break = L}];"
+        "$V       = [\\p{Grapheme_Cluster_Break = V}];"
+        "$T       = [\\p{Grapheme_Cluster_Break = T}];"
+        "$LV      = [\\p{Grapheme_Cluster_Break = LV}];"
+        "$LVT     = [\\p{Grapheme_Cluster_Break = LVT}];"
+        "$Hin0    = [\\u0905-\\u0939];"    // Devanagari Letter A,...,Ha
+        "$HinV    = \\u094D;"              // Devanagari Sign Virama
+        "$Hin1    = [\\u0915-\\u0939];"    // Devanagari Letter Ka,...,Ha
+        "$Ben0    = [\\u0985-\\u09B9];"    // Bengali Letter A,...,Ha
+        "$BenV    = \\u09CD;"              // Bengali Sign Virama
+        "$Ben1    = [\\u0995-\\u09B9];"    // Bengali Letter Ka,...,Ha
+        "$Pan0    = [\\u0A05-\\u0A39];"    // Gurmukhi Letter A,...,Ha
+        "$PanV    = \\u0A4D;"              // Gurmukhi Sign Virama
+        "$Pan1    = [\\u0A15-\\u0A39];"    // Gurmukhi Letter Ka,...,Ha
+        "$Guj0    = [\\u0A85-\\u0AB9];"    // Gujarati Letter A,...,Ha
+        "$GujV    = \\u0ACD;"              // Gujarati Sign Virama
+        "$Guj1    = [\\u0A95-\\u0AB9];"    // Gujarati Letter Ka,...,Ha
+        "$Ori0    = [\\u0B05-\\u0B39];"    // Oriya Letter A,...,Ha
+        "$OriV    = \\u0B4D;"              // Oriya Sign Virama
+        "$Ori1    = [\\u0B15-\\u0B39];"    // Oriya Letter Ka,...,Ha
+        "$Tel0    = [\\u0C05-\\u0C39];"    // Telugu Letter A,...,Ha
+        "$TelV    = \\u0C4D;"              // Telugu Sign Virama
+        "$Tel1    = [\\u0C14-\\u0C39];"    // Telugu Letter Ka,...,Ha. NOTE(review): range starts at U+0C14 (Au), while the other scripts start at their Ka (xx15/xx95) -- confirm whether U+0C15 was intended.
+        "$Kan0    = [\\u0C85-\\u0CB9];"    // Kannada Letter A,...,Ha
+        "$KanV    = \\u0CCD;"              // Kannada Sign Virama
+        "$Kan1    = [\\u0C95-\\u0CB9];"    // Kannada Letter Ka,...,Ha
+        "$Mal0    = [\\u0D05-\\u0D39];"    // Malayalam Letter A,...,Ha
+        "$MalV    = \\u0D4D;"              // Malayalam Sign Virama
+        "$Mal1    = [\\u0D15-\\u0D39];"    // Malayalam Letter Ka,...,Ha
+        "$RI      = [\\U0001F1E6-\\U0001F1FF];" // Emoji regional indicators
+        "!!chain;"
+        "!!forward;"
+        "$CR $LF;"
+        "$L ($L | $V | $LV | $LVT);"
+        "($LV | $V) ($V | $T);"
+        "($LVT | $T) $T;"
+        "[^$Control $CR $LF] $Extend;"
+        "[^$Control $CR $LF] $SpacingMark;"
+        "$RI $RI / $RI;"
+        "$RI $RI;"
+        "$Hin0 $HinV $Hin1;"               // Devanagari Virama (forward)
+        "$Ben0 $BenV $Ben1;"               // Bengali Virama (forward)
+        "$Pan0 $PanV $Pan1;"               // Gurmukhi Virama (forward)
+        "$Guj0 $GujV $Guj1;"               // Gujarati Virama (forward)
+        "$Ori0 $OriV $Ori1;"               // Oriya Virama (forward)
+        "$Tel0 $TelV $Tel1;"               // Telugu Virama (forward)
+        "$Kan0 $KanV $Kan1;"               // Kannada Virama (forward)
+        "$Mal0 $MalV $Mal1;"               // Malayalam Virama (forward)
+        "!!reverse;"
+        "$LF $CR;"
+        "($L | $V | $LV | $LVT) $L;"
+        "($V | $T) ($LV | $V);"
+        "$T ($LVT | $T);"
+        "$Extend      [^$Control $CR $LF];"
+        "$SpacingMark [^$Control $CR $LF];"
+        "$RI $RI / $RI $RI;"
+        "$RI $RI;"
+        "$Hin1 $HinV $Hin0;"               // Devanagari Virama (backward)
+        "$Ben1 $BenV $Ben0;"               // Bengali Virama (backward)
+        "$Pan1 $PanV $Pan0;"               // Gurmukhi Virama (backward)
+        "$Guj1 $GujV $Guj0;"               // Gujarati Virama (backward)
+        "$Ori1 $OriV $Ori0;"               // Oriya Virama (backward)
+        "$Tel1 $TelV $Tel0;"               // Telugu Virama (backward)
+        "$Kan1 $KanV $Kan0;"               // Kannada Virama (backward)
+        "$Mal1 $MalV $Mal0;"               // Malayalam Virama (backward)
+        "!!safe_reverse;"
+        "!!safe_forward;";
+    static TextBreakIterator* staticCursorMovementIterator = initializeIteratorWithRules(kRules);
+#else // PLATFORM(IOS)
+    // Use the special Thai character break iterator for all locales
+    static TextBreakIterator* staticCursorMovementIterator = initializeIterator(UBRK_CHARACTER, "th");
+#endif // !PLATFORM(IOS)
+
+    // Opening the iterator may have failed; there is nothing to attach text to in that case.
+    if (!staticCursorMovementIterator)
+        return nullptr;
+
+    return setTextForIterator(*staticCursorMovementIterator, StringView(buffer, length));
+}
+
+// Takes a line break iterator for |locale| from the shared LineBreakIteratorPool and points
+// it at |string| plus |priorContextLength| UChars of |priorContext|. Returns nullptr if the
+// pool has no iterator to give or the text could not be set. A non-null result is expected
+// to be handed back through releaseLineBreakIterator().
+TextBreakIterator* acquireLineBreakIterator(StringView string, const AtomicString& locale, const UChar* priorContext, unsigned priorContextLength)
+{
+    TextBreakIterator* iterator = reinterpret_cast<TextBreakIterator*>(LineBreakIteratorPool::sharedPool().take(locale));
+    if (!iterator)
+        return nullptr;
+
+    if (setContextAwareTextForIterator(*iterator, string, priorContext, priorContextLength))
+        return iterator;
+
+    // Setting the text failed. The caller only receives nullptr and therefore cannot release
+    // the iterator, so return it to the pool here rather than leaving it checked out.
+    LineBreakIteratorPool::sharedPool().put(reinterpret_cast<UBreakIterator*>(iterator));
+    return nullptr;
+}
+
+// Hands |iterator| back to the shared LineBreakIteratorPool. |iterator| must not be null
+// (checked by ASSERT_ARG).
+void releaseLineBreakIterator(TextBreakIterator* iterator)
+{
+    ASSERT_ARG(iterator, iterator);
+
+    UBreakIterator* pooledIterator = reinterpret_cast<UBreakIterator*>(iterator);
+    LineBreakIteratorPool::sharedPool().put(pooledIterator);
+}
+
+// Single-slot cache holding at most one idle character break iterator. It is only written
+// through compareAndSwapNonSharedCharacterBreakIterator().
+static TextBreakIterator* nonSharedCharacterBreakIterator;
+
+// Stores |newValue| in nonSharedCharacterBreakIterator if the slot currently holds
+// |expected|. With COMPARE_AND_SWAP enabled the result is whatever WTF::weakCompareAndSwap()
+// reports; otherwise the check-and-store runs under a mutex and the result is true exactly
+// when the store happened.
+static inline bool compareAndSwapNonSharedCharacterBreakIterator(TextBreakIterator* expected, TextBreakIterator* newValue)
+{
+#if ENABLE(COMPARE_AND_SWAP)
+    void** slot = reinterpret_cast<void**>(&nonSharedCharacterBreakIterator);
+    return WTF::weakCompareAndSwap(slot, expected, newValue);
+#else
+    DEFINE_STATIC_LOCAL(Mutex, nonSharedCharacterBreakIteratorMutex, ());
+    MutexLocker locker(nonSharedCharacterBreakIteratorMutex);
+    if (nonSharedCharacterBreakIterator == expected) {
+        nonSharedCharacterBreakIterator = newValue;
+        return true;
+    }
+    return false;
+#endif
+}
+
+// Obtains a character break iterator for exclusive use by this object and points it at the
+// |length| UChars starting at |buffer|. The cached iterator is claimed when one is available;
+// otherwise a new UBRK_CHARACTER iterator is opened. m_iterator ends up null if no iterator
+// could be opened or the text could not be set.
+NonSharedCharacterBreakIterator::NonSharedCharacterBreakIterator(const UChar* buffer, int length)
+{
+    // Try to take the cached iterator out of the shared slot; if the slot is empty or the
+    // swap does not succeed, fall back to opening a fresh iterator.
+    TextBreakIterator* iterator = nonSharedCharacterBreakIterator;
+    bool claimedCachedIterator = iterator && compareAndSwapNonSharedCharacterBreakIterator(iterator, nullptr);
+    if (!claimedCachedIterator)
+        iterator = initializeIterator(UBRK_CHARACTER);
+
+    m_iterator = nullptr;
+    if (!iterator)
+        return;
+
+    m_iterator = setTextForIterator(*iterator, StringView(buffer, length));
+
+    // On failure m_iterator is null and the destructor can no longer reach the iterator we
+    // own, so close it here instead of leaking it.
+    if (!m_iterator)
+        ubrk_close(reinterpret_cast<UBreakIterator*>(iterator));
+}
+
+// Offers this object's iterator to the shared single-slot cache; if the swap into an empty
+// slot does not succeed, the iterator is closed instead.
+NonSharedCharacterBreakIterator::~NonSharedCharacterBreakIterator()
+{
+    bool cachedForReuse = compareAndSwapNonSharedCharacterBreakIterator(nullptr, m_iterator);
+    if (cachedForReuse)
+        return;
+
+    ubrk_close(reinterpret_cast<UBreakIterator*>(m_iterator));
+}
+
+
+// Iterator implementation.
+
+// Forwards to ubrk_first() on the underlying ICU iterator and returns its result.
+int textBreakFirst(TextBreakIterator* iterator)
+{
+    UBreakIterator* icuIterator = reinterpret_cast<UBreakIterator*>(iterator);
+    return ubrk_first(icuIterator);
+}
+
+// Forwards to ubrk_last() on the underlying ICU iterator and returns its result.
+int textBreakLast(TextBreakIterator* iterator)
+{
+    UBreakIterator* icuIterator = reinterpret_cast<UBreakIterator*>(iterator);
+    return ubrk_last(icuIterator);
+}
+
+// Forwards to ubrk_next() on the underlying ICU iterator and returns its result.
+int textBreakNext(TextBreakIterator* iterator)
+{
+    UBreakIterator* icuIterator = reinterpret_cast<UBreakIterator*>(iterator);
+    return ubrk_next(icuIterator);
+}
+
+// Forwards to ubrk_previous() on the underlying ICU iterator and returns its result.
+int textBreakPrevious(TextBreakIterator* iterator)
+{
+    UBreakIterator* icuIterator = reinterpret_cast<UBreakIterator*>(iterator);
+    return ubrk_previous(icuIterator);
+}
+
+// Forwards to ubrk_preceding() with offset |pos| on the underlying ICU iterator and returns
+// its result.
+int textBreakPreceding(TextBreakIterator* iterator, int pos)
+{
+    UBreakIterator* icuIterator = reinterpret_cast<UBreakIterator*>(iterator);
+    return ubrk_preceding(icuIterator, pos);
+}
+
+// Forwards to ubrk_following() with offset |pos| on the underlying ICU iterator and returns
+// its result.
+int textBreakFollowing(TextBreakIterator* iterator, int pos)
+{
+    UBreakIterator* icuIterator = reinterpret_cast<UBreakIterator*>(iterator);
+    return ubrk_following(icuIterator, pos);
+}
+
+// Forwards to ubrk_current() on the underlying ICU iterator and returns its result.
+int textBreakCurrent(TextBreakIterator* iterator)
+{
+    UBreakIterator* icuIterator = reinterpret_cast<UBreakIterator*>(iterator);
+    return ubrk_current(icuIterator);
+}
+
+// Forwards to ubrk_isBoundary() for |position| on the underlying ICU iterator and returns
+// its result as a bool.
+bool isTextBreak(TextBreakIterator* iterator, int position)
+{
+    UBreakIterator* icuIterator = reinterpret_cast<UBreakIterator*>(iterator);
+    return ubrk_isBoundary(icuIterator, position);
+}
+
+// Returns true when the rule status ICU reports for the iterator (ubrk_getRuleStatus()) is
+// anything other than UBRK_WORD_NONE.
+bool isWordTextBreak(TextBreakIterator* iterator)
+{
+    UBreakIterator* icuIterator = reinterpret_cast<UBreakIterator*>(iterator);
+    return ubrk_getRuleStatus(icuIterator) != UBRK_WORD_NONE;
+}
+
 unsigned numGraphemeClusters(const String& s)
 {
     unsigned stringLength = s.length();
diff --git a/Source/WebCore/platform/text/TextBreakIteratorICU.cpp b/Source/WebCore/platform/text/TextBreakIteratorICU.cpp
deleted file mode 100644 (file)
index a1e4503..0000000
+++ /dev/null
@@ -1,358 +0,0 @@
-/*
- * Copyright (C) 2006 Lars Knoll <lars@trolltech.com>
- * Copyright (C) 2007, 2011, 2012 Apple Inc. All rights reserved.
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Library General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Library General Public License for more details.
- *
- * You should have received a copy of the GNU Library General Public License
- * along with this library; see the file COPYING.LIB.  If not, write to
- * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
- * Boston, MA 02110-1301, USA.
- *
- */
-
-#include "config.h"
-#include "TextBreakIterator.h"
-
-#include "LineBreakIteratorPoolICU.h"
-#include "UTextProviderLatin1.h"
-#include "UTextProviderUTF16.h"
-#include <wtf/Atomics.h>
-#include <wtf/text/StringView.h>
-#include <wtf/text/WTFString.h>
-
-using namespace WTF;
-
-namespace WebCore {
-
-// Iterator initialization
-
-static TextBreakIterator* initializeIterator(UBreakIteratorType type, const char* locale = currentTextBreakLocaleID())
-{
-    UErrorCode openStatus = U_ZERO_ERROR;
-    TextBreakIterator* iterator = reinterpret_cast<TextBreakIterator*>(ubrk_open(type, locale, 0, 0, &openStatus));
-    ASSERT_WITH_MESSAGE(U_SUCCESS(openStatus), "ICU could not open a break iterator: %s (%d)", u_errorName(openStatus), openStatus);
-    return iterator;
-}
-
-#if !PLATFORM(IOS)
-static TextBreakIterator* initializeIteratorWithRules(const char* breakRules)
-{
-    UParseError parseStatus;
-    UErrorCode openStatus = U_ZERO_ERROR;
-    String rules(breakRules);
-    TextBreakIterator* iterator = reinterpret_cast<TextBreakIterator*>(ubrk_openRules(rules.deprecatedCharacters(), rules.length(), 0, 0, &parseStatus, &openStatus));
-    ASSERT_WITH_MESSAGE(U_SUCCESS(openStatus), "ICU could not open a break iterator: %s (%d)", u_errorName(openStatus), openStatus);
-    return iterator;
-}
-#endif // !PLATFORM(IOS)
-
-
-// Iterator text setting
-
-static TextBreakIterator* setTextForIterator(TextBreakIterator& iterator, StringView string)
-{
-    if (string.is8Bit()) {
-        UTextWithBuffer textLocal;
-        textLocal.text = UTEXT_INITIALIZER;
-        textLocal.text.extraSize = sizeof(textLocal.buffer);
-        textLocal.text.pExtra = textLocal.buffer;
-
-        UErrorCode openStatus = U_ZERO_ERROR;
-        UText* text = openLatin1UTextProvider(&textLocal, string.characters8(), string.length(), &openStatus);
-        if (U_FAILURE(openStatus)) {
-            LOG_ERROR("uTextOpenLatin1 failed with status %d", openStatus);
-            return nullptr;
-        }
-
-        UErrorCode setTextStatus = U_ZERO_ERROR;
-        ubrk_setUText(reinterpret_cast<UBreakIterator*>(&iterator), text, &setTextStatus);
-        if (U_FAILURE(setTextStatus)) {
-            LOG_ERROR("ubrk_setUText failed with status %d", setTextStatus);
-            return nullptr;
-        }
-
-        utext_close(text);
-    } else {
-        UErrorCode setTextStatus = U_ZERO_ERROR;
-        ubrk_setText(reinterpret_cast<UBreakIterator*>(&iterator), string.characters16(), string.length(), &setTextStatus);
-        if (U_FAILURE(setTextStatus))
-            return nullptr;
-    }
-
-    return &iterator;
-}
-
-static TextBreakIterator* setContextAwareTextForIterator(TextBreakIterator& iterator, StringView string, const UChar* priorContext, unsigned priorContextLength)
-{
-    if (string.is8Bit()) {
-        UTextWithBuffer textLocal;
-        textLocal.text = UTEXT_INITIALIZER;
-        textLocal.text.extraSize = sizeof(textLocal.buffer);
-        textLocal.text.pExtra = textLocal.buffer;
-
-        UErrorCode openStatus = U_ZERO_ERROR;
-        UText* text = openLatin1ContextAwareUTextProvider(&textLocal, string.characters8(), string.length(), priorContext, priorContextLength, &openStatus);
-        if (U_FAILURE(openStatus)) {
-            LOG_ERROR("openLatin1ContextAwareUTextProvider failed with status %d", openStatus);
-            return nullptr;
-        }
-
-        UErrorCode setTextStatus = U_ZERO_ERROR;
-        ubrk_setUText(reinterpret_cast<UBreakIterator*>(&iterator), text, &setTextStatus);
-        if (U_FAILURE(setTextStatus)) {
-            LOG_ERROR("ubrk_setUText failed with status %d", setTextStatus);
-            return nullptr;
-        }
-
-        utext_close(text);
-    } else {
-        UText textLocal = UTEXT_INITIALIZER;
-
-        UErrorCode openStatus = U_ZERO_ERROR;
-        UText* text = openUTF16ContextAwareUTextProvider(&textLocal, string.characters16(), string.length(), priorContext, priorContextLength, &openStatus);
-        if (U_FAILURE(openStatus)) {
-            LOG_ERROR("openUTF16ContextAwareUTextProvider failed with status %d", openStatus);
-            return 0;
-        }
-
-        UErrorCode setTextStatus = U_ZERO_ERROR;
-        ubrk_setUText(reinterpret_cast<UBreakIterator*>(&iterator), text, &setTextStatus);
-        if (U_FAILURE(setTextStatus)) {
-            LOG_ERROR("ubrk_setUText failed with status %d", setTextStatus);
-            return nullptr;
-        }
-
-        utext_close(text);
-    }
-
-    return &iterator;
-}
-
-
-// Static iterators
-
-TextBreakIterator* wordBreakIterator(const UChar* buffer, int length)
-{
-    static TextBreakIterator* staticWordBreakIterator = initializeIterator(UBRK_WORD);
-    if (!staticWordBreakIterator)
-        return nullptr;
-
-    return setTextForIterator(*staticWordBreakIterator, StringView(buffer, length));
-}
-
-TextBreakIterator* sentenceBreakIterator(const UChar* buffer, int length)
-{
-    static TextBreakIterator* staticSentenceBreakIterator = initializeIterator(UBRK_SENTENCE);
-    if (!staticSentenceBreakIterator)
-        return nullptr;
-
-    return setTextForIterator(*staticSentenceBreakIterator, StringView(buffer, length));
-}
-
-// Returns the cursor-movement break iterator after pointing it at |buffer| / |length|.
-// The iterator is a function-local static that is created on the first call; if that
-// creation yielded null, every call returns nullptr. Otherwise the result is whatever
-// setTextForIterator() returns for the given text.
-//
-// Outside of iOS the iterator is built from the custom rule set below; on iOS the
-// UBRK_CHARACTER iterator for the "th" locale is used instead.
-TextBreakIterator* cursorMovementIterator(const UChar* buffer, int length)
-{
-#if !PLATFORM(IOS)
-    // This rule set is based on character-break iterator rules of ICU 4.0
-    // <http://source.icu-project.org/repos/icu/icu/tags/release-4-0/source/data/brkitr/char.txt>.
-    // The major differences from the original ones are listed below:
-    // * Replaced '[\p{Grapheme_Cluster_Break = SpacingMark}]' with '[\p{General_Category = Spacing Mark} - $Extend]' for ICU 3.8 or earlier;
-    // * Removed rules that prevent a cursor from moving after prepend characters (Bug 24342);
-    // * Added rules that prevent a cursor from moving after virama signs of Indic languages except Tamil (Bug 15790);
-    // * Added rules that prevent a cursor from moving before Japanese half-width katakana voiced marks, and;
-    // * Added rules for regional indicator symbols.
-    //
-    // Naming used by the Indic variables: $xxx0 is the set allowed before the virama $xxxV,
-    // and $xxx1 is the set allowed after it (see the "Virama" rules in both directions).
-    // NOTE(review): $Tel1 starts at U+0C14 (Telugu Letter Au), whereas every other $xxx1 set
-    // starts at the script's Ka (U+xx15). U+0C15 may have been intended -- confirm before changing.
-    static const char* kRules =
-        "$CR      = [\\p{Grapheme_Cluster_Break = CR}];"
-        "$LF      = [\\p{Grapheme_Cluster_Break = LF}];"
-        "$Control = [\\p{Grapheme_Cluster_Break = Control}];"
-        "$VoiceMarks = [\\uFF9E\\uFF9F];"  // Japanese half-width katakana voiced marks
-        "$Extend  = [\\p{Grapheme_Cluster_Break = Extend} $VoiceMarks - [\\u0E30 \\u0E32 \\u0E45 \\u0EB0 \\u0EB2]];"
-        "$SpacingMark = [[\\p{General_Category = Spacing Mark}] - $Extend];"
-        "$L       = [\\p{Grapheme_Cluster_Break = L}];"
-        "$V       = [\\p{Grapheme_Cluster_Break = V}];"
-        "$T       = [\\p{Grapheme_Cluster_Break = T}];"
-        "$LV      = [\\p{Grapheme_Cluster_Break = LV}];"
-        "$LVT     = [\\p{Grapheme_Cluster_Break = LVT}];"
-        "$Hin0    = [\\u0905-\\u0939];"    // Devanagari Letter A,...,Ha
-        "$HinV    = \\u094D;"              // Devanagari Sign Virama
-        "$Hin1    = [\\u0915-\\u0939];"    // Devanagari Letter Ka,...,Ha
-        "$Ben0    = [\\u0985-\\u09B9];"    // Bengali Letter A,...,Ha
-        "$BenV    = \\u09CD;"              // Bengali Sign Virama
-        "$Ben1    = [\\u0995-\\u09B9];"    // Bengali Letter Ka,...,Ha
-        "$Pan0    = [\\u0A05-\\u0A39];"    // Gurmukhi Letter A,...,Ha
-        "$PanV    = \\u0A4D;"              // Gurmukhi Sign Virama
-        "$Pan1    = [\\u0A15-\\u0A39];"    // Gurmukhi Letter Ka,...,Ha
-        "$Guj0    = [\\u0A85-\\u0AB9];"    // Gujarati Letter A,...,Ha
-        "$GujV    = \\u0ACD;"              // Gujarati Sign Virama
-        "$Guj1    = [\\u0A95-\\u0AB9];"    // Gujarati Letter Ka,...,Ha
-        "$Ori0    = [\\u0B05-\\u0B39];"    // Oriya Letter A,...,Ha
-        "$OriV    = \\u0B4D;"              // Oriya Sign Virama
-        "$Ori1    = [\\u0B15-\\u0B39];"    // Oriya Letter Ka,...,Ha
-        "$Tel0    = [\\u0C05-\\u0C39];"    // Telugu Letter A,...,Ha
-        "$TelV    = \\u0C4D;"              // Telugu Sign Virama
-        "$Tel1    = [\\u0C14-\\u0C39];"    // Telugu Letter Au,...,Ha (see NOTE(review) above)
-        "$Kan0    = [\\u0C85-\\u0CB9];"    // Kannada Letter A,...,Ha
-        "$KanV    = \\u0CCD;"              // Kannada Sign Virama
-        "$Kan1    = [\\u0C95-\\u0CB9];"    // Kannada Letter Ka,...,Ha
-        "$Mal0    = [\\u0D05-\\u0D39];"    // Malayalam Letter A,...,Ha
-        "$MalV    = \\u0D4D;"              // Malayalam Sign Virama
-        "$Mal1    = [\\u0D15-\\u0D39];"    // Malayalam Letter Ka,...,Ha
-        "$RI      = [\\U0001F1E6-\\U0001F1FF];" // Emoji regional indicators
-        "!!chain;"
-        "!!forward;"
-        "$CR $LF;"
-        "$L ($L | $V | $LV | $LVT);"
-        "($LV | $V) ($V | $T);"
-        "($LVT | $T) $T;"
-        "[^$Control $CR $LF] $Extend;"
-        "[^$Control $CR $LF] $SpacingMark;"
-        "$RI $RI / $RI;"
-        "$RI $RI;"
-        "$Hin0 $HinV $Hin1;"               // Devanagari Virama (forward)
-        "$Ben0 $BenV $Ben1;"               // Bengali Virama (forward)
-        "$Pan0 $PanV $Pan1;"               // Gurmukhi Virama (forward)
-        "$Guj0 $GujV $Guj1;"               // Gujarati Virama (forward)
-        "$Ori0 $OriV $Ori1;"               // Oriya Virama (forward)
-        "$Tel0 $TelV $Tel1;"               // Telugu Virama (forward)
-        "$Kan0 $KanV $Kan1;"               // Kannada Virama (forward)
-        "$Mal0 $MalV $Mal1;"               // Malayalam Virama (forward)
-        "!!reverse;"
-        "$LF $CR;"
-        "($L | $V | $LV | $LVT) $L;"
-        "($V | $T) ($LV | $V);"
-        "$T ($LVT | $T);"
-        "$Extend      [^$Control $CR $LF];"
-        "$SpacingMark [^$Control $CR $LF];"
-        "$RI $RI / $RI $RI;"
-        "$RI $RI;"
-        "$Hin1 $HinV $Hin0;"               // Devanagari Virama (backward)
-        "$Ben1 $BenV $Ben0;"               // Bengali Virama (backward)
-        "$Pan1 $PanV $Pan0;"               // Gurmukhi Virama (backward)
-        "$Guj1 $GujV $Guj0;"               // Gujarati Virama (backward)
-        "$Ori1 $OriV $Ori0;"               // Oriya Virama (backward)
-        "$Tel1 $TelV $Tel0;"               // Telugu Virama (backward)
-        "$Kan1 $KanV $Kan0;"               // Kannada Virama (backward)
-        "$Mal1 $MalV $Mal0;"               // Malayalam Virama (backward)
-        "!!safe_reverse;"
-        "!!safe_forward;";
-    static TextBreakIterator* staticCursorMovementIterator = initializeIteratorWithRules(kRules);
-#else // PLATFORM(IOS)
-    // Use the special Thai character break iterator for all locales
-    static TextBreakIterator* staticCursorMovementIterator = initializeIterator(UBRK_CHARACTER, "th");
-#endif // !PLATFORM(IOS)
-
-    // Initialization failed (the static stays null for the rest of the run).
-    if (!staticCursorMovementIterator)
-        return nullptr;
-
-    return setTextForIterator(*staticCursorMovementIterator, StringView(buffer, length));
-}
-
-// Takes an iterator for |locale| from the shared LineBreakIteratorPool and hands it,
-// together with |string| and the prior context, to setContextAwareTextForIterator().
-// Returns nullptr when the pool yields no iterator. The caller is expected to give the
-// iterator back through releaseLineBreakIterator().
-TextBreakIterator* acquireLineBreakIterator(StringView string, const AtomicString& locale, const UChar* priorContext, unsigned priorContextLength)
-{
-    TextBreakIterator* pooledIterator = reinterpret_cast<TextBreakIterator*>(LineBreakIteratorPool::sharedPool().take(locale));
-    if (pooledIterator)
-        return setContextAwareTextForIterator(*pooledIterator, string, priorContext, priorContextLength);
-
-    return nullptr;
-}
-
-// Puts |iterator| back into the shared LineBreakIteratorPool.
-// |iterator| is checked with ASSERT_ARG and must not be null.
-void releaseLineBreakIterator(TextBreakIterator* iterator)
-{
-    ASSERT_ARG(iterator, iterator);
-
-    UBreakIterator* icuIterator = reinterpret_cast<UBreakIterator*>(iterator);
-    LineBreakIteratorPool::sharedPool().put(icuIterator);
-}
-
-// Single-slot cache holding at most one idle character-break iterator (null when empty).
-static TextBreakIterator* nonSharedCharacterBreakIterator;
-
-// Stores |newValue| into the cache slot if the slot currently holds |expected|.
-// Returns true when the slot was updated. With ENABLE(COMPARE_AND_SWAP) the result is
-// whatever weakCompareAndSwap() reports; otherwise the check and store run under a mutex.
-static inline bool compareAndSwapNonSharedCharacterBreakIterator(TextBreakIterator* expected, TextBreakIterator* newValue)
-{
-#if ENABLE(COMPARE_AND_SWAP)
-    void** cacheSlot = reinterpret_cast<void**>(&nonSharedCharacterBreakIterator);
-    return weakCompareAndSwap(cacheSlot, expected, newValue);
-#else
-    DEFINE_STATIC_LOCAL(Mutex, nonSharedCharacterBreakIteratorMutex, ());
-    MutexLocker locker(nonSharedCharacterBreakIteratorMutex);
-    if (nonSharedCharacterBreakIterator == expected) {
-        nonSharedCharacterBreakIterator = newValue;
-        return true;
-    }
-    return false;
-#endif
-}
-
-// Obtains a character-break iterator for exclusive use by this object and points it at
-// |buffer| / |length|. The cached iterator is claimed when one is present and the swap
-// succeeds; otherwise a fresh UBRK_CHARACTER iterator is created. m_iterator ends up null
-// if creation fails, and otherwise holds whatever setTextForIterator() returns.
-NonSharedCharacterBreakIterator::NonSharedCharacterBreakIterator(const UChar* buffer, int length)
-{
-    m_iterator = nonSharedCharacterBreakIterator;
-
-    // Only a successful swap-to-null gives us ownership of the cached iterator.
-    bool claimedCachedIterator = m_iterator && compareAndSwapNonSharedCharacterBreakIterator(m_iterator, nullptr);
-    m_iterator = claimedCachedIterator ? m_iterator : initializeIterator(UBRK_CHARACTER);
-
-    if (m_iterator)
-        m_iterator = setTextForIterator(*m_iterator, StringView(buffer, length));
-}
-
-// Tries to park m_iterator in the empty cache slot for later reuse; if the slot could not
-// be updated, the iterator is closed with ubrk_close() instead.
-NonSharedCharacterBreakIterator::~NonSharedCharacterBreakIterator()
-{
-    bool parkedForReuse = compareAndSwapNonSharedCharacterBreakIterator(nullptr, m_iterator);
-    if (parkedForReuse)
-        return;
-
-    ubrk_close(reinterpret_cast<UBreakIterator*>(m_iterator));
-}
-
-
-// Iterator implementation.
-
-// Forwards to ICU's ubrk_first() on the underlying UBreakIterator and returns its result.
-int textBreakFirst(TextBreakIterator* iterator)
-{
-    UBreakIterator* icuIterator = reinterpret_cast<UBreakIterator*>(iterator);
-    return ubrk_first(icuIterator);
-}
-
-// Forwards to ICU's ubrk_last() on the underlying UBreakIterator and returns its result.
-int textBreakLast(TextBreakIterator* iterator)
-{
-    UBreakIterator* icuIterator = reinterpret_cast<UBreakIterator*>(iterator);
-    return ubrk_last(icuIterator);
-}
-
-// Forwards to ICU's ubrk_next() on the underlying UBreakIterator and returns its result.
-int textBreakNext(TextBreakIterator* iterator)
-{
-    UBreakIterator* icuIterator = reinterpret_cast<UBreakIterator*>(iterator);
-    return ubrk_next(icuIterator);
-}
-
-// Forwards to ICU's ubrk_previous() on the underlying UBreakIterator and returns its result.
-int textBreakPrevious(TextBreakIterator* iterator)
-{
-    UBreakIterator* icuIterator = reinterpret_cast<UBreakIterator*>(iterator);
-    return ubrk_previous(icuIterator);
-}
-
-// Forwards |pos| to ICU's ubrk_preceding() on the underlying UBreakIterator and returns its result.
-int textBreakPreceding(TextBreakIterator* iterator, int pos)
-{
-    UBreakIterator* icuIterator = reinterpret_cast<UBreakIterator*>(iterator);
-    return ubrk_preceding(icuIterator, pos);
-}
-
-// Forwards |pos| to ICU's ubrk_following() on the underlying UBreakIterator and returns its result.
-int textBreakFollowing(TextBreakIterator* iterator, int pos)
-{
-    UBreakIterator* icuIterator = reinterpret_cast<UBreakIterator*>(iterator);
-    return ubrk_following(icuIterator, pos);
-}
-
-// Forwards to ICU's ubrk_current() on the underlying UBreakIterator and returns its result.
-int textBreakCurrent(TextBreakIterator* iterator)
-{
-    UBreakIterator* icuIterator = reinterpret_cast<UBreakIterator*>(iterator);
-    return ubrk_current(icuIterator);
-}
-
-// Forwards |position| to ICU's ubrk_isBoundary() on the underlying UBreakIterator and
-// returns its result converted to bool.
-bool isTextBreak(TextBreakIterator* iterator, int position)
-{
-    UBreakIterator* icuIterator = reinterpret_cast<UBreakIterator*>(iterator);
-    return ubrk_isBoundary(icuIterator, position);
-}
-
-// Returns true when the rule status ICU reports for the iterator (ubrk_getRuleStatus())
-// is anything other than UBRK_WORD_NONE.
-bool isWordTextBreak(TextBreakIterator* iterator)
-{
-    UBreakIterator* icuIterator = reinterpret_cast<UBreakIterator*>(iterator);
-    return ubrk_getRuleStatus(icuIterator) != UBRK_WORD_NONE;
-}
-
-}