Stop compiling our own cursorMovementIterator()
[WebKit-https.git] / Source / WTF / wtf / text / TextBreakIterator.cpp
1 /*
2  * (C) 1999 Lars Knoll (knoll@kde.org)
3  * Copyright (C) 2004-2016 Apple Inc. All rights reserved.
4  * Copyright (C) 2007-2009 Torch Mobile, Inc.
5  *
6  * This library is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Library General Public
8  * License as published by the Free Software Foundation; either
9  * version 2 of the License, or (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Library General Public License for more details.
15  *
16  * You should have received a copy of the GNU Library General Public License
17  * along with this library; see the file COPYING.LIB.  If not, write to
18  * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
19  * Boston, MA 02110-1301, USA.
20  */
21
22 #include "config.h"
23 #include "TextBreakIterator.h"
24
25 #include "LineBreakIteratorPoolICU.h"
26 #include "TextBreakIteratorInternalICU.h"
27 #include "UTextProviderLatin1.h"
28 #include "UTextProviderUTF16.h"
29 #include <atomic>
30 #include <mutex>
31 #include <unicode/ubrk.h>
32 #include <wtf/text/StringBuilder.h>
33
34 // FIXME: This needs a better name
35 #define ADDITIONAL_EMOJI_SUPPORT (PLATFORM(IOS) || (PLATFORM(MAC) && __MAC_OS_X_VERSION_MIN_REQUIRED >= 101100))
36
37 namespace WTF {
38
39 #if !PLATFORM(MAC) && !PLATFORM(IOS)
40
41 static Variant<TextBreakIteratorICU, TextBreakIteratorPlatform> mapModeToBackingIterator(StringView string, TextBreakIterator::Mode mode, const AtomicString& locale)
42 {
43     switch (mode) {
44     case TextBreakIterator::Mode::Line:
45         return TextBreakIteratorICU(string, TextBreakIteratorICU::Mode::Line, locale.string().utf8().data());
46     case TextBreakIterator::Mode::Cursor:
47         return TextBreakIteratorICU(string, TextBreakIteratorICU::Mode::Character, locale.string().utf8().data());
48     case TextBreakIterator::Mode::Delete:
49         return TextBreakIteratorICU(string, TextBreakIteratorICU::Mode::Character, locale.string().utf8().data());
50     default:
51         ASSERT_NOT_REACHED();
52         return TextBreakIteratorICU(string, TextBreakIteratorICU::Mode::Character, locale.string().utf8().data());
53     }
54 }
55
56 TextBreakIterator::TextBreakIterator(StringView string, Mode mode, const AtomicString& locale)
57     : m_backing(mapModeToBackingIterator(string, mode, locale))
58     , m_mode(mode)
59     , m_locale(locale)
60 {
61 }
62
63 #endif
64
65 // Iterator initialization
66
67 static UBreakIterator* initializeIterator(UBreakIteratorType type, const char* locale = currentTextBreakLocaleID())
68 {
69     UErrorCode openStatus = U_ZERO_ERROR;
70     UBreakIterator* iterator = ubrk_open(type, locale, 0, 0, &openStatus);
71     ASSERT_WITH_MESSAGE(U_SUCCESS(openStatus), "ICU could not open a break iterator: %s (%d)", u_errorName(openStatus), openStatus);
72     return iterator;
73 }
74
75 // Iterator text setting
76
77 static UBreakIterator* setTextForIterator(UBreakIterator& iterator, StringView string)
78 {
79     if (string.is8Bit()) {
80         UTextWithBuffer textLocal;
81         textLocal.text = UTEXT_INITIALIZER;
82         textLocal.text.extraSize = sizeof(textLocal.buffer);
83         textLocal.text.pExtra = textLocal.buffer;
84
85         UErrorCode openStatus = U_ZERO_ERROR;
86         UText* text = openLatin1UTextProvider(&textLocal, string.characters8(), string.length(), &openStatus);
87         if (U_FAILURE(openStatus)) {
88             LOG_ERROR("uTextOpenLatin1 failed with status %d", openStatus);
89             return nullptr;
90         }
91
92         UErrorCode setTextStatus = U_ZERO_ERROR;
93         ubrk_setUText(&iterator, text, &setTextStatus);
94         if (U_FAILURE(setTextStatus)) {
95             LOG_ERROR("ubrk_setUText failed with status %d", setTextStatus);
96             return nullptr;
97         }
98
99         utext_close(text);
100     } else {
101         UErrorCode setTextStatus = U_ZERO_ERROR;
102         ubrk_setText(&iterator, string.characters16(), string.length(), &setTextStatus);
103         if (U_FAILURE(setTextStatus))
104             return nullptr;
105     }
106
107     return &iterator;
108 }
109
110 static UBreakIterator* setContextAwareTextForIterator(UBreakIterator& iterator, StringView string, const UChar* priorContext, unsigned priorContextLength)
111 {
112     if (string.is8Bit()) {
113         UTextWithBuffer textLocal;
114         textLocal.text = UTEXT_INITIALIZER;
115         textLocal.text.extraSize = sizeof(textLocal.buffer);
116         textLocal.text.pExtra = textLocal.buffer;
117
118         UErrorCode openStatus = U_ZERO_ERROR;
119         UText* text = openLatin1ContextAwareUTextProvider(&textLocal, string.characters8(), string.length(), priorContext, priorContextLength, &openStatus);
120         if (U_FAILURE(openStatus)) {
121             LOG_ERROR("openLatin1ContextAwareUTextProvider failed with status %d", openStatus);
122             return nullptr;
123         }
124
125         UErrorCode setTextStatus = U_ZERO_ERROR;
126         ubrk_setUText(&iterator, text, &setTextStatus);
127         if (U_FAILURE(setTextStatus)) {
128             LOG_ERROR("ubrk_setUText failed with status %d", setTextStatus);
129             return nullptr;
130         }
131
132         utext_close(text);
133     } else {
134         UText textLocal = UTEXT_INITIALIZER;
135
136         UErrorCode openStatus = U_ZERO_ERROR;
137         UText* text = openUTF16ContextAwareUTextProvider(&textLocal, string.characters16(), string.length(), priorContext, priorContextLength, &openStatus);
138         if (U_FAILURE(openStatus)) {
139             LOG_ERROR("openUTF16ContextAwareUTextProvider failed with status %d", openStatus);
140             return 0;
141         }
142
143         UErrorCode setTextStatus = U_ZERO_ERROR;
144         ubrk_setUText(&iterator, text, &setTextStatus);
145         if (U_FAILURE(setTextStatus)) {
146             LOG_ERROR("ubrk_setUText failed with status %d", setTextStatus);
147             return nullptr;
148         }
149
150         utext_close(text);
151     }
152
153     return &iterator;
154 }
155
156
157 // Static iterators
158
159 UBreakIterator* wordBreakIterator(StringView string)
160 {
161     static UBreakIterator* staticWordBreakIterator = initializeIterator(UBRK_WORD);
162     if (!staticWordBreakIterator)
163         return nullptr;
164
165     return setTextForIterator(*staticWordBreakIterator, string);
166 }
167
168 UBreakIterator* sentenceBreakIterator(StringView string)
169 {
170     static UBreakIterator* staticSentenceBreakIterator = initializeIterator(UBRK_SENTENCE);
171     if (!staticSentenceBreakIterator)
172         return nullptr;
173
174     return setTextForIterator(*staticSentenceBreakIterator, string);
175 }
176
177 UBreakIterator* acquireLineBreakIterator(StringView string, const AtomicString& locale, const UChar* priorContext, unsigned priorContextLength, LineBreakIteratorMode mode)
178 {
179     UBreakIterator* iterator = LineBreakIteratorPool::sharedPool().take(locale, mode);
180     if (!iterator)
181         return nullptr;
182
183     return setContextAwareTextForIterator(*iterator, string, priorContext, priorContextLength);
184 }
185
186 void releaseLineBreakIterator(UBreakIterator* iterator)
187 {
188     ASSERT_ARG(iterator, iterator);
189
190     LineBreakIteratorPool::sharedPool().put(iterator);
191 }
192
193 UBreakIterator* openLineBreakIterator(const AtomicString& locale)
194 {
195     bool localeIsEmpty = locale.isEmpty();
196     UErrorCode openStatus = U_ZERO_ERROR;
197     UBreakIterator* ubrkIter = ubrk_open(UBRK_LINE, localeIsEmpty ? currentTextBreakLocaleID() : locale.string().utf8().data(), 0, 0, &openStatus);
198     // locale comes from a web page and it can be invalid, leading ICU
199     // to fail, in which case we fall back to the default locale.
200     if (!localeIsEmpty && U_FAILURE(openStatus)) {
201         openStatus = U_ZERO_ERROR;
202         ubrkIter = ubrk_open(UBRK_LINE, currentTextBreakLocaleID(), 0, 0, &openStatus);
203     }
204
205     if (U_FAILURE(openStatus)) {
206         LOG_ERROR("ubrk_open failed with status %d", openStatus);
207         return nullptr;
208     }
209
210     return ubrkIter;
211 }
212
213 void closeLineBreakIterator(UBreakIterator*& iterator)
214 {
215     UBreakIterator* ubrkIter = iterator;
216     ASSERT(ubrkIter);
217     ubrk_close(ubrkIter);
218     iterator = nullptr;
219 }
220
221 static std::atomic<UBreakIterator*> nonSharedCharacterBreakIterator = ATOMIC_VAR_INIT(nullptr);
222
223 static inline UBreakIterator* getNonSharedCharacterBreakIterator()
224 {
225     if (auto *res = nonSharedCharacterBreakIterator.exchange(nullptr, std::memory_order_acquire))
226         return res;
227     return initializeIterator(UBRK_CHARACTER);
228 }
229
230 static inline void cacheNonSharedCharacterBreakIterator(UBreakIterator* cacheMe)
231 {
232     if (auto *old = nonSharedCharacterBreakIterator.exchange(cacheMe, std::memory_order_release))
233         ubrk_close(old);
234 }
235
236 NonSharedCharacterBreakIterator::NonSharedCharacterBreakIterator(StringView string)
237 {
238     if ((m_iterator = getNonSharedCharacterBreakIterator()))
239         m_iterator = setTextForIterator(*m_iterator, string);
240 }
241
242 NonSharedCharacterBreakIterator::~NonSharedCharacterBreakIterator()
243 {
244     if (m_iterator)
245         cacheNonSharedCharacterBreakIterator(m_iterator);
246 }
247
248 NonSharedCharacterBreakIterator::NonSharedCharacterBreakIterator(NonSharedCharacterBreakIterator&& other)
249     : m_iterator(nullptr)
250 {
251     std::swap(m_iterator, other.m_iterator);
252 }
253
254 // Iterator implementation.
255
256 bool isWordTextBreak(UBreakIterator* iterator)
257 {
258     int ruleStatus = ubrk_getRuleStatus(iterator);
259     return ruleStatus != UBRK_WORD_NONE;
260 }
261
262 unsigned numGraphemeClusters(StringView string)
263 {
264     unsigned stringLength = string.length();
265     
266     if (!stringLength)
267         return 0;
268
269     // The only Latin-1 Extended Grapheme Cluster is CRLF.
270     if (string.is8Bit()) {
271         auto* characters = string.characters8();
272         unsigned numCRLF = 0;
273         for (unsigned i = 1; i < stringLength; ++i)
274             numCRLF += characters[i - 1] == '\r' && characters[i] == '\n';
275         return stringLength - numCRLF;
276     }
277
278     NonSharedCharacterBreakIterator iterator { string };
279     if (!iterator) {
280         ASSERT_NOT_REACHED();
281         return stringLength;
282     }
283
284     unsigned numGraphemeClusters = 0;
285     while (ubrk_next(iterator) != UBRK_DONE)
286         ++numGraphemeClusters;
287     return numGraphemeClusters;
288 }
289
290 unsigned numCharactersInGraphemeClusters(StringView string, unsigned numGraphemeClusters)
291 {
292     unsigned stringLength = string.length();
293
294     if (stringLength <= numGraphemeClusters)
295         return stringLength;
296
297     // The only Latin-1 Extended Grapheme Cluster is CRLF.
298     if (string.is8Bit()) {
299         auto* characters = string.characters8();
300         unsigned i, j;
301         for (i = 0, j = 0; i < numGraphemeClusters && j + 1 < stringLength; ++i, ++j)
302             j += characters[j] == '\r' && characters[j + 1] == '\n';
303         return j + (i < numGraphemeClusters);
304     }
305
306     NonSharedCharacterBreakIterator iterator { string };
307     if (!iterator) {
308         ASSERT_NOT_REACHED();
309         return stringLength;
310     }
311
312     for (unsigned i = 0; i < numGraphemeClusters; ++i) {
313         if (ubrk_next(iterator) == UBRK_DONE)
314             return stringLength;
315     }
316     return ubrk_current(iterator);
317 }
318
319 } // namespace WTF