2 * Copyright (C) 2010 Apple Inc. All rights reserved.
3 * Copyright (C) 2015 Igalia S.L.
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
14 * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS''
15 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
16 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS
18 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
19 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
20 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
21 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
22 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
23 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
24 * THE POSSIBILITY OF SUCH DAMAGE.
28 #include "Hyphenation.h"
32 #include "AtomicStringKeyedMRUCache.h"
33 #include "FileSystem.h"
34 #include "GtkUtilities.h"
36 #include <wtf/HashMap.h>
37 #include <wtf/NeverDestroyed.h>
38 #include <wtf/gobject/GUniquePtr.h>
39 #include <wtf/text/AtomicStringHash.h>
40 #include <wtf/text/CString.h>
41 #include <wtf/text/StringView.h>
// Directories scanned, in order, for libhyphen dictionaries named
// "hyph_<locale name>.dic". When the same locale is present in several
// directories, the one listed last wins, because later scans overwrite
// earlier entries in the locale map.
// NOTE(review): "/usr/share/hyphen" is where distribution packages usually
// install these dictionaries; confirm it belongs in this list.
static const char* const gDictionaryDirectories[] = {
    "/usr/share/hyphen",
    "/usr/local/share/hyphen",
};
50 static String extractLocaleFromDictionaryFilePath(const String& filePath)
52 // Dictionary files always have the form "hyph_<locale name>.dic"
53 // so we strip everything except the locale.
54 String fileName = pathGetFileName(filePath);
55 static const int prefixLength = 5;
56 static const int suffixLength = 4;
57 return fileName.substring(prefixLength, fileName.length() - prefixLength - suffixLength);
60 static void scanDirectoryForDicionaries(const char* directoryPath, HashMap<AtomicString, String>& availableLocales)
62 for (const auto& filePath : listDirectory(directoryPath, "hyph_*.dic"))
63 availableLocales.set(AtomicString(extractLocaleFromDictionaryFilePath(filePath)), filePath);
#if defined(DEVELOPMENT_BUILD)
// Adds the dictionaries shipped for the WebKit tests to availableLocales.
// libhyphen has no notion of installed dictionaries, so development builds
// look inside the build directory instead: first in the JHBuild dependency
// root, and only when that directory does not exist, directly under the
// build directory.
static void scanTestDictionariesDirectoryIfNecessary(HashMap<AtomicString, String>& availableLocales)
{
    CString buildDirectory = webkitBuildDirectory();

    GUniquePtr<char> jhbuildDictionariesPath(g_build_filename(buildDirectory.data(), "DependenciesGTK", "Root", "webkitgtk-test-dicts", nullptr));
    bool hasJHBuildDictionaries = g_file_test(jhbuildDictionariesPath.get(), static_cast<GFileTest>(G_FILE_TEST_IS_DIR));
    if (!hasJHBuildDictionaries) {
        // Alternative location for people not using JHBuild.
        GUniquePtr<char> fallbackDictionariesPath(g_build_filename(buildDirectory.data(), "webkitgtk-test-dicts", nullptr));
        scanDirectoryForDicionaries(fallbackDictionariesPath.get(), availableLocales);
        return;
    }

    scanDirectoryForDicionaries(jhbuildDictionariesPath.get(), availableLocales);
}
#endif
85 static HashMap<AtomicString, String>& availableLocales()
87 static bool scannedLocales = false;
88 static HashMap<AtomicString, String> availableLocales;
90 if (!scannedLocales) {
91 for (size_t i = 0; i < WTF_ARRAY_LENGTH(gDictionaryDirectories); i++)
92 scanDirectoryForDicionaries(gDictionaryDirectories[i], availableLocales);
94 #if defined(DEVELOPMENT_BUILD)
95 scanTestDictionariesDirectoryIfNecessary(availableLocales);
98 scannedLocales = true;
101 return availableLocales;
104 bool canHyphenate(const AtomicString& localeIdentifier)
106 if (localeIdentifier.isNull())
108 return availableLocales().contains(localeIdentifier);
111 class HyphenationDictionary : public RefCounted<HyphenationDictionary> {
112 WTF_MAKE_NONCOPYABLE(HyphenationDictionary);
113 WTF_MAKE_FAST_ALLOCATED;
115 typedef std::unique_ptr<HyphenDict, void(*)(HyphenDict*)> HyphenDictUniquePtr;
117 virtual ~HyphenationDictionary() { }
118 static RefPtr<HyphenationDictionary> createNull()
120 return adoptRef(new HyphenationDictionary());
123 static RefPtr<HyphenationDictionary> create(const CString& dictPath)
125 return adoptRef(new HyphenationDictionary(dictPath));
128 HyphenDict* libhyphenDictionary() const
130 return m_libhyphenDictionary.get();
134 HyphenationDictionary(const CString& dictPath)
135 : m_libhyphenDictionary(HyphenDictUniquePtr(hnj_hyphen_load(dictPath.data()), hnj_hyphen_free))
139 HyphenationDictionary()
140 : m_libhyphenDictionary(HyphenDictUniquePtr(nullptr, hnj_hyphen_free))
144 HyphenDictUniquePtr m_libhyphenDictionary;
148 RefPtr<HyphenationDictionary> AtomicStringKeyedMRUCache<RefPtr<HyphenationDictionary>>::createValueForNullKey()
150 return HyphenationDictionary::createNull();
154 RefPtr<HyphenationDictionary> AtomicStringKeyedMRUCache<RefPtr<HyphenationDictionary>>::createValueForKey(const AtomicString& localeIdentifier)
156 ASSERT(availableLocales().get(localeIdentifier));
157 return HyphenationDictionary::create(fileSystemRepresentation(availableLocales().get(localeIdentifier)));
160 static AtomicStringKeyedMRUCache<RefPtr<HyphenationDictionary>>& hyphenDictionaryCache()
162 static NeverDestroyed<AtomicStringKeyedMRUCache<RefPtr<HyphenationDictionary>>> cache;
166 static void countLeadingSpaces(const CString& utf8String, int32_t& pointerOffset, int32_t& characterOffset)
170 const char* stringData = utf8String.data();
171 UChar32 character = 0;
172 while (static_cast<unsigned>(pointerOffset) < utf8String.length()) {
173 int32_t nextPointerOffset = pointerOffset;
174 U8_NEXT(stringData, nextPointerOffset, static_cast<int32_t>(utf8String.length()), character);
176 if (character < 0 || !u_isUWhiteSpace(character))
179 pointerOffset = nextPointerOffset;
184 size_t lastHyphenLocation(StringView string, size_t beforeIndex, const AtomicString& localeIdentifier)
186 ASSERT(availableLocales().contains(localeIdentifier));
187 RefPtr<HyphenationDictionary> dictionary = hyphenDictionaryCache().get(localeIdentifier);
189 // libhyphen accepts strings in UTF-8 format, but WebCore can only provide StringView
190 // which stores either UTF-16 or Latin1 data. This is unfortunate for performance
191 // reasons and we should consider switching to a more flexible hyphenation library
192 // if it is available.
193 CString utf8StringCopy = string.toStringWithoutCopying().utf8();
195 // WebCore often passes strings like " wordtohyphenate" to the platform layer. Since
196 // libhyphen isn't advanced enough to deal with leading spaces (presumably CoreFoundation
197 // can), we should find the appropriate indexes into the string to skip them.
198 int32_t leadingSpaceBytes;
199 int32_t leadingSpaceCharacters;
200 countLeadingSpaces(utf8StringCopy, leadingSpaceBytes, leadingSpaceCharacters);
202 // The libhyphen documentation specifies that this array should be 5 bytes longer than
203 // the byte length of the input string.
204 Vector<char> hyphenArray(utf8StringCopy.length() - leadingSpaceBytes + 5);
205 char* hyphenArrayData = hyphenArray.data();
207 char** replacements = nullptr;
208 int* positions = nullptr;
209 int* removedCharacterCounts = nullptr;
210 hnj_hyphen_hyphenate2(dictionary->libhyphenDictionary(),
211 utf8StringCopy.data() + leadingSpaceBytes,
212 utf8StringCopy.length() - leadingSpaceBytes,
214 nullptr, /* output parameter for hyphenated word */
217 &removedCharacterCounts);
220 for (unsigned i = 0; i < utf8StringCopy.length() - leadingSpaceBytes - 1; i++)
221 free(replacements[i]);
226 free(removedCharacterCounts);
228 for (int i = beforeIndex - leadingSpaceCharacters - 1; i >= 0; i--) {
229 // libhyphen will put an odd number in hyphenArrayData at all
230 // hyphenation points. A number & 1 will be true for odd numbers.
231 if (hyphenArrayData[i] & 1)
232 return i + leadingSpaceCharacters;
238 } // namespace WebCore
240 #endif // USE(LIBHYPHEN)