1 // -*- c-basic-offset: 2 -*-
3 * This file is part of the KDE libraries
4 * Copyright (C) 2006 George Staikos <staikos@kde.org>
5 * Copyright (C) 2006 Alexey Proskuryakov <ap@nypop.com>
6 * Copyright (C) 2007 Apple Computer, Inc. All rights reserved.
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Library General Public
10 * License as published by the Free Software Foundation; either
11 * version 2 of the License, or (at your option) any later version.
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Library General Public License for more details.
18 * You should have received a copy of the GNU Library General Public License
19 * along with this library; see the file COPYING.LIB. If not, write to
20 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
21 * Boston, MA 02110-1301, USA.
25 #ifndef KJS_UNICODE_ICU_H
26 #define KJS_UNICODE_ICU_H
28 #include <unicode/uchar.h>
29 #include <unicode/ustring.h>
30 #include <unicode/utf16.h>
// Bidirectional-class enumerators: each name below is an alias for the ICU
// constant on its right-hand side. direction() casts the result of
// u_charDirection() to this type, so the values must stay equal to ICU's.
// NOTE(review): the line that opens this enum is not present in this copy of
// the file; presumably it is `enum Direction {` -- confirm against the original.
38 LeftToRight = U_LEFT_TO_RIGHT,
39 RightToLeft = U_RIGHT_TO_LEFT,
40 EuropeanNumber = U_EUROPEAN_NUMBER,
41 EuropeanNumberSeparator = U_EUROPEAN_NUMBER_SEPARATOR,
42 EuropeanNumberTerminator = U_EUROPEAN_NUMBER_TERMINATOR,
43 ArabicNumber = U_ARABIC_NUMBER,
44 CommonNumberSeparator = U_COMMON_NUMBER_SEPARATOR,
45 BlockSeparator = U_BLOCK_SEPARATOR,
46 SegmentSeparator = U_SEGMENT_SEPARATOR,
47 WhiteSpaceNeutral = U_WHITE_SPACE_NEUTRAL,
48 OtherNeutral = U_OTHER_NEUTRAL,
49 LeftToRightEmbedding = U_LEFT_TO_RIGHT_EMBEDDING,
50 LeftToRightOverride = U_LEFT_TO_RIGHT_OVERRIDE,
51 RightToLeftArabic = U_RIGHT_TO_LEFT_ARABIC,
52 RightToLeftEmbedding = U_RIGHT_TO_LEFT_EMBEDDING,
53 RightToLeftOverride = U_RIGHT_TO_LEFT_OVERRIDE,
54 PopDirectionalFormat = U_POP_DIRECTIONAL_FORMAT,
55 NonSpacingMark = U_DIR_NON_SPACING_MARK,
56 BoundaryNeutral = U_BOUNDARY_NEUTRAL
// Decomposition-type values, each equal to the ICU U_DT_* constant on its
// right-hand side. decompositionType() casts the result of
// u_getIntPropertyValue(c, UCHAR_DECOMPOSITION_TYPE) to this enum, so the
// numeric values must stay identical to ICU's.
// NOTE(review): the closing line of the enum is not present in this copy of
// the file, and the last enumerator is followed by a trailing comma.
59 enum DecompositionType {
60 DecompositionNone = U_DT_NONE,
61 DecompositionCanonical = U_DT_CANONICAL,
62 DecompositionCompat = U_DT_COMPAT,
63 DecompositionCircle = U_DT_CIRCLE,
64 DecompositionFinal = U_DT_FINAL,
65 DecompositionFont = U_DT_FONT,
66 DecompositionFraction = U_DT_FRACTION,
67 DecompositionInitial = U_DT_INITIAL,
68 DecompositionIsolated = U_DT_ISOLATED,
69 DecompositionMedial = U_DT_MEDIAL,
70 DecompositionNarrow = U_DT_NARROW,
71 DecompositionNoBreak = U_DT_NOBREAK,
72 DecompositionSmall = U_DT_SMALL,
73 DecompositionSquare = U_DT_SQUARE,
74 DecompositionSub = U_DT_SUB,
75 DecompositionSuper = U_DT_SUPER,
76 DecompositionVertical = U_DT_VERTICAL,
77 DecompositionWide = U_DT_WIDE,
// General-category enumerators. Each value is U_MASK(<ICU general category>),
// i.e. a mask value rather than the raw category number; category() obtains
// the matching value for a code point through U_GET_GC_MASK().
// NOTE(review): the line that opens this enum is missing from this copy of
// the file (presumably `enum CharCategory {`, possibly followed by further
// enumerators) -- confirm against the original before relying on this list.
82 Other_NotAssigned = U_MASK(U_GENERAL_OTHER_TYPES),
// Letters.
83 Letter_Uppercase = U_MASK(U_UPPERCASE_LETTER),
84 Letter_Lowercase = U_MASK(U_LOWERCASE_LETTER),
85 Letter_Titlecase = U_MASK(U_TITLECASE_LETTER),
86 Letter_Modifier = U_MASK(U_MODIFIER_LETTER),
87 Letter_Other = U_MASK(U_OTHER_LETTER),
// Marks.
89 Mark_NonSpacing = U_MASK(U_NON_SPACING_MARK),
90 Mark_Enclosing = U_MASK(U_ENCLOSING_MARK),
91 Mark_SpacingCombining = U_MASK(U_COMBINING_SPACING_MARK),
// Numbers.
93 Number_DecimalDigit = U_MASK(U_DECIMAL_DIGIT_NUMBER),
94 Number_Letter = U_MASK(U_LETTER_NUMBER),
95 Number_Other = U_MASK(U_OTHER_NUMBER),
// Separators.
97 Separator_Space = U_MASK(U_SPACE_SEPARATOR),
98 Separator_Line = U_MASK(U_LINE_SEPARATOR),
99 Separator_Paragraph = U_MASK(U_PARAGRAPH_SEPARATOR),
// Control, format, private-use and surrogate code points.
101 Other_Control = U_MASK(U_CONTROL_CHAR),
102 Other_Format = U_MASK(U_FORMAT_CHAR),
103 Other_PrivateUse = U_MASK(U_PRIVATE_USE_CHAR),
104 Other_Surrogate = U_MASK(U_SURROGATE),
// Punctuation.
106 Punctuation_Dash = U_MASK(U_DASH_PUNCTUATION),
107 Punctuation_Open = U_MASK(U_START_PUNCTUATION),
108 Punctuation_Close = U_MASK(U_END_PUNCTUATION),
109 Punctuation_Connector = U_MASK(U_CONNECTOR_PUNCTUATION),
110 Punctuation_Other = U_MASK(U_OTHER_PUNCTUATION),
// Symbols.
112 Symbol_Math = U_MASK(U_MATH_SYMBOL),
113 Symbol_Currency = U_MASK(U_CURRENCY_SYMBOL),
114 Symbol_Modifier = U_MASK(U_MODIFIER_SYMBOL),
115 Symbol_Other = U_MASK(U_OTHER_SYMBOL),
// Initial/final quotation punctuation.
117 Punctuation_InitialQuote = U_MASK(U_INITIAL_PUNCTUATION),
118 Punctuation_FinalQuote = U_MASK(U_FINAL_PUNCTUATION)
// Case-folds a single code point: returns u_foldCase(c, U_FOLD_CASE_DEFAULT).
121 inline UChar32 foldCase(UChar32 c)
123 return u_foldCase(c, U_FOLD_CASE_DEFAULT);
// Case-folds the string src into result using u_strFoldCase() with
// U_FOLD_CASE_DEFAULT; result/resultLength and src/srcLength are forwarded to
// ICU unchanged.
// *error is set to true when the ICU status is not a success code. error is
// dereferenced unconditionally, so it must point to a valid bool.
// NOTE(review): the return statement is not present in this copy of the file;
// presumably the function returns realLength, the length reported by ICU --
// confirm against the original.
126 inline int foldCase(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error)
128 UErrorCode status = U_ZERO_ERROR;
129 int realLength = u_strFoldCase(result, resultLength, src, srcLength, U_FOLD_CASE_DEFAULT, &status);
// Same outcome as the !!U_FAILURE(status) form used by the string
// toLower()/toUpper() overloads, given ICU's definitions of the two macros.
130 *error = !U_SUCCESS(status);
// Lowercases the string src into result via u_strToLower(); the buffer and
// length arguments are forwarded unchanged and the locale argument is "".
// (ICU documents "" as the root locale -- confirm against the ustring.h
// reference.)
// *error receives true when U_FAILURE(status) holds. error is dereferenced
// unconditionally, so it must point to a valid bool.
// NOTE(review): the return statement is not present in this copy of the file;
// presumably the function returns realLength -- confirm against the original.
134 inline int toLower(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error)
136 UErrorCode status = U_ZERO_ERROR;
137 int realLength = u_strToLower(result, resultLength, src, srcLength, "", &status);
// The double negation turns the macro's result into a plain bool.
138 *error = !!U_FAILURE(status);
// Single-code-point overload of toLower().
// NOTE(review): only the signature is present in this copy of the file; the
// body is missing, so the ICU call it makes cannot be confirmed from here
// (presumably u_tolower(c) -- TODO confirm against the original).
142 inline UChar32 toLower(UChar32 c)
// Single-code-point overload of toUpper().
// NOTE(review): only the signature is present in this copy of the file; the
// body is missing, so the ICU call it makes cannot be confirmed from here
// (presumably u_toupper(c) -- TODO confirm against the original).
147 inline UChar32 toUpper(UChar32 c)
// Uppercases the string src into result via u_strToUpper(); the buffer and
// length arguments are forwarded unchanged and the locale argument is "".
// (ICU documents "" as the root locale -- confirm against the ustring.h
// reference.)
// *error receives true when U_FAILURE(status) holds. error is dereferenced
// unconditionally, so it must point to a valid bool.
// NOTE(review): the return statement is not present in this copy of the file;
// presumably the function returns realLength -- confirm against the original.
152 inline int toUpper(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error)
154 UErrorCode status = U_ZERO_ERROR;
155 int realLength = u_strToUpper(result, resultLength, src, srcLength, "", &status);
// The double negation turns the macro's result into a plain bool.
156 *error = !!U_FAILURE(status);
// Takes one code point and returns a UChar32.
// NOTE(review): only the signature is present in this copy of the file; the
// body is missing, so the ICU call it makes cannot be confirmed from here
// (presumably u_totitle(c) -- TODO confirm against the original).
160 inline UChar32 toTitleCase(UChar32 c)
// True when ICU's u_charType() reports exactly U_FORMAT_CHAR for c.
165 inline bool isFormatChar(UChar32 c)
167 return u_charType(c) == U_FORMAT_CHAR;
// True when ICU's u_charType() reports exactly U_SPACE_SEPARATOR for c.
// Only that one category is tested; no other kind of whitespace is checked here.
170 inline bool isSeparatorSpace(UChar32 c)
172 return u_charType(c) == U_SPACE_SEPARATOR;
// Wraps ICU's u_isprint(); the double negation converts its result to bool.
175 inline bool isPrintableChar(UChar32 c)
177 return !!u_isprint(c);
// Wraps ICU's u_isdigit(); the double negation converts its result to bool.
180 inline bool isDigit(UChar32 c)
182 return !!u_isdigit(c);
// Wraps ICU's u_ispunct(); the double negation converts its result to bool.
185 inline bool isPunct(UChar32 c)
187 return !!u_ispunct(c);
// Returns whatever ICU's u_charMirror() yields for c.
190 inline UChar32 mirroredChar(UChar32 c)
192 return u_charMirror(c);
// Returns the general category of c as a CharCategory. The value comes from
// U_GET_GC_MASK(c), matching the U_MASK(...) form of the CharCategory
// enumerators, and is converted with a static_cast.
195 inline CharCategory category(UChar32 c)
197 return static_cast<CharCategory>(U_GET_GC_MASK(c));
// Returns the bidirectional class of c: the result of u_charDirection() is
// cast directly to Direction, relying on the Direction enumerators being
// defined as the corresponding ICU constants.
200 inline Direction direction(UChar32 c)
202 return static_cast<Direction>(u_charDirection(c));
// Wraps ICU's u_islower(); the double negation converts its result to bool.
205 inline bool isLower(UChar32 c)
207 return !!u_islower(c);
// Wraps ICU's u_isUUppercase(); the double negation converts its result to bool.
// NOTE(review): isLower() calls u_islower(), whereas this calls
// u_isUUppercase() rather than u_isupper(). The two ICU families are
// documented as testing different properties, so the pair may not be
// symmetric -- confirm this is intentional before changing either one.
210 inline bool isUpper(UChar32 c)
212 return !!u_isUUppercase(c);
// Returns ICU's u_charDigitValue(c) unchanged.
// NOTE(review): the result for code points that are not decimal digits is
// defined by ICU, not here -- check the ICU reference before relying on it.
215 inline int digitValue(UChar32 c)
217 return u_charDigitValue(c);
// Returns ICU's u_getCombiningClass(c) as a uint8_t.
220 inline uint8_t combiningClass(UChar32 c)
222 return u_getCombiningClass(c);
// Returns the decomposition type of c: reads the UCHAR_DECOMPOSITION_TYPE
// property through u_getIntPropertyValue() and casts the integer to
// DecompositionType, whose enumerators are defined as ICU's U_DT_* constants.
225 inline DecompositionType decompositionType(UChar32 c)
227 return static_cast<DecompositionType>(u_getIntPropertyValue(c, UCHAR_DECOMPOSITION_TYPE));
// Compares a and b with ICU's u_memcasecmp() using U_FOLD_CASE_DEFAULT; the
// same len is passed for both buffers and ICU's result is returned unchanged.
230 inline int umemcasecmp(const UChar* a, const UChar* b, int len)
232 return u_memcasecmp(a, b, len, U_FOLD_CASE_DEFAULT);