+2006-04-03 George Staikos <staikos@opensource.apple.com>
+
+ Reviewed by Maciej.
+
+ Implement a Unicode abstraction layer so that JavaScriptCore can be ported
+ to other platforms more easily, without having to take in libicu. This also
+ makes the Unicode-related code easier to understand.
+
2006-04-03 Timothy Hatcher <timothy@apple.com>
Reviewed by Adele.
#include <string.h>
#include <ctype.h>
-#include <unicode/uchar.h>
+#include <kxmlcore/unicode/Unicode.h>
namespace KJS {
case 0x2029:
return true;
default:
- return u_charType(c) == U_SPACE_SEPARATOR;
+ return KXMLCore::Unicode::isSeparatorSpace(c);
}
}
#include "identifier.h"
#include "lookup.h"
#include "internal.h"
-#include <unicode/uchar.h>
+#include <kxmlcore/unicode/Unicode.h>
static bool isDecimalDigit(unsigned short c);
break;
}
next3 = code[pos++].uc;
- } while (u_charType(next3) == U_FORMAT_CHAR);
+ } while (KXMLCore::Unicode::isFormatChar(next3));
}
}
bool Lexer::isWhiteSpace() const
{
- return (current == '\t' || current == 0x0b || current == 0x0c || u_charType(current) == U_SPACE_SEPARATOR);
+ // White space is tab, vertical tab (0x0b), form feed (0x0c), or any character
+ // the Unicode backend classifies as a space separator (category Zs).
+ return current == '\t' || current == 0x0b || current == 0x0c || KXMLCore::Unicode::isSeparatorSpace(current);
}
bool Lexer::isLineTerminator()
bool Lexer::isIdentStart(unsigned short c)
{
- return (U_GET_GC_MASK(c) & (U_GC_L_MASK | U_GC_NL_MASK)) || c == '$' || c == '_';
+ // An identifier may start with any Unicode letter (Lu, Ll, Lt, Lm, Lo), a
+ // letter number (Nl), '$' or '_'. Number_Letter preserves the U_GC_NL_MASK
+ // test of the ICU expression this replaces.
+ return (KXMLCore::Unicode::category(c) & (KXMLCore::Unicode::Letter_Uppercase
+ | KXMLCore::Unicode::Letter_Lowercase
+ | KXMLCore::Unicode::Letter_Titlecase
+ | KXMLCore::Unicode::Letter_Modifier
+ | KXMLCore::Unicode::Letter_Other
+ | KXMLCore::Unicode::Number_Letter))
+ || c == '$' || c == '_';
}
bool Lexer::isIdentPart(unsigned short c)
{
- return (U_GET_GC_MASK(c) & (U_GC_L_MASK | U_GC_NL_MASK | U_GC_MN_MASK | U_GC_MC_MASK | U_GC_ND_MASK | U_GC_PC_MASK)) || c == '$' || c == '_';
+ // After the first character an identifier may also contain combining marks
+ // (Mn, Mc), decimal digits (Nd) and connector punctuation (Pc).
+ // Number_Letter (Nl) preserves the U_GC_NL_MASK test of the ICU expression
+ // this replaces.
+ return (KXMLCore::Unicode::category(c) & (KXMLCore::Unicode::Letter_Uppercase
+ | KXMLCore::Unicode::Letter_Lowercase
+ | KXMLCore::Unicode::Letter_Titlecase
+ | KXMLCore::Unicode::Letter_Modifier
+ | KXMLCore::Unicode::Letter_Other
+ | KXMLCore::Unicode::Number_Letter
+ | KXMLCore::Unicode::Mark_NonSpacing
+ | KXMLCore::Unicode::Mark_SpacingCombining
+ | KXMLCore::Unicode::Number_DecimalDigit
+ | KXMLCore::Unicode::Punctuation_Connector))
+ || c == '$' || c == '_';
}
static bool isDecimalDigit(unsigned short c)
using std::max;
-#include <unicode/uchar.h>
+#include <kxmlcore/unicode/Unicode.h>
namespace KJS {
UChar UChar::toLower() const
{
- return static_cast<unsigned short>(u_tolower(uc));
+ return KXMLCore::Unicode::toLower(uc);
}
UChar UChar::toUpper() const
{
- return static_cast<unsigned short>(u_toupper(uc));
+ return KXMLCore::Unicode::toUpper(uc);
}
UCharReference& UCharReference::operator=(UChar c)
// multiple threads only supported on OS X WebKit for now
#if PLATFORM(MAC)
#define KXMLCORE_USE_MULTIPLE_THREADS 1
+#define KXMLCORE_USE_ICU_UNICODE 1
+#endif
+
+#if PLATFORM(KDE)
+#define KXMLCORE_USE_QT4_UNICODE 1
#endif
#endif // KXMLCORE_PLATFORM_H
--- /dev/null
+// -*- c-basic-offset: 2 -*-
+/*
+ * This file is part of the KDE libraries
+ * Copyright (C) 2006 George Staikos <staikos@kde.org>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public License
+ * along with this library; see the file COPYING.LIB. If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+#ifndef KJS_UNICODE_H
+#define KJS_UNICODE_H
+
+#include "../Platform.h"
+
+#if USE(QT4_UNICODE)
+#include "qt4/UnicodeQt4.h"
+#elif USE(ICU_UNICODE)
+#include "icu/UnicodeIcu.h"
+#else
+#error "Unknown unicode implementation"
+#endif
+
+#endif
+// vim: ts=2 sw=2 et
--- /dev/null
+// -*- c-basic-offset: 2 -*-
+/*
+ * This file is part of the KDE libraries
+ * Copyright (C) 2006 George Staikos <staikos@kde.org>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public License
+ * along with this library; see the file COPYING.LIB. If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+#ifndef KJS_UNICODE_COMMON_H
+#define KJS_UNICODE_COMMON_H
+
+namespace KXMLCore {
+ namespace Unicode {
+    // Bit flags for the Unicode general categories. Each category owns one
+    // bit, so several categories can be tested against a single mask.
+    enum CharCategory {
+      NoCategory = 0,
+      Mark_NonSpacing = 1 << 0,            // Mn
+      Mark_SpacingCombining = 1 << 1,      // Mc
+      Mark_Enclosing = 1 << 2,             // Me
+      Number_DecimalDigit = 1 << 3,        // Nd
+      Number_Letter = 1 << 4,              // Nl
+      Number_Other = 1 << 5,               // No
+      Separator_Space = 1 << 6,            // Zs
+      Separator_Line = 1 << 7,             // Zl
+      Separator_Paragraph = 1 << 8,        // Zp
+      Other_Control = 1 << 9,              // Cc
+      Other_Format = 1 << 10,              // Cf
+      Other_Surrogate = 1 << 11,           // Cs
+      Other_PrivateUse = 1 << 12,          // Co
+      Other_NotAssigned = 1 << 13,         // Cn
+      Letter_Uppercase = 1 << 14,          // Lu
+      Letter_Lowercase = 1 << 15,          // Ll
+      Letter_Titlecase = 1 << 16,          // Lt
+      Letter_Modifier = 1 << 17,           // Lm
+      Letter_Other = 1 << 18,              // Lo
+      Punctuation_Connector = 1 << 19,     // Pc
+      Punctuation_Dash = 1 << 20,          // Pd
+      Punctuation_Open = 1 << 21,          // Ps
+      Punctuation_Close = 1 << 22,         // Pe
+      Punctuation_InitialQuote = 1 << 23,  // Pi
+      Punctuation_FinalQuote = 1 << 24,    // Pf
+      Punctuation_Other = 1 << 25,         // Po
+      Symbol_Math = 1 << 26,               // Sm
+      Symbol_Currency = 1 << 27,           // Sc
+      Symbol_Modifier = 1 << 28,           // Sk
+      Symbol_Other = 1 << 29               // So
+    };
+ }
+}
+
+#endif
+// vim: ts=2 sw=2 et
--- /dev/null
+// -*- c-basic-offset: 2 -*-
+/*
+ * This file is part of the KDE libraries
+ * Copyright (C) 2006 George Staikos <staikos@kde.org>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public License
+ * along with this library; see the file COPYING.LIB. If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+#ifndef KJS_UNICODE_ICU_H
+#define KJS_UNICODE_ICU_H
+
+#include <unicode/uchar.h>
+
+#include "../UnicodeCategory.h"
+
+namespace KXMLCore {
+ namespace Unicode {
+    // Returns the lowercase mapping of c as computed by ICU's u_tolower().
+    // The result is narrowed explicitly to unsigned short, matching the
+    // static_cast used at the call site this helper replaces.
+    inline unsigned short toLower(unsigned short c) {
+      return static_cast<unsigned short>(u_tolower(c));
+    }
+
+    // Returns the uppercase mapping of c as computed by ICU's u_toupper().
+    // The result is narrowed explicitly to unsigned short, matching the
+    // static_cast used at the call site this helper replaces.
+    inline unsigned short toUpper(unsigned short c) {
+      return static_cast<unsigned short>(u_toupper(c));
+    }
+
+    // True when ICU reports U_FORMAT_CHAR as the general category of c.
+    inline bool isFormatChar(unsigned short c) {
+      const int type = u_charType(c);
+      return type == U_FORMAT_CHAR;
+    }
+
+    // True when ICU reports U_SPACE_SEPARATOR as the general category of c.
+    inline bool isSeparatorSpace(unsigned short c) {
+      const int type = u_charType(c);
+      return type == U_SPACE_SEPARATOR;
+    }
+
+    // Translates the ICU general category of c into the matching CharCategory
+    // flag. Values with no counterpart in the table yield NoCategory.
+    inline CharCategory category(unsigned short c) {
+      const int type = u_charType(c);
+      switch (type) {
+        case U_NON_SPACING_MARK:       return Mark_NonSpacing;
+        case U_COMBINING_SPACING_MARK: return Mark_SpacingCombining;
+        case U_ENCLOSING_MARK:         return Mark_Enclosing;
+        case U_DECIMAL_DIGIT_NUMBER:   return Number_DecimalDigit;
+        case U_LETTER_NUMBER:          return Number_Letter;
+        case U_OTHER_NUMBER:           return Number_Other;
+        case U_SPACE_SEPARATOR:        return Separator_Space;
+        case U_LINE_SEPARATOR:         return Separator_Line;
+        case U_PARAGRAPH_SEPARATOR:    return Separator_Paragraph;
+        case U_CONTROL_CHAR:           return Other_Control;
+        case U_FORMAT_CHAR:            return Other_Format;
+        case U_SURROGATE:              return Other_Surrogate;
+        case U_PRIVATE_USE_CHAR:       return Other_PrivateUse;
+        case U_GENERAL_OTHER_TYPES:    return Other_NotAssigned;
+        case U_UPPERCASE_LETTER:       return Letter_Uppercase;
+        case U_LOWERCASE_LETTER:       return Letter_Lowercase;
+        case U_TITLECASE_LETTER:       return Letter_Titlecase;
+        case U_MODIFIER_LETTER:        return Letter_Modifier;
+        case U_OTHER_LETTER:           return Letter_Other;
+        case U_CONNECTOR_PUNCTUATION:  return Punctuation_Connector;
+        case U_DASH_PUNCTUATION:       return Punctuation_Dash;
+        case U_START_PUNCTUATION:      return Punctuation_Open;
+        case U_END_PUNCTUATION:        return Punctuation_Close;
+        case U_INITIAL_PUNCTUATION:    return Punctuation_InitialQuote;
+        case U_FINAL_PUNCTUATION:      return Punctuation_FinalQuote;
+        case U_OTHER_PUNCTUATION:      return Punctuation_Other;
+        case U_MATH_SYMBOL:            return Symbol_Math;
+        case U_CURRENCY_SYMBOL:        return Symbol_Currency;
+        case U_MODIFIER_SYMBOL:        return Symbol_Modifier;
+        case U_OTHER_SYMBOL:           return Symbol_Other;
+        default:                       return NoCategory;
+      }
+    }
+ }
+}
+
+#endif
+// vim: ts=2 sw=2 et
--- /dev/null
+// -*- c-basic-offset: 2 -*-
+/*
+ * This file is part of the KDE libraries
+ * Copyright (C) 2006 George Staikos <staikos@kde.org>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public License
+ * along with this library; see the file COPYING.LIB. If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+#ifndef KJS_UNICODE_QT4_H
+#define KJS_UNICODE_QT4_H
+
+#include <QChar>
+
+#include "../UnicodeCategory.h"
+
+namespace KXMLCore {
+ namespace Unicode {
+    // Lowercases c through QChar::toLower() and returns the resulting code unit.
+    inline unsigned short toLower(unsigned short c) {
+      const QChar lowered = QChar(c).toLower();
+      return static_cast<unsigned short>(lowered.unicode());
+    }
+
+    // Uppercases c through QChar::toUpper() and returns the resulting code unit.
+    inline unsigned short toUpper(unsigned short c) {
+      const QChar raised = QChar(c).toUpper();
+      return static_cast<unsigned short>(raised.unicode());
+    }
+
+    // True when QChar reports Other_Format as the category of c.
+    inline bool isFormatChar(unsigned short c) {
+      const QChar::Category type = QChar(c).category();
+      return type == QChar::Other_Format;
+    }
+
+    // True when QChar reports Separator_Space as the category of c.
+    inline bool isSeparatorSpace(unsigned short c) {
+      const QChar::Category type = QChar(c).category();
+      return type == QChar::Separator_Space;
+    }
+
+    // Translates the QChar category of c into the matching CharCategory flag.
+    // Values with no counterpart in the table yield NoCategory.
+    inline CharCategory category(unsigned short c) {
+      const QChar::Category type = QChar(c).category();
+      switch (type) {
+        case QChar::Mark_NonSpacing:          return Mark_NonSpacing;
+        case QChar::Mark_SpacingCombining:    return Mark_SpacingCombining;
+        case QChar::Mark_Enclosing:           return Mark_Enclosing;
+        case QChar::Number_DecimalDigit:      return Number_DecimalDigit;
+        case QChar::Number_Letter:            return Number_Letter;
+        case QChar::Number_Other:             return Number_Other;
+        case QChar::Separator_Space:          return Separator_Space;
+        case QChar::Separator_Line:           return Separator_Line;
+        case QChar::Separator_Paragraph:      return Separator_Paragraph;
+        case QChar::Other_Control:            return Other_Control;
+        case QChar::Other_Format:             return Other_Format;
+        case QChar::Other_Surrogate:          return Other_Surrogate;
+        case QChar::Other_PrivateUse:         return Other_PrivateUse;
+        case QChar::Other_NotAssigned:        return Other_NotAssigned;
+        case QChar::Letter_Uppercase:         return Letter_Uppercase;
+        case QChar::Letter_Lowercase:         return Letter_Lowercase;
+        case QChar::Letter_Titlecase:         return Letter_Titlecase;
+        case QChar::Letter_Modifier:          return Letter_Modifier;
+        case QChar::Letter_Other:             return Letter_Other;
+        case QChar::Punctuation_Connector:    return Punctuation_Connector;
+        case QChar::Punctuation_Dash:         return Punctuation_Dash;
+        case QChar::Punctuation_Open:         return Punctuation_Open;
+        case QChar::Punctuation_Close:        return Punctuation_Close;
+        case QChar::Punctuation_InitialQuote: return Punctuation_InitialQuote;
+        case QChar::Punctuation_FinalQuote:   return Punctuation_FinalQuote;
+        case QChar::Punctuation_Other:        return Punctuation_Other;
+        case QChar::Symbol_Math:              return Symbol_Math;
+        case QChar::Symbol_Currency:          return Symbol_Currency;
+        case QChar::Symbol_Modifier:          return Symbol_Modifier;
+        case QChar::Symbol_Other:             return Symbol_Other;
+        default:                              return NoCategory;
+      }
+    }
+ }
+}
+
+#endif
+// vim: ts=2 sw=2 et