Reviewed by Maciej.
author staikos <staikos@268f45cc-cd09-0410-ab3c-d52691b4dbfc>
Mon, 3 Apr 2006 23:49:39 +0000 (23:49 +0000)
committer staikos <staikos@268f45cc-cd09-0410-ab3c-d52691b4dbfc>
Mon, 3 Apr 2006 23:49:39 +0000 (23:49 +0000)
        Implement a Unicode abstraction layer to make JavaScriptCore much
        easier to port to other platforms without having to pull in libicu.  Also
        makes the Unicode-related code easier to understand.

git-svn-id: https://svn.webkit.org/repository/webkit/trunk@13663 268f45cc-cd09-0410-ab3c-d52691b4dbfc

JavaScriptCore/ChangeLog
JavaScriptCore/kjs/function.cpp
JavaScriptCore/kjs/lexer.cpp
JavaScriptCore/kjs/ustring.cpp
JavaScriptCore/kxmlcore/Platform.h
JavaScriptCore/kxmlcore/unicode/Unicode.h [new file with mode: 0644]
JavaScriptCore/kxmlcore/unicode/UnicodeCategory.h [new file with mode: 0644]
JavaScriptCore/kxmlcore/unicode/icu/UnicodeIcu.h [new file with mode: 0644]
JavaScriptCore/kxmlcore/unicode/qt4/UnicodeQt4.h [new file with mode: 0644]

index 8a137f0..70bb2c7 100644 (file)
@@ -1,3 +1,11 @@
+2006-04-03  George Staikos   <staikos@opensource.apple.com>
+
+        Reviewed by Maciej.
+
+        Implement a unicode abstraction layer to make JavaScriptCore much more
+        easily ported to other platforms without having to take in libicu.  Also
+        makes the unicode related code easier to understand.
+
 2006-04-03  Timothy Hatcher  <timothy@apple.com>
 
         Reviewed by Adele.
index 85397ef..a5278b6 100644 (file)
@@ -40,7 +40,7 @@
 #include <string.h>
 #include <ctype.h>
 
-#include <unicode/uchar.h>
+#include <kxmlcore/unicode/Unicode.h>
 
 namespace KJS {
 
@@ -661,7 +661,7 @@ static bool isStrWhiteSpace(unsigned short c)
         case 0x2029:
             return true;
         default:
-            return u_charType(c) == U_SPACE_SEPARATOR;
+            return KXMLCore::Unicode::isSeparatorSpace(c);
     }
 }
 
index 989956c..c9e4f3f 100644 (file)
@@ -37,7 +37,7 @@
 #include "identifier.h"
 #include "lookup.h"
 #include "internal.h"
-#include <unicode/uchar.h>
+#include <kxmlcore/unicode/Unicode.h>
 
 static bool isDecimalDigit(unsigned short c);
 
@@ -137,7 +137,7 @@ void Lexer::shift(unsigned int p)
         break;
       }
       next3 = code[pos++].uc;
-    } while (u_charType(next3) == U_FORMAT_CHAR);
+    } while (KXMLCore::Unicode::isFormatChar(next3));
   }
 }
 
@@ -572,7 +572,7 @@ int Lexer::lex()
 
 bool Lexer::isWhiteSpace() const
 {
-  return (current == '\t' || current == 0x0b || current == 0x0c || u_charType(current) == U_SPACE_SEPARATOR);
+  return current == '\t' || current == 0x0b || current == 0x0c || KXMLCore::Unicode::isSeparatorSpace(current); // tab, 0x0b (VT), 0x0c (FF), or any character the Unicode layer classifies as a space separator
 }
 
 bool Lexer::isLineTerminator()
@@ -588,12 +588,26 @@ bool Lexer::isLineTerminator()
 
 bool Lexer::isIdentStart(unsigned short c)
 {
-  return (U_GET_GC_MASK(c) & (U_GC_L_MASK | U_GC_NL_MASK)) || c == '$' || c == '_';
+  return (KXMLCore::Unicode::category(c) & (KXMLCore::Unicode::Letter_Uppercase
+        | KXMLCore::Unicode::Letter_Lowercase
+        | KXMLCore::Unicode::Letter_Titlecase
+        | KXMLCore::Unicode::Letter_Modifier
+        | KXMLCore::Unicode::Letter_Other))
+    || c == '$' || c == '_';
 }
 
 bool Lexer::isIdentPart(unsigned short c)
 {
-  return (U_GET_GC_MASK(c) & (U_GC_L_MASK | U_GC_NL_MASK | U_GC_MN_MASK | U_GC_MC_MASK | U_GC_ND_MASK | U_GC_PC_MASK)) || c == '$' || c == '_';
+  return (KXMLCore::Unicode::category(c) & (KXMLCore::Unicode::Letter_Uppercase
+        | KXMLCore::Unicode::Letter_Lowercase
+        | KXMLCore::Unicode::Letter_Titlecase
+        | KXMLCore::Unicode::Letter_Modifier
+        | KXMLCore::Unicode::Letter_Other
+        | KXMLCore::Unicode::Mark_NonSpacing
+        | KXMLCore::Unicode::Mark_SpacingCombining
+        | KXMLCore::Unicode::Number_DecimalDigit
+        | KXMLCore::Unicode::Punctuation_Connector))
+    || c == '$' || c == '_';
 }
 
 static bool isDecimalDigit(unsigned short c)
index 2f5a4ff..8606192 100644 (file)
@@ -44,7 +44,7 @@
 
 using std::max;
 
-#include <unicode/uchar.h>
+#include <kxmlcore/unicode/Unicode.h>
 
 namespace KJS {
 
@@ -144,12 +144,12 @@ static int statBufferSize = 0;
 
 UChar UChar::toLower() const
 {
-  return static_cast<unsigned short>(u_tolower(uc));
+  return KXMLCore::Unicode::toLower(uc); // lower-case mapping is delegated to the Unicode backend chosen in kxmlcore/unicode/Unicode.h
 }
 
 UChar UChar::toUpper() const
 {
-  return static_cast<unsigned short>(u_toupper(uc));
+  return KXMLCore::Unicode::toUpper(uc); // upper-case mapping is delegated to the Unicode backend chosen in kxmlcore/unicode/Unicode.h
 }
 
 UCharReference& UCharReference::operator=(UChar c)
index 3bdef76..771bca7 100644 (file)
 // multiple threads only supported on OS X WebKit for now
 #if PLATFORM(MAC)
 #define KXMLCORE_USE_MULTIPLE_THREADS 1
+#define KXMLCORE_USE_ICU_UNICODE 1
+#endif
+
+#if PLATFORM(KDE)
+#define KXMLCORE_USE_QT4_UNICODE 1
 #endif
 
 #endif // KXMLCORE_PLATFORM_H
diff --git a/JavaScriptCore/kxmlcore/unicode/Unicode.h b/JavaScriptCore/kxmlcore/unicode/Unicode.h
new file mode 100644 (file)
index 0000000..bde0157
--- /dev/null
@@ -0,0 +1,37 @@
+// -*- c-basic-offset: 2 -*-
+/*
+ *  This file is part of the KDE libraries
+ *  Copyright (C) 2006 George Staikos <staikos@kde.org>
+ *
+ *  This library is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU Library General Public
+ *  License as published by the Free Software Foundation; either
+ *  version 2 of the License, or (at your option) any later version.
+ *
+ *  This library is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ *  Library General Public License for more details.
+ *
+ *  You should have received a copy of the GNU Library General Public License
+ *  along with this library; see the file COPYING.LIB.  If not, write to
+ *  the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ *  Boston, MA 02110-1301, USA.
+ *
+ */
+
+#ifndef KJS_UNICODE_H
+#define KJS_UNICODE_H
+
+#include "../Platform.h"
+
+#if USE(QT4_UNICODE)
+#include "qt4/UnicodeQt4.h"
+#elif USE(ICU_UNICODE)
+#include "icu/UnicodeIcu.h"
+#else
+#error "Unknown unicode implementation"
+#endif
+
+#endif
+// vim: ts=2 sw=2 et
diff --git a/JavaScriptCore/kxmlcore/unicode/UnicodeCategory.h b/JavaScriptCore/kxmlcore/unicode/UnicodeCategory.h
new file mode 100644 (file)
index 0000000..6e79d5e
--- /dev/null
@@ -0,0 +1,65 @@
+// -*- c-basic-offset: 2 -*-
+/*
+ *  This file is part of the KDE libraries
+ *  Copyright (C) 2006 George Staikos <staikos@kde.org>
+ *
+ *  This library is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU Library General Public
+ *  License as published by the Free Software Foundation; either
+ *  version 2 of the License, or (at your option) any later version.
+ *
+ *  This library is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ *  Library General Public License for more details.
+ *
+ *  You should have received a copy of the GNU Library General Public License
+ *  along with this library; see the file COPYING.LIB.  If not, write to
+ *  the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ *  Boston, MA 02110-1301, USA.
+ *
+ */
+
+#ifndef KJS_UNICODE_COMMON_H
+#define KJS_UNICODE_COMMON_H
+
+namespace KXMLCore {
+  namespace Unicode {
+    enum CharCategory {                    // one bit per Unicode general category, so values can be OR-ed into masks
+      NoCategory = 0,
+      Mark_NonSpacing = 1 << 0,            // Mn
+      Mark_SpacingCombining = 1 << 1,      // Mc
+      Mark_Enclosing = 1 << 2,             // Me
+      Number_DecimalDigit = 1 << 3,        // Nd
+      Number_Letter = 1 << 4,              // Nl
+      Number_Other = 1 << 5,               // No
+      Separator_Space = 1 << 6,            // Zs
+      Separator_Line = 1 << 7,             // Zl
+      Separator_Paragraph = 1 << 8,        // Zp
+      Other_Control = 1 << 9,              // Cc
+      Other_Format = 1 << 10,              // Cf
+      Other_Surrogate = 1 << 11,           // Cs
+      Other_PrivateUse = 1 << 12,          // Co
+      Other_NotAssigned = 1 << 13,         // Cn
+      Letter_Uppercase = 1 << 14,          // Lu
+      Letter_Lowercase = 1 << 15,          // Ll
+      Letter_Titlecase = 1 << 16,          // Lt
+      Letter_Modifier = 1 << 17,           // Lm
+      Letter_Other = 1 << 18,              // Lo
+      Punctuation_Connector = 1 << 19,     // Pc
+      Punctuation_Dash = 1 << 20,          // Pd
+      Punctuation_Open = 1 << 21,          // Ps
+      Punctuation_Close = 1 << 22,         // Pe
+      Punctuation_InitialQuote = 1 << 23,  // Pi
+      Punctuation_FinalQuote = 1 << 24,    // Pf
+      Punctuation_Other = 1 << 25,         // Po
+      Symbol_Math = 1 << 26,               // Sm
+      Symbol_Currency = 1 << 27,           // Sc
+      Symbol_Modifier = 1 << 28,           // Sk
+      Symbol_Other = 1 << 29               // So
+    };
+  }
+}
+
+#endif
+// vim: ts=2 sw=2 et
diff --git a/JavaScriptCore/kxmlcore/unicode/icu/UnicodeIcu.h b/JavaScriptCore/kxmlcore/unicode/icu/UnicodeIcu.h
new file mode 100644 (file)
index 0000000..8bb1ee1
--- /dev/null
@@ -0,0 +1,118 @@
+// -*- c-basic-offset: 2 -*-
+/*
+ *  This file is part of the KDE libraries
+ *  Copyright (C) 2006 George Staikos <staikos@kde.org>
+ *
+ *  This library is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU Library General Public
+ *  License as published by the Free Software Foundation; either
+ *  version 2 of the License, or (at your option) any later version.
+ *
+ *  This library is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ *  Library General Public License for more details.
+ *
+ *  You should have received a copy of the GNU Library General Public License
+ *  along with this library; see the file COPYING.LIB.  If not, write to
+ *  the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ *  Boston, MA 02110-1301, USA.
+ *
+ */
+
+#ifndef KJS_UNICODE_ICU_H
+#define KJS_UNICODE_ICU_H
+
+#include <unicode/uchar.h>
+
+#include "../UnicodeCategory.h"
+
+namespace KXMLCore {
+  namespace Unicode {
+    inline unsigned short toLower(unsigned short c) { // lower-case mapping of a UTF-16 code unit via ICU
+      return static_cast<unsigned short>(u_tolower(c)); // u_tolower() returns UChar32; narrow explicitly
+    }
+
+    inline unsigned short toUpper(unsigned short c) { // upper-case mapping of a UTF-16 code unit via ICU
+      return static_cast<unsigned short>(u_toupper(c)); // u_toupper() returns UChar32; narrow explicitly
+    }
+
+    inline bool isFormatChar(unsigned short c) { // true when ICU reports general category Cf for c
+      return (U_GET_GC_MASK(c) & U_GC_CF_MASK) != 0;
+    }
+
+    inline bool isSeparatorSpace(unsigned short c) { // true when ICU reports general category Zs for c
+      return (U_GET_GC_MASK(c) & U_GC_ZS_MASK) != 0;
+    }
+
+    inline CharCategory category(unsigned short c) { // Translates ICU's u_charType(c) result into the matching CharCategory enumerator.
+      switch (u_charType(c)) {
+        case U_NON_SPACING_MARK:
+          return Mark_NonSpacing;
+        case U_COMBINING_SPACING_MARK:
+          return Mark_SpacingCombining;
+        case U_ENCLOSING_MARK:
+          return Mark_Enclosing;
+        case U_DECIMAL_DIGIT_NUMBER:
+          return Number_DecimalDigit;
+        case U_LETTER_NUMBER:
+          return Number_Letter;
+        case U_OTHER_NUMBER:
+          return Number_Other;
+        case U_SPACE_SEPARATOR:
+          return Separator_Space;
+        case U_LINE_SEPARATOR:
+          return Separator_Line;
+        case U_PARAGRAPH_SEPARATOR:
+          return Separator_Paragraph;
+        case U_CONTROL_CHAR:
+          return Other_Control;
+        case U_FORMAT_CHAR:
+          return Other_Format;
+        case U_SURROGATE:
+          return Other_Surrogate;
+        case U_PRIVATE_USE_CHAR:
+          return Other_PrivateUse;
+        case U_GENERAL_OTHER_TYPES:
+          return Other_NotAssigned;
+        case U_UPPERCASE_LETTER:
+          return Letter_Uppercase;
+        case U_LOWERCASE_LETTER:
+          return Letter_Lowercase;
+        case U_TITLECASE_LETTER:
+          return Letter_Titlecase;
+        case U_MODIFIER_LETTER:
+          return Letter_Modifier;
+        case U_OTHER_LETTER:
+          return Letter_Other;
+        case U_CONNECTOR_PUNCTUATION:
+          return Punctuation_Connector;
+        case U_DASH_PUNCTUATION:
+          return Punctuation_Dash;
+        case U_START_PUNCTUATION:
+          return Punctuation_Open;
+        case U_END_PUNCTUATION:
+          return Punctuation_Close;
+        case U_INITIAL_PUNCTUATION:
+          return Punctuation_InitialQuote;
+        case U_FINAL_PUNCTUATION:
+          return Punctuation_FinalQuote;
+        case U_OTHER_PUNCTUATION:
+          return Punctuation_Other;
+        case U_MATH_SYMBOL:
+          return Symbol_Math;
+        case U_CURRENCY_SYMBOL:
+          return Symbol_Currency;
+        case U_MODIFIER_SYMBOL:
+          return Symbol_Modifier;
+        case U_OTHER_SYMBOL:
+          return Symbol_Other;
+        default: // any u_charType() value not listed above
+          return NoCategory;
+      }
+    }
+  }
+}
+
+#endif
+// vim: ts=2 sw=2 et
diff --git a/JavaScriptCore/kxmlcore/unicode/qt4/UnicodeQt4.h b/JavaScriptCore/kxmlcore/unicode/qt4/UnicodeQt4.h
new file mode 100644 (file)
index 0000000..03e0157
--- /dev/null
@@ -0,0 +1,118 @@
+// -*- c-basic-offset: 2 -*-
+/*
+ *  This file is part of the KDE libraries
+ *  Copyright (C) 2006 George Staikos <staikos@kde.org>
+ *
+ *  This library is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU Library General Public
+ *  License as published by the Free Software Foundation; either
+ *  version 2 of the License, or (at your option) any later version.
+ *
+ *  This library is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ *  Library General Public License for more details.
+ *
+ *  You should have received a copy of the GNU Library General Public License
+ *  along with this library; see the file COPYING.LIB.  If not, write to
+ *  the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ *  Boston, MA 02110-1301, USA.
+ *
+ */
+
+#ifndef KJS_UNICODE_QT4_H
+#define KJS_UNICODE_QT4_H
+
+#include <QChar>
+
+#include "../UnicodeCategory.h"
+
+namespace KXMLCore {
+  namespace Unicode {
+    inline unsigned short toLower(unsigned short c) { // lower-case mapping of a UTF-16 code unit via QChar
+      return static_cast<unsigned short>(QChar(c).toLower().unicode());
+    }
+
+    inline unsigned short toUpper(unsigned short c) { // upper-case mapping of a UTF-16 code unit via QChar
+      return static_cast<unsigned short>(QChar(c).toUpper().unicode());
+    }
+
+    inline bool isFormatChar(unsigned short c) { // true when QChar reports category Other_Format for c
+      return QChar::Other_Format == QChar(c).category();
+    }
+
+    inline bool isSeparatorSpace(unsigned short c) { // true when QChar reports category Separator_Space for c
+      return QChar::Separator_Space == QChar(c).category();
+    }
+
+    inline CharCategory category(unsigned short c) { // Translates QChar(c).category() into the same-named CharCategory enumerator.
+      switch (QChar(c).category()) {
+        case QChar::Mark_NonSpacing:
+          return Mark_NonSpacing;
+        case QChar::Mark_SpacingCombining:
+          return Mark_SpacingCombining;
+        case QChar::Mark_Enclosing:
+          return Mark_Enclosing;
+        case QChar::Number_DecimalDigit:
+          return Number_DecimalDigit;
+        case QChar::Number_Letter:
+          return Number_Letter;
+        case QChar::Number_Other:
+          return Number_Other;
+        case QChar::Separator_Space:
+          return Separator_Space;
+        case QChar::Separator_Line:
+          return Separator_Line;
+        case QChar::Separator_Paragraph:
+          return Separator_Paragraph;
+        case QChar::Other_Control:
+          return Other_Control;
+        case QChar::Other_Format:
+          return Other_Format;
+        case QChar::Other_Surrogate:
+          return Other_Surrogate;
+        case QChar::Other_PrivateUse:
+          return Other_PrivateUse;
+        case QChar::Other_NotAssigned:
+          return Other_NotAssigned;
+        case QChar::Letter_Uppercase:
+          return Letter_Uppercase;
+        case QChar::Letter_Lowercase:
+          return Letter_Lowercase;
+        case QChar::Letter_Titlecase:
+          return Letter_Titlecase;
+        case QChar::Letter_Modifier:
+          return Letter_Modifier;
+        case QChar::Letter_Other:
+          return Letter_Other;
+        case QChar::Punctuation_Connector:
+          return Punctuation_Connector;
+        case QChar::Punctuation_Dash:
+          return Punctuation_Dash;
+        case QChar::Punctuation_Open:
+          return Punctuation_Open;
+        case QChar::Punctuation_Close:
+          return Punctuation_Close;
+        case QChar::Punctuation_InitialQuote:
+          return Punctuation_InitialQuote;
+        case QChar::Punctuation_FinalQuote:
+          return Punctuation_FinalQuote;
+        case QChar::Punctuation_Other:
+          return Punctuation_Other;
+        case QChar::Symbol_Math:
+          return Symbol_Math;
+        case QChar::Symbol_Currency:
+          return Symbol_Currency;
+        case QChar::Symbol_Modifier:
+          return Symbol_Modifier;
+        case QChar::Symbol_Other:
+          return Symbol_Other;
+        default: // any QChar category value not listed above
+          return NoCategory;
+      }
+    }
+  }
+}
+
+#endif
+// vim: ts=2 sw=2 et