2010-04-20 Oliver Hunt <oliver@apple.com>
authoroliver@apple.com <oliver@apple.com@268f45cc-cd09-0410-ab3c-d52691b4dbfc>
Tue, 20 Apr 2010 22:05:43 +0000 (22:05 +0000)
committeroliver@apple.com <oliver@apple.com@268f45cc-cd09-0410-ab3c-d52691b4dbfc>
Tue, 20 Apr 2010 22:05:43 +0000 (22:05 +0000)
        Reviewed by Gavin Barraclough.

        Autogenerate yarr character tables
        https://bugs.webkit.org/show_bug.cgi?id=37877

        Use a Python script to automatically generate character tables
        for the builtin YARR character classes.  This allows us to generate
        actual tables as well; by using these tables we can both increase
        performance of the check (for complex builtins) and reduce the actual
        code size.

        4-8% win on string-unpack-code, but lots of noise on other tests so
        I'm only confident saying it's a 1% win overall.

        * DerivedSources.make:
        * JavaScriptCore.xcodeproj/project.pbxproj:
        * assembler/AbstractMacroAssembler.h:
        (JSC::AbstractMacroAssembler::ExtendedAddress::ExtendedAddress):
        * assembler/MacroAssembler.h:
        (JSC::MacroAssembler::branchTest8):
        * assembler/MacroAssemblerX86Common.h:
        (JSC::MacroAssemblerX86Common::branchTest8):
        * assembler/MacroAssemblerX86_64.h:
        (JSC::MacroAssemblerX86_64::branchTest8):
        * assembler/X86Assembler.h:
        (JSC::X86Assembler::cmpb_im):
        (JSC::X86Assembler::testb_im):
        * bytecode/SamplingTool.cpp:
        (JSC::SamplingTool::dump):
        * create_regex_tables: Added.
        * yarr/RegexCompiler.cpp:
        (JSC::Yarr::CharacterClassConstructor::charClass):
        * yarr/RegexJIT.cpp:
        (JSC::Yarr::RegexGenerator::matchCharacterClass):
        (JSC::Yarr::RegexGenerator::generatePatternCharacterGreedy):
        (JSC::Yarr::RegexGenerator::generatePatternCharacterNonGreedy):
        (JSC::Yarr::RegexGenerator::generateCharacterClassGreedy):
        * yarr/RegexPattern.h:
        (JSC::Yarr::CharacterClassTable::create):
        (JSC::Yarr::CharacterClassTable::CharacterClassTable):
        (JSC::Yarr::CharacterClass::CharacterClass):

git-svn-id: https://svn.webkit.org/repository/webkit/trunk@57925 268f45cc-cd09-0410-ab3c-d52691b4dbfc

14 files changed:
JavaScriptCore/ChangeLog
JavaScriptCore/DerivedSources.make
JavaScriptCore/DerivedSources.pro
JavaScriptCore/JavaScriptCore.xcodeproj/project.pbxproj
JavaScriptCore/assembler/AbstractMacroAssembler.h
JavaScriptCore/assembler/MacroAssembler.h
JavaScriptCore/assembler/MacroAssemblerX86Common.h
JavaScriptCore/assembler/MacroAssemblerX86_64.h
JavaScriptCore/assembler/X86Assembler.h
JavaScriptCore/bytecode/SamplingTool.cpp
JavaScriptCore/create_regex_tables [new file with mode: 0644]
JavaScriptCore/yarr/RegexCompiler.cpp
JavaScriptCore/yarr/RegexJIT.cpp
JavaScriptCore/yarr/RegexPattern.h

index 9c16332..59788f0 100644 (file)
@@ -1,3 +1,47 @@
+2010-04-20  Oliver Hunt  <oliver@apple.com>
+
+        Reviewed by Gavin Barraclough.
+
+        Autogenerate yarr character tables
+        https://bugs.webkit.org/show_bug.cgi?id=37877
+
+        Use a Python script to automatically generate character tables
+        for the builtin YARR character classes.  This allows us to generate
+        actual tables as well; by using these tables we can both increase
+        performance of the check (for complex builtins) and reduce the actual
+        code size.
+
+        4-8% win on string-unpack-code, but lots of noise on other tests so
+        I'm only confident saying it's a 1% win overall.
+
+        * DerivedSources.make:
+        * JavaScriptCore.xcodeproj/project.pbxproj:
+        * assembler/AbstractMacroAssembler.h:
+        (JSC::AbstractMacroAssembler::ExtendedAddress::ExtendedAddress):
+        * assembler/MacroAssembler.h:
+        (JSC::MacroAssembler::branchTest8):
+        * assembler/MacroAssemblerX86Common.h:
+        (JSC::MacroAssemblerX86Common::branchTest8):
+        * assembler/MacroAssemblerX86_64.h:
+        (JSC::MacroAssemblerX86_64::branchTest8):
+        * assembler/X86Assembler.h:
+        (JSC::X86Assembler::cmpb_im):
+        (JSC::X86Assembler::testb_im):
+        * bytecode/SamplingTool.cpp:
+        (JSC::SamplingTool::dump):
+        * create_regex_tables: Added.
+        * yarr/RegexCompiler.cpp:
+        (JSC::Yarr::CharacterClassConstructor::charClass):
+        * yarr/RegexJIT.cpp:
+        (JSC::Yarr::RegexGenerator::matchCharacterClass):
+        (JSC::Yarr::RegexGenerator::generatePatternCharacterGreedy):
+        (JSC::Yarr::RegexGenerator::generatePatternCharacterNonGreedy):
+        (JSC::Yarr::RegexGenerator::generateCharacterClassGreedy):
+        * yarr/RegexPattern.h:
+        (JSC::Yarr::CharacterClassTable::create):
+        (JSC::Yarr::CharacterClassTable::CharacterClassTable):
+        (JSC::Yarr::CharacterClass::CharacterClass):
+
 2010-04-20  Gavin Barraclough  <barraclough@apple.com>
 
         Reviewed by NOBODY (speculative windows fix - missed a bit!).
index 9eaccab..4fc9cad 100644 (file)
@@ -48,6 +48,7 @@ all : \
     RegExpObject.lut.h \
     StringPrototype.lut.h \
     docs/bytecode.html \
+    RegExpJitTables.h \
 #
 
 # lookup tables for classes
@@ -74,3 +75,7 @@ chartables.c : dftables
 
 docs/bytecode.html: make-bytecode-docs.pl Interpreter.cpp 
        perl $^ $@
+
+# character tables for Yarr
+RegExpJitTables.h: create_regex_tables
+       python $^ > $@
index ac5b51c..7c5aad8 100644 (file)
@@ -93,3 +93,9 @@ ctgen.commands = perl $$ctgen.wkScript ${QMAKE_FILE_OUT} $$PREPROCESSOR
 ctgen.clean = ${QMAKE_FILE_OUT} ${QMAKE_VAR_JSC_GENERATED_SOURCES_DIR}${QMAKE_FILE_BASE}
 addExtraCompiler(ctgen)
 
+#GENERATOR: "RegExpJitTables.h": tables used by Yarr
+retgen.output = $$JSC_GENERATED_SOURCES_DIR/RegExpJitTables.h
+retgen.wkScript = $$PWD/create_regex_tables 
+retgen.input = retgen.wkScript
+retgen.commands = python $$retgen.wkScript > ${QMAKE_FILE_OUT}
+addExtraCompiler(retgen)
index fd44716..53d43e2 100644 (file)
                969A09220ED1E09C00F1F681 /* Completion.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = Completion.cpp; sourceTree = "<group>"; };
                96DD73780F9DA3100027FBCC /* VMTags.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = VMTags.h; sourceTree = "<group>"; };
                97F6903A1169DF7F00A6BB46 /* Terminator.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = Terminator.h; sourceTree = "<group>"; };
+               A718F61A11754A21002465A7 /* RegExpJitTables.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = RegExpJitTables.h; sourceTree = "<group>"; };
+               A718F8211178EB4B002465A7 /* create_regex_tables */ = {isa = PBXFileReference; explicitFileType = text.script.python; fileEncoding = 4; path = create_regex_tables; sourceTree = "<group>"; };
                A72700770DAC605600E548D7 /* JSNotAnObject.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = JSNotAnObject.h; sourceTree = "<group>"; };
                A72700780DAC605600E548D7 /* JSNotAnObject.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = JSNotAnObject.cpp; sourceTree = "<group>"; };
                A72701B30DADE94900E548D7 /* ExceptionHelpers.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = ExceptionHelpers.h; sourceTree = "<group>"; };
                0867D691FE84028FC02AAC07 /* JavaScriptCore */ = {
                        isa = PBXGroup;
                        children = (
+                               A718F8211178EB4B002465A7 /* create_regex_tables */,
                                937B63CC09E766D200A671DD /* DerivedSources.make */,
                                F692A8540255597D01FF60F7 /* create_hash_table */,
                                14B8ECA60A5653980062BE54 /* JavaScriptCore.exp */,
                650FDF8D09D0FCA700769E54 /* Derived Sources */ = {
                        isa = PBXGroup;
                        children = (
+                               A718F61A11754A21002465A7 /* RegExpJitTables.h */,
                                BC18C5230E16FC8A00B34460 /* ArrayPrototype.lut.h */,
                                65B174BE09D1000200820339 /* chartables.c */,
                                BCD203E70E1718F4002C7E82 /* DatePrototype.lut.h */,
index aad70e4..f96e34a 100644 (file)
@@ -81,6 +81,17 @@ public:
         int32_t offset;
     };
 
+    struct ExtendedAddress { // A base register paired with a pointer-sized (intptr_t) offset.
+        explicit ExtendedAddress(RegisterID base, intptr_t offset = 0) // offset defaults to 0 when omitted
+            : base(base)
+            , offset(offset)
+        {
+        }
+        
+        RegisterID base;
+        intptr_t offset; // intptr_t so a whole pointer value fits; RegexJIT stores a table address here
+    };
+
     // ImplicitAddress:
     //
     // This class is used for explicit 'load' and 'store' operations
index e6f698f..ce1be78 100644 (file)
@@ -331,6 +331,11 @@ public:
     {
         return branchSub32(cond, imm, dest);
     }
+    using MacroAssemblerBase::branchTest8; // keep the base-class overloads visible next to the one declared below
+    Jump branchTest8(Condition cond, ExtendedAddress address, Imm32 mask = Imm32(-1)) // ExtendedAddress overload; mask defaults to -1
+    {
+        return MacroAssemblerBase::branchTest8(cond, Address(address.base, address.offset), mask); // repackage as an Address and forward
+    }
 #endif
 
 };
index 073f563..27c2f15 100644 (file)
@@ -731,6 +731,16 @@ public:
             m_assembler.testb_im(mask.m_value, address.offset, address.base);
         return Jump(m_assembler.jCC(x86Condition(cond)));
     }
+    
+    Jump branchTest8(Condition cond, BaseIndex address, Imm32 mask = Imm32(-1)) // byte test on a base/index/scale/offset operand; returns the conditional Jump
+    {
+        ASSERT((cond == Zero) || (cond == NonZero)); // only Zero and NonZero are accepted
+        if (mask.m_value == -1) // default mask: compare the byte with 0 rather than masking it
+            m_assembler.cmpb_im(0, address.offset, address.base, address.index, address.scale);
+        else
+            m_assembler.testb_im(mask.m_value, address.offset, address.base, address.index, address.scale);
+        return Jump(m_assembler.jCC(x86Condition(cond))); // branch on the flags set by the cmpb/testb above
+    }
 
     Jump jump()
     {
index b36c323..339eaa4 100644 (file)
@@ -409,6 +409,14 @@ public:
         return label;
     }
 
+    using MacroAssemblerX86Common::branchTest8; // keep the X86Common overloads visible next to the one declared below
+    Jump branchTest8(Condition cond, ExtendedAddress address, Imm32 mask = Imm32(-1)) // ExtendedAddress overload; mask defaults to -1
+    {
+        ImmPtr addr(reinterpret_cast<void*>(address.offset)); // the offset is reinterpreted as a pointer value
+        MacroAssemblerX86Common::move(addr, scratchRegister); // that pointer is placed in scratchRegister
+        return MacroAssemblerX86Common::branchTest8(cond, BaseIndex(scratchRegister, address.base, TimesOne), mask); // scratchRegister is the base, address.base the index (TimesOne)
+    }
+
     Label loadPtrWithPatchToLEA(Address address, RegisterID dest)
     {
         Label label(this);
index 57b811c..14a02f7 100644 (file)
@@ -776,6 +776,12 @@ public:
         m_formatter.oneByteOp(OP_GROUP1_EbIb, GROUP1_OP_CMP, base, offset);
         m_formatter.immediate8(imm);
     }
+    
+    void cmpb_im(int imm, int offset, RegisterID base, RegisterID index, int scale) // base/index/scale variant of the cmpb_im overload above
+    {
+        m_formatter.oneByteOp(OP_GROUP1_EbIb, GROUP1_OP_CMP, base, index, scale, offset); // same opcode and group op as the base+offset overload
+        m_formatter.immediate8(imm); // imm is emitted through immediate8() after the opcode/operand
+    }
 
     void cmpl_im(int imm, int offset, RegisterID base, RegisterID index, int scale)
     {
@@ -901,6 +907,12 @@ public:
         m_formatter.oneByteOp(OP_GROUP3_EbIb, GROUP3_OP_TEST, base, offset);
         m_formatter.immediate8(imm);
     }
+    
+    void testb_im(int imm, int offset, RegisterID base, RegisterID index, int scale) // base/index/scale variant of the testb_im overload above
+    {
+        m_formatter.oneByteOp(OP_GROUP3_EbIb, GROUP3_OP_TEST, base, index, scale, offset); // same opcode and group op as the base+offset overload
+        m_formatter.immediate8(imm); // imm is emitted through immediate8() after the opcode/operand
+    }
 
     void testl_i32m(int imm, int offset, RegisterID base, RegisterID index, int scale)
     {
index 3f0babc..8522e45 100644 (file)
@@ -337,7 +337,7 @@ void SamplingTool::dump(ExecState* exec)
 
         if (blockPercent >= 1) {
             //Instruction* code = codeBlock->instructions().begin();
-            printf("#%d: %s:%d: %d / %lld (%.3f%%)\n", i + 1, record->m_executable->sourceURL().UTF8String().c_str(), codeBlock->lineNumberForBytecodeOffset(exec, 0), record->m_sampleCount, m_sampleCount, blockPercent);
+            printf("#%d: %s:%d: %d / %lld (%.3f%%)\n", i + 1, record->m_executable->sourceURL().ascii(), codeBlock->lineNumberForBytecodeOffset(exec, 0), record->m_sampleCount, m_sampleCount, blockPercent);
             if (i < 10) {
                 HashMap<unsigned,unsigned> lineCounts;
                 codeBlock->dump(exec);
diff --git a/JavaScriptCore/create_regex_tables b/JavaScriptCore/create_regex_tables
new file mode 100644 (file)
index 0000000..2d99bd1
--- /dev/null
@@ -0,0 +1,89 @@
+types = {  # built-in classes: name -> {"UseTable", optional "Inverse", "data": chars, code points or (min, max) pairs}
+    "wordchar": { "UseTable" : True, "data": ['_', ('0','9'), ('A', 'Z'), ('a','z')]},
+    "nonwordchar": { "UseTable" : True, "Inverse": "wordchar", "data": ['`', (0, ord('0') - 1), (ord('9') + 1, ord('A') - 1), (ord('Z') + 1, ord('_') - 1), (ord('z') + 1, 0xffff)]},
+    "newline": { "UseTable" : False, "data": ['\n', '\r', 0x2028, 0x2029]},
+    "spaces": { "UseTable" : True, "data": [' ', ('\t', '\r'), 0xa0, 0x1680, 0x180e, 0x2028, 0x2029, 0x202f, 0x205f, 0x3000, (0x2000, 0x200a)]},
+    "nonspaces": { "UseTable" : True, "Inverse": "spaces", "data": [(0, ord('\t') - 1), (ord('\r') + 1, ord(' ') - 1), (ord(' ') + 1, 0x009f), (0x00a1, 0x167f), (0x1681, 0x180d), (0x180f, 0x1fff), (0x200b, 0x2027), (0x202a, 0x202e), (0x2030, 0x205e), (0x2060, 0x2fff), (0x3001, 0xffff)]},
+    "digits": { "UseTable" : False, "data": [('0', '9')]},
+    "nondigits": { "UseTable" : False, "Inverse": "digits", "data": [(0, ord('0') - 1), (ord('9') + 1, 0xffff)] }
+}
+entriesPerLine = 50  # a newline is inserted into the table text after every 50 entries
+arrays = "";  # accumulates the generated table definitions
+functions = "";  # accumulates the generated <name>Create() functions
+
+for name, classes in types.items():
+    ranges = [];  # inclusive (min, max) code point pairs for this class
+    size = 0;  # NOTE(review): not read anywhere in this script
+    for _class in classes["data"]:
+        if type(_class) == str:  # single character -> one-element range
+            ranges.append((ord(_class), ord(_class)))
+        elif type(_class) == int:  # single code point -> one-element range
+            ranges.append((_class, _class))
+        else:  # (min, max) pair; either end may be a character or an int
+            (min, max) = _class;
+            if type(min) == str:
+                min = ord(min)
+            if type(max) == str:
+                max = ord(max)
+            if max > 0x7f and min <= 0x7f:  # a range straddling 0x7f is split into two pieces
+                ranges.append((min, 0x7f))
+                min = 0x80
+            ranges.append((min,max))
+    ranges.sort();
+    
+    if classes["UseTable"] and (not "Inverse" in classes):  # inverse classes get no table of their own
+        array = ("static const char _%sData[65536] = {\n" % name);  # 65536 entries, indexed by code point
+        i = 0
+        for (min,max) in ranges:
+            while i < min:  # zero-fill the gap before this range
+                i = i + 1
+                array += ('0,')
+                if (i % entriesPerLine == 0) and (i != 0):
+                    array += ('\n')
+            while i <= max:  # ones for every code point inside the range
+                i = i + 1
+                if (i == 65536):  # very last entry: no trailing comma
+                    array += ("1")
+                else:
+                    array += ('1,')
+                if (i % entriesPerLine == 0) and (i != 0):
+                    array += ('\n')
+        while i < 0xffff:  # zero-fill everything after the last range
+            array += ("0,")
+            i = i + 1;
+            if (i % entriesPerLine == 0) and (i != 0):
+                array += ('\n')
+        if i == 0xffff:  # final entry, written without a trailing comma
+            array += ("0")
+        array += ("\n};\n\n");
+        arrays += array
+    
+    # Generate createFunction:
+    function = "";
+    function += ("CharacterClass* %sCreate()\n" % name)
+    function += ("{\n")
+    if classes["UseTable"]:
+        if "Inverse" in classes:  # reuse the inverse class's table and pass true as the second argument
+            function += ("    CharacterClass* characterClass = new CharacterClass(CharacterClassTable::create(_%sData, true));\n" % (classes["Inverse"]))
+        else:
+            function += ("    CharacterClass* characterClass = new CharacterClass(CharacterClassTable::create(_%sData, false));\n" % (name))
+    else:
+        function += ("    CharacterClass* characterClass = new CharacterClass(0);\n")
+    for (min, max) in ranges:  # matches/ranges are emitted for every class, with or without a table
+        if (min == max):
+            if (min > 127):  # single values above 127 go to m_matchesUnicode
+                function += ("    characterClass->m_matchesUnicode.append(0x%04x);\n" % min)
+            else:
+                function += ("    characterClass->m_matches.append(0x%02x);\n" % min)
+            continue
+        if (min > 127) or (max > 127):  # ranges reaching above 127 go to m_rangesUnicode
+            function += ("    characterClass->m_rangesUnicode.append(CharacterRange(0x%04x, 0x%04x));\n" % (min, max))
+        else:
+            function += ("    characterClass->m_ranges.append(CharacterRange(0x%02x, 0x%02x));\n" % (min, max))
+    function += ("    return characterClass;\n")
+    function += ("}\n\n")
+    functions += function
+
+print(arrays)  # tables are printed first, then the functions that reference them
+print(functions)
+
index 6ad0106..9fbe213 100644 (file)
@@ -36,6 +36,8 @@ using namespace WTF;
 
 namespace JSC { namespace Yarr {
 
+#include "RegExpJitTables.h"
+
 class CharacterClassConstructor {
 public:
     CharacterClassConstructor(bool isCaseInsensitive = false)
@@ -141,7 +143,7 @@ public:
 
     CharacterClass* charClass()
     {
-        CharacterClass* characterClass = new CharacterClass();
+        CharacterClass* characterClass = new CharacterClass(0);
 
         characterClass->m_matches.append(m_matches);
         characterClass->m_ranges.append(m_ranges);
@@ -233,105 +235,6 @@ private:
     Vector<CharacterRange> m_rangesUnicode;
 };
 
-
-CharacterClass* newlineCreate()
-{
-    CharacterClass* characterClass = new CharacterClass();
-
-    characterClass->m_matches.append('\n');
-    characterClass->m_matches.append('\r');
-    characterClass->m_matchesUnicode.append(0x2028);
-    characterClass->m_matchesUnicode.append(0x2029);
-    
-    return characterClass;
-}
-
-CharacterClass* digitsCreate()
-{
-    CharacterClass* characterClass = new CharacterClass();
-
-    characterClass->m_ranges.append(CharacterRange('0', '9'));
-    
-    return characterClass;
-}
-
-CharacterClass* spacesCreate()
-{
-    CharacterClass* characterClass = new CharacterClass();
-
-    characterClass->m_matches.append(' ');
-    characterClass->m_ranges.append(CharacterRange('\t', '\r'));
-    characterClass->m_matchesUnicode.append(0x00a0);
-    characterClass->m_matchesUnicode.append(0x1680);
-    characterClass->m_matchesUnicode.append(0x180e);
-    characterClass->m_matchesUnicode.append(0x2028);
-    characterClass->m_matchesUnicode.append(0x2029);
-    characterClass->m_matchesUnicode.append(0x202f);
-    characterClass->m_matchesUnicode.append(0x205f);
-    characterClass->m_matchesUnicode.append(0x3000);
-    characterClass->m_rangesUnicode.append(CharacterRange(0x2000, 0x200a));
-    
-    return characterClass;
-}
-
-CharacterClass* wordcharCreate()
-{
-    CharacterClass* characterClass = new CharacterClass();
-
-    characterClass->m_matches.append('_');
-    characterClass->m_ranges.append(CharacterRange('0', '9'));
-    characterClass->m_ranges.append(CharacterRange('A', 'Z'));
-    characterClass->m_ranges.append(CharacterRange('a', 'z'));
-    
-    return characterClass;
-}
-
-CharacterClass* nondigitsCreate()
-{
-    CharacterClass* characterClass = new CharacterClass();
-
-    characterClass->m_ranges.append(CharacterRange(0, '0' - 1));
-    characterClass->m_ranges.append(CharacterRange('9' + 1, 0x7f));
-    characterClass->m_rangesUnicode.append(CharacterRange(0x80, 0xffff));
-    
-    return characterClass;
-}
-
-CharacterClass* nonspacesCreate()
-{
-    CharacterClass* characterClass = new CharacterClass();
-
-    characterClass->m_ranges.append(CharacterRange(0, '\t' - 1));
-    characterClass->m_ranges.append(CharacterRange('\r' + 1, ' ' - 1));
-    characterClass->m_ranges.append(CharacterRange(' ' + 1, 0x7f));
-    characterClass->m_rangesUnicode.append(CharacterRange(0x0080, 0x009f));
-    characterClass->m_rangesUnicode.append(CharacterRange(0x00a1, 0x167f));
-    characterClass->m_rangesUnicode.append(CharacterRange(0x1681, 0x180d));
-    characterClass->m_rangesUnicode.append(CharacterRange(0x180f, 0x1fff));
-    characterClass->m_rangesUnicode.append(CharacterRange(0x200b, 0x2027));
-    characterClass->m_rangesUnicode.append(CharacterRange(0x202a, 0x202e));
-    characterClass->m_rangesUnicode.append(CharacterRange(0x2030, 0x205e));
-    characterClass->m_rangesUnicode.append(CharacterRange(0x2060, 0x2fff));
-    characterClass->m_rangesUnicode.append(CharacterRange(0x3001, 0xffff));
-    
-    return characterClass;
-}
-
-CharacterClass* nonwordcharCreate()
-{
-    CharacterClass* characterClass = new CharacterClass();
-
-    characterClass->m_matches.append('`');
-    characterClass->m_ranges.append(CharacterRange(0, '0' - 1));
-    characterClass->m_ranges.append(CharacterRange('9' + 1, 'A' - 1));
-    characterClass->m_ranges.append(CharacterRange('Z' + 1, '_' - 1));
-    characterClass->m_ranges.append(CharacterRange('z' + 1, 0x7f));
-    characterClass->m_rangesUnicode.append(CharacterRange(0x80, 0xffff));
-
-    return characterClass;
-}
-
-
 class RegexPatternConstructor {
 public:
     RegexPatternConstructor(RegexPattern& pattern)
index f37abbf..340b53d 100644 (file)
@@ -40,7 +40,6 @@ using namespace WTF;
 
 namespace JSC { namespace Yarr {
 
-
 class RegexGenerator : private MacroAssembler {
     friend void jitCompileRegex(JSGlobalData* globalData, RegexCodeBlock& jitObject, const UString& pattern, unsigned& numSubpatterns, const char*& error, bool ignoreCase, bool multiline);
 
@@ -155,6 +154,11 @@ class RegexGenerator : private MacroAssembler {
 
     void matchCharacterClass(RegisterID character, JumpList& matchDest, const CharacterClass* charClass)
     {
+        if (charClass->m_table) {
+            ExtendedAddress tableEntry(character, reinterpret_cast<intptr_t>(charClass->m_table->m_table));
+            matchDest.append(branchTest8(charClass->m_table->m_inverted ? Zero : NonZero, tableEntry));   
+            return;
+        }
         Jump unicodeFail;
         if (charClass->m_matchesUnicode.size() || charClass->m_rangesUnicode.size()) {
             Jump isAscii = branch32(LessThanOrEqual, character, Imm32(0x7f));
@@ -609,9 +613,14 @@ class RegexGenerator : private MacroAssembler {
             ASSERT(!m_pattern.m_ignoreCase || (Unicode::toLower(ch) == Unicode::toUpper(ch)));
             failures.append(jumpIfCharNotEquals(ch, state.inputOffset()));
         }
+
         add32(Imm32(1), countRegister);
         add32(Imm32(1), index);
-        branch32(NotEqual, countRegister, Imm32(term.quantityCount)).linkTo(loop, this);
+        if (term.quantityCount != 0xffffffff)
+            branch32(NotEqual, countRegister, Imm32(term.quantityCount)).linkTo(loop, this);
+        else
+            jump(loop);
+
         failures.append(jump());
 
         Label backtrackBegin(this);
@@ -646,7 +655,8 @@ class RegexGenerator : private MacroAssembler {
         loadFromFrame(term.frameLocation, countRegister);
 
         atEndOfInput().linkTo(hardFail, this);
-        branch32(Equal, countRegister, Imm32(term.quantityCount), hardFail);
+        if (term.quantityCount != 0xffffffff)
+            branch32(Equal, countRegister, Imm32(term.quantityCount), hardFail);
         if (m_pattern.m_ignoreCase && isASCIIAlpha(ch)) {
             readCharacter(state.inputOffset(), character);
             or32(Imm32(32), character);
@@ -732,7 +742,11 @@ class RegexGenerator : private MacroAssembler {
 
         add32(Imm32(1), countRegister);
         add32(Imm32(1), index);
-        branch32(NotEqual, countRegister, Imm32(term.quantityCount)).linkTo(loop, this);
+        if (term.quantityCount != 0xffffffff)
+            branch32(NotEqual, countRegister, Imm32(term.quantityCount)).linkTo(loop, this);
+        else
+            jump(loop);
+
         failures.append(jump());
 
         Label backtrackBegin(this);
index 69bee0c..3271cc1 100644 (file)
@@ -56,11 +56,35 @@ struct CharacterRange {
     }
 };
 
+struct CharacterClassTable : RefCounted<CharacterClassTable> { // ref-counted pairing of a lookup table pointer with an "inverted" flag
+    const char* m_table; // stored as given; no destructor is declared here, so this struct does not free it
+    bool m_inverted; // when true, RegexJIT::matchCharacterClass branches on Zero instead of NonZero
+    static PassRefPtr<CharacterClassTable> create(const char* table, bool inverted) // the only public way to construct one; the constructor is private
+    {
+        return adoptRef(new CharacterClassTable(table, inverted));
+    }
+
+private:
+    CharacterClassTable(const char* table, bool inverted)
+        : m_table(table)
+        , m_inverted(inverted)
+    {
+    }
+};
+
 struct CharacterClass : FastAllocBase {
+    // All CharacterClass instances must carry the full set of matches and ranges.
+    // They may also have an optional table for faster lookups; when present, it
+    // must agree with the specified matches and ranges.
+    CharacterClass(PassRefPtr<CharacterClassTable> table) // callers pass 0 when there is no table
+        : m_table(table)
+    {
+    }
     Vector<UChar> m_matches;
     Vector<CharacterRange> m_ranges;
     Vector<UChar> m_matchesUnicode;
     Vector<CharacterRange> m_rangesUnicode;
+    RefPtr<CharacterClassTable> m_table; // null when no table was supplied; RegexJIT checks this before using the table path
 };
 
 enum QuantifierType {