JavaScriptCore:
authorbarraclough@apple.com <barraclough@apple.com@268f45cc-cd09-0410-ab3c-d52691b4dbfc>
Fri, 24 Apr 2009 00:32:31 +0000 (00:32 +0000)
committerbarraclough@apple.com <barraclough@apple.com@268f45cc-cd09-0410-ab3c-d52691b4dbfc>
Fri, 24 Apr 2009 00:32:31 +0000 (00:32 +0000)
2009-04-23  Gavin Barraclough  <barraclough@apple.com>

        Reviewed by Geoff "Dread Pirate Roberts" Garen.

        Various small fixes to YARR JIT, in preparation for enabling it by default.

        * Correctly index into the callframe when storing restart addresses for
          nested alternatives.
        * Allow backtracking back into matched alternatives of parentheses.
        * Fix callframe offset calculation for parenthetical assertions.
        * When a set of parenthese are quantified with a fixed and variable portion,
          and the variable portion is quantified once, this should not reset the
          pattern match on failure to match (the last match from the firxed portion
          should be preserved).
        * Up the pattern size limit to match PCRE's new limit.
        * Unlclosed parentheses should be reported with the message "missing )".

        * wtf/Platform.h:
        * yarr/RegexCompiler.cpp:
        (JSC::Yarr::RegexPatternConstructor::quantifyAtom):
        (JSC::Yarr::RegexPatternConstructor::setupAlternativeOffsets):
        * yarr/RegexInterpreter.cpp:
        (JSC::Yarr::Interpreter::matchParentheses):
        (JSC::Yarr::Interpreter::backtrackParentheses):
        (JSC::Yarr::ByteCompiler::emitDisjunction):
        * yarr/RegexJIT.cpp:
        (JSC::Yarr::RegexGenerator::loadFromFrameAndJump):
        (JSC::Yarr::RegexGenerator::generateParenthesesDisjunction):
        (JSC::Yarr::RegexGenerator::generateParentheticalAssertion):
        (JSC::Yarr::RegexGenerator::generateTerm):
        (JSC::Yarr::executeRegex):
        * yarr/RegexParser.h:
        (JSC::Yarr::Parser::):
        (JSC::Yarr::Parser::parseTokens):
        (JSC::Yarr::Parser::parse):
        * yarr/RegexPattern.h:
        (JSC::Yarr::PatternTerm::):
        (JSC::Yarr::PatternTerm::PatternTerm):

LayoutTests:

2009-04-23  Gavin Barraclough  <barraclough@apple.com>

        Reviewed by Geoff "Dread Pirate Roberts" Garen.

        This test tries to force itself into PCRE; modify the
        test so that as well as dodging WREC it can also avoid
        YARR!

        * fast/js/resources/regexp-overflow-too-big.js:

git-svn-id: https://svn.webkit.org/repository/webkit/trunk@42800 268f45cc-cd09-0410-ab3c-d52691b4dbfc

JavaScriptCore/ChangeLog
JavaScriptCore/yarr/RegexCompiler.cpp
JavaScriptCore/yarr/RegexInterpreter.cpp
JavaScriptCore/yarr/RegexJIT.cpp
JavaScriptCore/yarr/RegexParser.h
JavaScriptCore/yarr/RegexPattern.h
LayoutTests/ChangeLog
LayoutTests/fast/js/resources/regexp-overflow-too-big.js

index 7a62088..19b7b4e 100644 (file)
@@ -1,3 +1,42 @@
+2009-04-23  Gavin Barraclough  <barraclough@apple.com>
+
+        Reviewed by Geoff "Dread Pirate Roberts" Garen.
+
+        Various small fixes to YARR JIT, in preparation for enabling it by default.
+
+        * Correctly index into the callframe when storing restart addresses for
+          nested alternatives.
+        * Allow backtracking back into matched alternatives of parentheses.
+        * Fix callframe offset calculation for parenthetical assertions.
+        * When a set of parenthese are quantified with a fixed and variable portion,
+          and the variable portion is quantified once, this should not reset the
+          pattern match on failure to match (the last match from the firxed portion
+          should be preserved).
+        * Up the pattern size limit to match PCRE's new limit.
+        * Unlclosed parentheses should be reported with the message "missing )".
+
+        * wtf/Platform.h:
+        * yarr/RegexCompiler.cpp:
+        (JSC::Yarr::RegexPatternConstructor::quantifyAtom):
+        (JSC::Yarr::RegexPatternConstructor::setupAlternativeOffsets):
+        * yarr/RegexInterpreter.cpp:
+        (JSC::Yarr::Interpreter::matchParentheses):
+        (JSC::Yarr::Interpreter::backtrackParentheses):
+        (JSC::Yarr::ByteCompiler::emitDisjunction):
+        * yarr/RegexJIT.cpp:
+        (JSC::Yarr::RegexGenerator::loadFromFrameAndJump):
+        (JSC::Yarr::RegexGenerator::generateParenthesesDisjunction):
+        (JSC::Yarr::RegexGenerator::generateParentheticalAssertion):
+        (JSC::Yarr::RegexGenerator::generateTerm):
+        (JSC::Yarr::executeRegex):
+        * yarr/RegexParser.h:
+        (JSC::Yarr::Parser::):
+        (JSC::Yarr::Parser::parseTokens):
+        (JSC::Yarr::Parser::parse):
+        * yarr/RegexPattern.h:
+        (JSC::Yarr::PatternTerm::):
+        (JSC::Yarr::PatternTerm::PatternTerm):
+
 2009-04-22  Mark Rowe  <mrowe@apple.com>
 
         Rubber-stamped by Gavin Barraclough.
index b2761ca..eee8845 100644 (file)
@@ -541,6 +541,8 @@ public:
             m_alternative->m_terms.append(copyTerm(term));
             // NOTE: this term is interesting from an analysis perspective, in that it can be ignored.....
             m_alternative->lastTerm().quantify((max == UINT_MAX) ? max : max - min, greedy ? QuantifierGreedy : QuantifierNonGreedy);
+            if (m_alternative->lastTerm().type == PatternTerm::TypeParenthesesSubpattern)
+                m_alternative->lastTerm().parentheses.isCopy = true;
         }
     }
 
@@ -607,7 +609,7 @@ public:
             case PatternTerm::TypeParenthesesSubpattern:
                 // Note: for fixed once parentheses we will ensure at least the minimum is available; others are on their own.
                 term.frameLocation = currentCallFrameSize;
-                if (term.quantityCount == 1) {
+                if ((term.quantityCount == 1) && !term.parentheses.isCopy) {
                     if (term.quantityType == QuantifierFixedCount) {
                         currentCallFrameSize = setupDisjunctionOffsets(term.parentheses.disjunction, currentCallFrameSize, currentInputPosition);
                         currentInputPosition += term.parentheses.disjunction->m_minimumSize;
index 236776b..f37a2fe 100644 (file)
@@ -776,7 +776,6 @@ public:
     bool matchParentheses(ByteTerm& term, DisjunctionContext* context)
     {
         ASSERT(term.type == ByteTerm::TypeParenthesesSubpattern);
-        ASSERT(term.atom.quantityCount > 1);
 
         BackTrackInfoParentheses* backTrack = reinterpret_cast<BackTrackInfoParentheses*>(context->frame + term.frameLocation);
 
@@ -852,7 +851,6 @@ public:
     bool backtrackParentheses(ByteTerm& term, DisjunctionContext* context)
     {
         ASSERT(term.type == ByteTerm::TypeParenthesesSubpattern);
-        ASSERT(term.atom.quantityCount > 1);
 
         BackTrackInfoParentheses* backTrack = reinterpret_cast<BackTrackInfoParentheses*>(context->frame + term.frameLocation);
 
@@ -1437,7 +1435,7 @@ public:
 
                 case PatternTerm::TypeParenthesesSubpattern: {
                     unsigned disjunctionAlreadyCheckedCount = 0;
-                    if (term.quantityCount == 1) {
+                    if ((term.quantityCount == 1) && !term.parentheses.isCopy) {
                         if (term.quantityType == QuantifierFixedCount) {
                             disjunctionAlreadyCheckedCount = term.parentheses.disjunction->m_minimumSize;
                             unsigned delegateEndInputOffset = term.inputPosition - currentCountAlreadyChecked;
index 7f63f12..3ad168d 100644 (file)
@@ -252,7 +252,7 @@ class RegexGenerator : private MacroAssembler {
 
     DataLabelPtr storeToFrameWithPatch(unsigned frameLocation)
     {
-        return storePtrWithPatch(Address(stackPointerRegister, frameLocation));
+        return storePtrWithPatch(Address(stackPointerRegister, frameLocation * sizeof(void*)));
     }
 
     void loadFromFrame(unsigned frameLocation, RegisterID reg)
@@ -262,7 +262,7 @@ class RegexGenerator : private MacroAssembler {
 
     void loadFromFrameAndJump(unsigned frameLocation)
     {
-        jump(Address(stackPointerRegister, frameLocation));
+        jump(Address(stackPointerRegister, frameLocation * sizeof(void*)));
     }
 
     struct AlternativeBacktrackRecord {
@@ -826,6 +826,10 @@ class RegexGenerator : private MacroAssembler {
 
                 // Alternative did not match.
                 Label backtrackLocation(this);
+                
+                // Can we backtrack the alternative? - if so, do so.  If not, just fall through to the next one.
+                state.plantJumpToBacktrackIfExists(this);
+                
                 state.linkAlternativeBacktracks(this);
 
                 if (countToCheck) {
@@ -956,7 +960,7 @@ class RegexGenerator : private MacroAssembler {
         ASSERT(term.quantityType == QuantifierFixedCount);
 
         unsigned parenthesesFrameLocation = term.frameLocation;
-        unsigned alternativeFrameLocation = parenthesesFrameLocation += RegexStackSpaceForBackTrackInfoParentheticalAssertionOnce;
+        unsigned alternativeFrameLocation = parenthesesFrameLocation + RegexStackSpaceForBackTrackInfoParentheticalAssertionOnce;
 
         int countCheckedAfterAssertion = state.checkedTotal - term.inputPosition;
 
@@ -1063,7 +1067,7 @@ class RegexGenerator : private MacroAssembler {
             break;
 
         case PatternTerm::TypeParenthesesSubpattern:
-            if (term.quantityCount == 1)
+            if ((term.quantityCount == 1) && !term.parentheses.isCopy)
                 generateParenthesesSingle(state);
             else
                 m_generationFailed = true;
@@ -1240,7 +1244,7 @@ int executeRegex(RegexCodeBlock& jitObject, const UChar* input, unsigned start,
 {
     if (jitObject.m_pcreFallback) {
         int result = jsRegExpExecute(jitObject.m_pcreFallback, input, length, start, output, outputArraySize);
-        return (result < 0) ? result : output[0];
+        return (result < 0) ? -1 : output[0];
     } else {
 #if PLATFORM(X86) && !COMPILER(MSVC)
         typedef int (*RegexJITCode)(const UChar* input, unsigned start, unsigned length, int* output) __attribute__ ((regparm (3)));
index 2951c4f..f2e65a7 100644 (file)
@@ -55,6 +55,7 @@ private:
         PatternTooLarge,
         QuantifierOutOfOrder,
         QuantifierWithoutAtom,
+        MissingParentheses,
         ParenthesesUnmatched,
         ParenthesesTypeInvalid,
         CharacterClassUnmatched,
@@ -624,7 +625,7 @@ private:
         }
 
         if (m_parenthesesNestingDepth > 0)
-            m_err = ParenthesesUnmatched;
+            m_err = MissingParentheses;
     }
 
     /*
@@ -655,6 +656,7 @@ private:
             "regular expression too large",
             "numbers out of order in {} quantifier",
             "nothing to repeat",
+            "missing )",
             "unmatched parentheses",
             "unrecognized character after (?",
             "missing terminating ] for character class",
@@ -767,7 +769,8 @@ private:
     unsigned m_index;
     unsigned m_parenthesesNestingDepth;
 
-    static const unsigned MAX_PATTERN_SIZE = (1 << 13);
+    // Derived by empirical testing of compile time in PCRE and WREC.
+    static const unsigned MAX_PATTERN_SIZE = 1024 * 1024;
 };
 
 /*
index f7a2670..7ca05a6 100644 (file)
@@ -90,6 +90,7 @@ struct PatternTerm {
             PatternDisjunction* disjunction;
             unsigned subpatternId;
             unsigned lastSubpatternId;
+            bool isCopy;
         } parentheses;
     };
     QuantifierType quantityType;
@@ -120,6 +121,7 @@ struct PatternTerm {
     {
         parentheses.disjunction = disjunction;
         parentheses.subpatternId = subpatternId;
+        parentheses.isCopy = false;
         quantityType = QuantifierFixedCount;
         quantityCount = 1;
     }
index c6601fc..2099c8c 100644 (file)
@@ -1,3 +1,13 @@
+2009-04-23  Gavin Barraclough  <barraclough@apple.com>
+
+        Reviewed by Geoff "Dread Pirate Roberts" Garen.
+
+        This test tries to force itself into PCRE; modify the
+        test so that as well as dodging WREC it can also avoid
+        YARR!
+
+        * fast/js/resources/regexp-overflow-too-big.js:
+
 2009-04-23  Kevin McCullough  <kmccullough@apple.com>
 
         Reviewed by Adam Roben.
index 1ae3441..22e0eaa 100644 (file)
@@ -5,7 +5,7 @@ for (var i = 0; i < 18; ++i)
     complexPattern += "a?";
 for (var i = 0; i < 18; ++i)
     complexPattern += "a";
-complexPattern = "(" + complexPattern + ")";
+complexPattern = "\1(" + complexPattern + ")";
 
 var complexInput = "";
 for (var i = 0; i < 18; ++i)