2011-01-24 Peter Varga <pvarga@inf.u-szeged.hu>
authorpvarga@webkit.org <pvarga@webkit.org@268f45cc-cd09-0410-ab3c-d52691b4dbfc>
Mon, 24 Jan 2011 11:52:43 +0000 (11:52 +0000)
committerpvarga@webkit.org <pvarga@webkit.org@268f45cc-cd09-0410-ab3c-d52691b4dbfc>
Mon, 24 Jan 2011 11:52:43 +0000 (11:52 +0000)
        Reviewed by Oliver Hunt.

        Optimize regex patterns which contain empty alternatives
        https://bugs.webkit.org/show_bug.cgi?id=51395

        Eliminate the empty alternatives from the regex pattern and convert it to do
        the matching in an easier way.

        * fast/regex/script-tests/slow.js:
        * fast/regex/slow-expected.txt:
2011-01-24  Peter Varga  <pvarga@webkit.org>

        Reviewed by Oliver Hunt.

        Optimize regex patterns which contain empty alternatives
        https://bugs.webkit.org/show_bug.cgi?id=51395

        Eliminate the empty alternatives from the regex pattern and convert it to do
        the matching in an easier way.

        * yarr/YarrPattern.cpp:
        (JSC::Yarr::YarrPatternConstructor::atomParenthesesEnd):

git-svn-id: http://svn.webkit.org/repository/webkit/trunk@76502 268f45cc-cd09-0410-ab3c-d52691b4dbfc

LayoutTests/ChangeLog
LayoutTests/fast/regex/script-tests/slow.js
LayoutTests/fast/regex/slow-expected.txt
Source/JavaScriptCore/ChangeLog
Source/JavaScriptCore/yarr/YarrPattern.cpp

index 69203d3349007d847612bf431963826482d43b64..b1a0054d0e23f18c17eb83d1ed8f7649943c7181 100644 (file)
@@ -1,3 +1,16 @@
+2011-01-24  Peter Varga  <pvarga@inf.u-szeged.hu>
+
+        Reviewed by Oliver Hunt.
+
+        Optimize regex patterns which contain empty alternatives
+        https://bugs.webkit.org/show_bug.cgi?id=51395
+
+        Eliminate the empty alternatives from the regex pattern and convert it to do
+        the matching in an easier way.
+
+        * fast/regex/script-tests/slow.js:
+        * fast/regex/slow-expected.txt:
+
 2011-01-24  Pavel Podivilov  <podivilov@chromium.org>
 
         Unreviewed, test fix for r76497.
index 54b6b81ed6e18c9da8ac6f89b291af721550c6e3..83ccb91216811c4d41e73482a84c61f37ddc62e2 100644 (file)
@@ -2,6 +2,6 @@ description(
 'Test for expressions that would hang when evaluated due to exponential matching behavior. If the test does not hang it is a success.'
 );
 
-shouldBe('/(?:[^(?!)]||){23}z/.test("/(?:[^(?!)]||){23}z/")', 'false');
+shouldBe('/(?:[^(?!)]||){23}z/.test("/(?:[^(?!)]||){23}z/")', 'true');
 
 var successfullyParsed = true;
index 105f24f138af2ed1fee7fc45f22b78027fc16fb1..d238a83d5e2c97b018214669f693498cc047a84e 100644 (file)
@@ -3,7 +3,7 @@ Test for expressions that would hang when evaluated due to exponential matching
 On success, you will see a series of "PASS" messages, followed by "TEST COMPLETE".
 
 
-PASS /(?:[^(?!)]||){23}z/.test("/(?:[^(?!)]||){23}z/") is false
+PASS /(?:[^(?!)]||){23}z/.test("/(?:[^(?!)]||){23}z/") is true
 PASS successfullyParsed is true
 
 TEST COMPLETE
index 14c9992df139b8c2a638322b99f5692462daeb10..7d24a40ca4dea75d65ef8183ce8fdbacf0137dd0 100644 (file)
@@ -1,3 +1,16 @@
+2011-01-24  Peter Varga  <pvarga@webkit.org>
+
+        Reviewed by Oliver Hunt.
+
+        Optimize regex patterns which contain empty alternatives
+        https://bugs.webkit.org/show_bug.cgi?id=51395
+
+        Eliminate the empty alternatives from the regex pattern and convert it to do
+        the matching in an easier way.
+
+        * yarr/YarrPattern.cpp:
+        (JSC::Yarr::YarrPatternConstructor::atomParenthesesEnd):
+
 2011-01-24  Andras Becsi  <abecsi@webkit.org>
 
         Reviewed by Csaba Osztrogon√°c.
index 112b65d9c743bfd53ad050fd695046ed09f115e4..3d6dbd39d40cb233b3694c2ca56ad7b925148e8a 100644 (file)
@@ -488,24 +488,56 @@ public:
         m_alternative = m_alternative->m_parent->m_parent;
 
         PatternTerm& lastTerm = m_alternative->lastTerm();
-        
+
         unsigned numParenAlternatives = parenthesesDisjunction->m_alternatives.size();
         unsigned numBOLAnchoredAlts = 0;
-        // Bubble up BOL flags
+        bool containsEmptyAlternative = false;
+
         for (unsigned i = 0; i < numParenAlternatives; i++) {
+            if (!parenthesesDisjunction->m_alternatives[i]->m_terms.size() && numParenAlternatives > 1) {
+                parenthesesDisjunction->m_alternatives.remove(i);
+                --numParenAlternatives;
+
+                containsEmptyAlternative = true;
+                continue;
+            }
+
+            // Bubble up BOL flags
             if (parenthesesDisjunction->m_alternatives[i]->m_startsWithBOL)
                 numBOLAnchoredAlts++;
         }
-        
+
         if (numBOLAnchoredAlts) {
             m_alternative->m_containsBOL = true;
             // If all the alternatives in parens start with BOL, then so does this one
             if (numBOLAnchoredAlts == numParenAlternatives)
                 m_alternative->m_startsWithBOL = true;
         }
-        
+
         lastTerm.parentheses.lastSubpatternId = m_pattern.m_numSubpatterns;
         m_invertParentheticalAssertion = false;
+
+        if (containsEmptyAlternative) {
+            // Backup and remove the current disjunction's alternatives.
+            Vector<PatternAlternative*> alternatives;
+            alternatives.append(parenthesesDisjunction->m_alternatives);
+            parenthesesDisjunction->m_alternatives.clear();
+            PatternAlternative* alternative = parenthesesDisjunction->addNewAlternative();
+
+            // Insert a new non-capturing parentheses.
+            unsigned subpatternId = m_pattern.m_numSubpatterns + 1;
+            PatternDisjunction* newDisjunction = new PatternDisjunction(alternative);
+            m_pattern.m_disjunctions.append(newDisjunction);
+            alternative->m_terms.append(PatternTerm(PatternTerm::TypeParenthesesSubpattern, subpatternId, newDisjunction, false, false));
+            newDisjunction->m_alternatives.append(alternatives);
+
+            // Set the quantifier of the new parentheses to '?' and set the inherited properties.
+            PatternTerm& disjunctionTerm = alternative->lastTerm();
+            disjunctionTerm.quantify(1, QuantifierGreedy);
+            disjunctionTerm.parentheses.lastSubpatternId = m_pattern.m_numSubpatterns;
+            alternative->m_containsBOL = m_alternative->m_containsBOL;
+            alternative->m_startsWithBOL = m_alternative->m_startsWithBOL;
+        }
     }
 
     void atomBackReference(unsigned subpatternId)