2010-11-16 Peter Varga <pvarga@inf.u-szeged.hu>
authorcommit-queue@webkit.org <commit-queue@webkit.org@268f45cc-cd09-0410-ab3c-d52691b4dbfc>
Tue, 16 Nov 2010 23:27:13 +0000 (23:27 +0000)
committercommit-queue@webkit.org <commit-queue@webkit.org@268f45cc-cd09-0410-ab3c-d52691b4dbfc>
Tue, 16 Nov 2010 23:27:13 +0000 (23:27 +0000)
        Reviewed by Gavin Barraclough.

        The number of recursive match calls isn't limited in YARR Interpreter
        https://bugs.webkit.org/show_bug.cgi?id=47906

        Check the number of the matchDisjunction recursive calls to avoid unbounded
        recursion.
        Now the matchDisjunction function returns JSRegExpResult instead of bool.
        The JSRegExpResult enum contains the result of matching or the error code
        of the failure (like HitLimit) which terminates the matching.
        The error codes are based on pcre's jsRegExpExecute error codes.

        * yarr/RegexInterpreter.cpp:
        (JSC::Yarr::Interpreter::parenthesesDoBacktrack):
        (JSC::Yarr::Interpreter::matchParentheses):
        (JSC::Yarr::Interpreter::backtrackParentheses):
        (JSC::Yarr::Interpreter::matchDisjunction):
        (JSC::Yarr::Interpreter::matchNonZeroDisjunction):
        (JSC::Yarr::Interpreter::interpret):
        (JSC::Yarr::Interpreter::Interpreter):
        * yarr/RegexInterpreter.h:

git-svn-id: https://svn.webkit.org/repository/webkit/trunk@72140 268f45cc-cd09-0410-ab3c-d52691b4dbfc

JavaScriptCore/ChangeLog
JavaScriptCore/yarr/RegexInterpreter.cpp
JavaScriptCore/yarr/RegexInterpreter.h

index 3f2f37e..f7aadfd 100644 (file)
@@ -1,3 +1,27 @@
+2010-11-16  Peter Varga  <pvarga@inf.u-szeged.hu>
+
+        Reviewed by Gavin Barraclough.
+
+        The number of recursive match calls isn't limited in YARR Interpreter
+        https://bugs.webkit.org/show_bug.cgi?id=47906
+
+        Check the number of the matchDisjunction recursive calls to avoid unbounded
+        recursion.
+        Now the matchDisjunction function returns JSRegExpResult instead of bool.
+        The JSRegExpResult enum contains the result of matching or the error code
+        of the failure (like HitLimit) which terminates the matching.
+        The error codes are based on pcre's jsRegExpExecute error codes.
+
+        * yarr/RegexInterpreter.cpp:
+        (JSC::Yarr::Interpreter::parenthesesDoBacktrack):
+        (JSC::Yarr::Interpreter::matchParentheses):
+        (JSC::Yarr::Interpreter::backtrackParentheses):
+        (JSC::Yarr::Interpreter::matchDisjunction):
+        (JSC::Yarr::Interpreter::matchNonZeroDisjunction):
+        (JSC::Yarr::Interpreter::interpret):
+        (JSC::Yarr::Interpreter::Interpreter):
+        * yarr/RegexInterpreter.h:
+
 2010-11-16  Brian Weinstein  <bweinstein@apple.com>
 
         Rest of the Windows build fix.
index ec96636..80440d9 100644 (file)
@@ -1,5 +1,6 @@
 /*
  * Copyright (C) 2009 Apple Inc. All rights reserved.
+ * Copyright (C) 2010 Peter Varga (pvarga@inf.u-szeged.hu), University of Szeged
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -591,20 +592,24 @@ public:
         unsigned count = term.atom.parenthesesDisjunction->m_numSubpatterns;
         context->restoreOutput(output, firstSubpatternId, count);
     }
-    bool parenthesesDoBacktrack(ByteTerm& term, BackTrackInfoParentheses* backTrack)
+    JSRegExpResult parenthesesDoBacktrack(ByteTerm& term, BackTrackInfoParentheses* backTrack)
     {
         while (backTrack->matchAmount) {
             ParenthesesDisjunctionContext* context = backTrack->lastContext;
 
-            if (matchDisjunction(term.atom.parenthesesDisjunction, context->getDisjunctionContext(term), true))
-                return true;
+            JSRegExpResult result = matchDisjunction(term.atom.parenthesesDisjunction, context->getDisjunctionContext(term), true);
+            if (result == JSRegExpMatch)
+                return JSRegExpMatch;
 
             resetMatches(term, context);
             popParenthesesDisjunctionContext(backTrack);
             freeParenthesesDisjunctionContext(context);
+
+            if (result != JSRegExpNoMatch)
+                return result;
         }
 
-        return false;
+        return JSRegExpNoMatch;
     }
 
     bool matchParenthesesOnceBegin(ByteTerm& term, DisjunctionContext* context)
@@ -765,7 +770,7 @@ public:
         return false;
     }
 
-    bool matchParentheses(ByteTerm& term, DisjunctionContext* context)
+    JSRegExpResult matchParentheses(ByteTerm& term, DisjunctionContext* context)
     {
         ASSERT(term.type == ByteTerm::TypeParenthesesSubpattern);
 
@@ -786,31 +791,42 @@ public:
             while (backTrack->matchAmount < term.atom.quantityCount) {
                 // Try to do a match, and it it succeeds, add it to the list.
                 ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);
-                if (matchDisjunction(disjunctionBody, context->getDisjunctionContext(term)))
+                JSRegExpResult result = matchDisjunction(disjunctionBody, context->getDisjunctionContext(term));
+                if (result == JSRegExpMatch)
                     appendParenthesesDisjunctionContext(backTrack, context);
                 else {
                     // The match failed; try to find an alternate point to carry on from.
                     resetMatches(term, context);
                     freeParenthesesDisjunctionContext(context);
-                    if (!parenthesesDoBacktrack(term, backTrack))
-                        return false;
+
+                    if (result == JSRegExpNoMatch) {
+                        JSRegExpResult backtrackResult = parenthesesDoBacktrack(term, backTrack);
+                        if (backtrackResult != JSRegExpMatch)
+                            return backtrackResult;
+                    } else
+                        return result;
                 }
             }
 
             ASSERT(backTrack->matchAmount == term.atom.quantityCount);
             ParenthesesDisjunctionContext* context = backTrack->lastContext;
             recordParenthesesMatch(term, context);
-            return true;
+            return JSRegExpMatch;
         }
 
         case QuantifierGreedy: {
             while (backTrack->matchAmount < term.atom.quantityCount) {
                 ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);
-                if (matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term)))
+                JSRegExpResult result = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term));
+                if (result == JSRegExpMatch)
                     appendParenthesesDisjunctionContext(backTrack, context);
                 else {
                     resetMatches(term, context);
                     freeParenthesesDisjunctionContext(context);
+
+                    if (result != JSRegExpNoMatch)
+                        return result;
+
                     break;
                 }
             }
@@ -819,15 +835,15 @@ public:
                 ParenthesesDisjunctionContext* context = backTrack->lastContext;
                 recordParenthesesMatch(term, context);
             }
-            return true;
+            return JSRegExpMatch;
         }
 
         case QuantifierNonGreedy:
-            return true;
+            return JSRegExpMatch;
         }
 
         ASSERT_NOT_REACHED();
-        return false;
+        return JSRegExpErrorNoMatch;
     }
 
     // Rules for backtracking differ depending on whether this is greedy or non-greedy.
@@ -840,7 +856,7 @@ public:
     // Non-greedy, we've already done the one less case, so don't match on popping.
     // We haven't done the one more case, so always try to add that.
     //
-    bool backtrackParentheses(ByteTerm& term, DisjunctionContext* context)
+    JSRegExpResult backtrackParentheses(ByteTerm& term, DisjunctionContext* context)
     {
         ASSERT(term.type == ByteTerm::TypeParenthesesSubpattern);
 
@@ -859,44 +875,58 @@ public:
             ASSERT(backTrack->matchAmount == term.atom.quantityCount);
 
             ParenthesesDisjunctionContext* context = 0;
+            JSRegExpResult result = parenthesesDoBacktrack(term, backTrack);
 
-            if (!parenthesesDoBacktrack(term, backTrack))
-                return false;
+            if (result != JSRegExpMatch)
+                return result;
 
             // While we haven't yet reached our fixed limit,
             while (backTrack->matchAmount < term.atom.quantityCount) {
                 // Try to do a match, and it it succeeds, add it to the list.
                 context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);
-                if (matchDisjunction(disjunctionBody, context->getDisjunctionContext(term)))
+                result = matchDisjunction(disjunctionBody, context->getDisjunctionContext(term));
+
+                if (result == JSRegExpMatch)
                     appendParenthesesDisjunctionContext(backTrack, context);
                 else {
                     // The match failed; try to find an alternate point to carry on from.
                     resetMatches(term, context);
                     freeParenthesesDisjunctionContext(context);
-                    if (!parenthesesDoBacktrack(term, backTrack))
-                        return false;
+
+                    if (result == JSRegExpNoMatch) {
+                        JSRegExpResult backtrackResult = parenthesesDoBacktrack(term, backTrack);
+                        if (backtrackResult != JSRegExpMatch)
+                            return backtrackResult;
+                    } else
+                        return result;
                 }
             }
 
             ASSERT(backTrack->matchAmount == term.atom.quantityCount);
             context = backTrack->lastContext;
             recordParenthesesMatch(term, context);
-            return true;
+            return JSRegExpMatch;
         }
 
         case QuantifierGreedy: {
             if (!backTrack->matchAmount)
-                return false;
+                return JSRegExpNoMatch;
 
             ParenthesesDisjunctionContext* context = backTrack->lastContext;
-            if (matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term), true)) {
+            JSRegExpResult result = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term), true);
+            if (result == JSRegExpMatch) {
                 while (backTrack->matchAmount < term.atom.quantityCount) {
                     ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);
-                    if (matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term)))
+                    JSRegExpResult parenthesesResult = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term));
+                    if (parenthesesResult == JSRegExpMatch)
                         appendParenthesesDisjunctionContext(backTrack, context);
                     else {
                         resetMatches(term, context);
                         freeParenthesesDisjunctionContext(context);
+
+                        if (parenthesesResult != JSRegExpNoMatch)
+                            return parenthesesResult;
+
                         break;
                     }
                 }
@@ -904,60 +934,73 @@ public:
                 resetMatches(term, context);
                 popParenthesesDisjunctionContext(backTrack);
                 freeParenthesesDisjunctionContext(context);
+
+                if (result != JSRegExpNoMatch)
+                    return result;
             }
 
             if (backTrack->matchAmount) {
                 ParenthesesDisjunctionContext* context = backTrack->lastContext;
                 recordParenthesesMatch(term, context);
             }
-            return true;
+            return JSRegExpMatch;
         }
 
         case QuantifierNonGreedy: {
             // If we've not reached the limit, try to add one more match.
             if (backTrack->matchAmount < term.atom.quantityCount) {
                 ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);
-                if (matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term))) {
+                JSRegExpResult result = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term));
+                if (result == JSRegExpMatch) {
                     appendParenthesesDisjunctionContext(backTrack, context);
                     recordParenthesesMatch(term, context);
-                    return true;
-                } else {
-                    resetMatches(term, context);
-                    freeParenthesesDisjunctionContext(context);
+                    return JSRegExpMatch;
                 }
+
+                resetMatches(term, context);
+                freeParenthesesDisjunctionContext(context);
+
+                if (result != JSRegExpNoMatch)
+                    return result;
             }
 
             // Nope - okay backtrack looking for an alternative.
             while (backTrack->matchAmount) {
                 ParenthesesDisjunctionContext* context = backTrack->lastContext;
-                if (matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term), true)) {
+                JSRegExpResult result = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term), true);
+                if (result == JSRegExpMatch) {
                     // successful backtrack! we're back in the game!
                     if (backTrack->matchAmount) {
                         context = backTrack->lastContext;
                         recordParenthesesMatch(term, context);
                     }
-                    return true;
+                    return JSRegExpMatch;
                 }
 
                 // pop a match off the stack
                 resetMatches(term, context);
                 popParenthesesDisjunctionContext(backTrack);
                 freeParenthesesDisjunctionContext(context);
+
+                return result;
             }
 
-            return false;
+            return JSRegExpNoMatch;
         }
         }
 
         ASSERT_NOT_REACHED();
-        return false;
+        return JSRegExpErrorNoMatch;
     }
 
 #define MATCH_NEXT() { ++context->term; goto matchAgain; }
 #define BACKTRACK() { --context->term; goto backtrack; }
 #define currentTerm() (disjunction->terms[context->term])
-    bool matchDisjunction(ByteDisjunction* disjunction, DisjunctionContext* context, bool btrack = false)
+    JSRegExpResult matchDisjunction(ByteDisjunction* disjunction, DisjunctionContext* context, bool btrack = false)
     {
+        if (!--remainingMatchCount)
+            return JSRegExpErrorHitLimit;
+
         if (btrack)
             BACKTRACK();
 
@@ -972,14 +1015,14 @@ public:
             MATCH_NEXT();
         case ByteTerm::TypeSubpatternEnd:
             context->matchEnd = input.getPos();
-            return true;
+            return JSRegExpMatch;
 
         case ByteTerm::TypeBodyAlternativeBegin:
             MATCH_NEXT();
         case ByteTerm::TypeBodyAlternativeDisjunction:
         case ByteTerm::TypeBodyAlternativeEnd:
             context->matchEnd = input.getPos();
-            return true;
+            return JSRegExpMatch;
 
         case ByteTerm::TypeAlternativeBegin:
             MATCH_NEXT();
@@ -1069,10 +1112,16 @@ public:
             if (matchBackReference(currentTerm(), context))
                 MATCH_NEXT();
             BACKTRACK();
-        case ByteTerm::TypeParenthesesSubpattern:
-            if (matchParentheses(currentTerm(), context))
+        case ByteTerm::TypeParenthesesSubpattern: {
+            JSRegExpResult result = matchParentheses(currentTerm(), context);
+
+            if (result == JSRegExpMatch) {
                 MATCH_NEXT();
+            }  else if (result != JSRegExpNoMatch)
+                return result;
+
             BACKTRACK();
+        }
         case ByteTerm::TypeParenthesesSubpatternOnceBegin:
             if (matchParenthesesOnceBegin(currentTerm(), context))
                 MATCH_NEXT();
@@ -1104,7 +1153,7 @@ public:
 
         switch (currentTerm().type) {
         case ByteTerm::TypeSubpatternBegin:
-            return false;
+            return JSRegExpNoMatch;
         case ByteTerm::TypeSubpatternEnd:
             ASSERT_NOT_REACHED();
 
@@ -1116,7 +1165,7 @@ public:
                 MATCH_NEXT();
 
             if (input.atEnd())
-                return false;
+                return JSRegExpNoMatch;
 
             input.next();
             context->matchBegin = input.getPos();
@@ -1172,10 +1221,16 @@ public:
                 if (backtrackBackReference(currentTerm(), context))
                     MATCH_NEXT();
                 BACKTRACK();
-            case ByteTerm::TypeParenthesesSubpattern:
-                if (backtrackParentheses(currentTerm(), context))
+            case ByteTerm::TypeParenthesesSubpattern: {
+                JSRegExpResult result = backtrackParentheses(currentTerm(), context);
+
+                if (result == JSRegExpMatch) {
                     MATCH_NEXT();
+                } else if (result != JSRegExpNoMatch)
+                    return result;
+
                 BACKTRACK();
+            }
             case ByteTerm::TypeParenthesesSubpatternOnceBegin:
                 if (backtrackParenthesesOnceBegin(currentTerm(), context))
                     MATCH_NEXT();
@@ -1199,20 +1254,23 @@ public:
         }
 
         ASSERT_NOT_REACHED();
-        return false;
+        return JSRegExpErrorNoMatch;
     }
 
-    bool matchNonZeroDisjunction(ByteDisjunction* disjunction, DisjunctionContext* context, bool btrack = false)
+    JSRegExpResult matchNonZeroDisjunction(ByteDisjunction* disjunction, DisjunctionContext* context, bool btrack = false)
     {
-        if (matchDisjunction(disjunction, context, btrack)) {
+        JSRegExpResult result = matchDisjunction(disjunction, context, btrack);
+
+        if (result == JSRegExpMatch) {
             while (context->matchBegin == context->matchEnd) {
-                if (!matchDisjunction(disjunction, context, true))
-                    return false;
+                result = matchDisjunction(disjunction, context, true);
+                if (result != JSRegExpMatch)
+                    return result;
             }
-            return true;
+            return JSRegExpMatch;
         }
 
-        return false;
+        return result;
     }
 
     int interpret()
@@ -1226,7 +1284,8 @@ public:
 
         DisjunctionContext* context = allocDisjunctionContext(pattern->m_body.get());
 
-        if (matchDisjunction(pattern->m_body.get(), context)) {
+        JSRegExpResult result = matchDisjunction(pattern->m_body.get(), context);
+        if (result == JSRegExpMatch) {
             output[0] = context->matchBegin;
             output[1] = context->matchEnd;
         }
@@ -1235,6 +1294,9 @@ public:
 
         pattern->m_allocator->stopAllocator();
 
+        if (output[0] == -1 && result != JSRegExpNoMatch)
+            return result;
+
         return output[0];
     }
 
@@ -1243,6 +1305,7 @@ public:
         , output(output)
         , input(inputChar, start, length)
         , allocatorPool(0)
+        , remainingMatchCount(matchLimit)
     {
     }
 
@@ -1251,6 +1314,7 @@ private:
     int* output;
     InputStream input;
     BumpPointerPool* allocatorPool;
+    unsigned remainingMatchCount;
 };
 
 
index 37f17fe..fe775a4 100644 (file)
@@ -40,6 +40,21 @@ using WTF::BumpPointerAllocator;
 
 namespace JSC { namespace Yarr {
 
+// TODO move the matchLimit constant and the JSRegExpResult enum to the JSRegExp.h when pcre is removed.
+
+// The below limit restricts the number of "recursive" match calls in order to
+// avoid spending exponential time on complex regular expressions.
+static const unsigned matchLimit = 1000000;
+
+enum JSRegExpResult {
+    JSRegExpMatch = 1,
+    JSRegExpNoMatch = 0,
+    JSRegExpErrorNoMatch = -1,
+    JSRegExpErrorHitLimit = -2,
+    JSRegExpErrorNoMemory = -3,
+    JSRegExpErrorInternal = -4
+};
+
 class ByteDisjunction;
 
 struct ByteTerm {