2007-11-30 Eric Seidel <eric@webkit.org>
authoreric@webkit.org <eric@webkit.org@268f45cc-cd09-0410-ab3c-d52691b4dbfc>
Fri, 30 Nov 2007 23:43:45 +0000 (23:43 +0000)
committereric@webkit.org <eric@webkit.org@268f45cc-cd09-0410-ab3c-d52691b4dbfc>
Fri, 30 Nov 2007 23:43:45 +0000 (23:43 +0000)
        Reviewed by darin.

        PCRE crashes under GuardMalloc
        http://bugs.webkit.org/show_bug.cgi?id=16127
        check against patternEnd to make sure we don't walk off the end of the string

        * pcre/pcre_compile.cpp:
        (compile_branch):
        (calculateCompiledPatternLengthAndFlags):

git-svn-id: https://svn.webkit.org/repository/webkit/trunk@28260 268f45cc-cd09-0410-ab3c-d52691b4dbfc

JavaScriptCore/ChangeLog
JavaScriptCore/pcre/pcre_compile.cpp
LayoutTests/ChangeLog
LayoutTests/fast/js/regexp-compile-crash-expected.txt [new file with mode: 0644]
LayoutTests/fast/js/regexp-compile-crash.html [new file with mode: 0644]
LayoutTests/fast/js/resources/regexp-compile-crash.js [new file with mode: 0644]
WebCore/WebCore.xcodeproj/project.pbxproj

index ac0ad54a430cad82284a3a383ac00ea29eadfcb1..c0722c61bd8ece94ca6d45f463d3a45e13906677 100644 (file)
 
         * pcre/pcre_internal.h: Removed all the UTF-16 helper functions.
 
+2007-11-30  Eric Seidel  <eric@webkit.org>
+
+        Reviewed by darin.
+        
+        PCRE crashes under GuardMalloc
+        http://bugs.webkit.org/show_bug.cgi?id=16127
+        check against patternEnd to make sure we don't walk off the end of the string
+
+        * pcre/pcre_compile.cpp:
+        (compile_branch):
+        (calculateCompiledPatternLengthAndFlags):
+
 2007-11-30  Eric Seidel  <eric@webkit.org>
 
         Reviewed by Maciej.
index 99bdbf4a52e424a83d98dbfc4a7117c51b5b9293..78a38586b980ca5c632046421a9022310559a743 100644 (file)
@@ -197,7 +197,7 @@ static int check_escape(const UChar** ptrptr, const UChar* patternEnd, ErrorCode
             if (!isclass) {
                 const UChar* oldptr = ptr;
                 c -= '0';
-                while (ptr + 1 < patternEnd && isASCIIDigit(ptr[1]) && c <= bracount)
+                while ((ptr + 1 < patternEnd) && isASCIIDigit(ptr[1]) && c <= bracount)
                     c = c * 10 + *(++ptr) - '0';
                 if (c <= bracount) {
                     c = -(ESC_REF + c);
@@ -726,6 +726,11 @@ Returns:         true on success
                  false, with *errorcodeptr set non-zero on error
 */
 
+static inline bool safelyCheckNextChar(const UChar* ptr, const UChar* patternEnd, UChar expected)
+{
+    return ((ptr + 1 < patternEnd) && ptr[1] == expected);
+}
+
 static bool
 compile_branch(int options, int* brackets, uschar** codeptr,
                const UChar** ptrptr, const UChar* patternEnd, ErrorCode* errorcodeptr, int *firstbyteptr,
@@ -774,8 +779,7 @@ compile_branch(int options, int* brackets, uschar** codeptr,
     
     /* Switch on next character until the end of the branch */
     
-    for (;; ptr++)
-    {
+    for (;; ptr++) {
         bool negate_class;
         bool should_flip_negation; /* If a negative special such as \S is used, we should negate the whole class to properly support Unicode. */
         int class_charcount;
@@ -864,7 +868,10 @@ compile_branch(int options, int* brackets, uschar** codeptr,
                  they are encountered at the top level, so we'll do that too. */
                 
                 /* If the first character is '^', set the negation flag and skip it. */
-                
+
+                if (ptr + 1 >= patternEnd)
+                    return -1;
+
                 if (ptr[1] == '^') {
                     negate_class = true;
                     ++ptr;
@@ -892,7 +899,8 @@ compile_branch(int options, int* brackets, uschar** codeptr,
                  through the regex checked the overall syntax, so we don't need to be very
                  strict here. At the start of the loop, c contains the first byte of the
                  character. */
-                while ((c = *(++ptr)) != ']') {
+
+                while ((++ptr < patternEnd) && (c = *ptr) != ']') {
                     /* Backslash may introduce a single character, or it may introduce one
                      of the specials, which just set a flag. Escaped items are checked for
                      validity in the pre-compiling pass. The sequence \b is a special case.
@@ -962,7 +970,7 @@ compile_branch(int options, int* brackets, uschar** codeptr,
                      Perl does not permit ']' to be the end of the range. A '-' character
                      here is treated as a literal. */
                     
-                    if (ptr[1] == '-' && ptr[2] != ']') {
+                    if ((ptr + 2 < patternEnd) && ptr[1] == '-' && ptr[2] != ']') {
                         ptr += 2;
                         
                         int d = *ptr;
@@ -1086,11 +1094,8 @@ compile_branch(int options, int* brackets, uschar** codeptr,
                                 class_utf8data += _pcre_ord2utf8(othercase, class_utf8data);
                             }
                         }
-                    }
-                    else
-                        
-                    /* Handle a single-byte character */
-                    {
+                    } else {
+                        /* Handle a single-byte character */
                         classbits[c/8] |= (1 << (c&7));
                         if (options & IgnoreCaseOption) {
                             c = flipCase(c);
@@ -1246,7 +1251,7 @@ compile_branch(int options, int* brackets, uschar** codeptr,
                  but if PCRE_UNGREEDY is set, it works the other way round. We change the
                  repeat type to the non-default. */
                 
-                if (ptr + 1 < patternEnd && ptr[1] == '?') {
+                if (safelyCheckNextChar(ptr, patternEnd, '?')) {
                     repeat_type = 1;
                     ptr++;
                 } else
@@ -2236,13 +2241,13 @@ static int calculateCompiledPatternLengthAndFlags(const UChar* pattern, int patt
     while (++ptr < patternEnd) {
         int minRepeats = 0, maxRepeats = 0;
         int c = *ptr;
-        
+
         item_count++;    /* Is zero for the first non-comment item */
-        
+
         switch (c) {
             /* A backslashed item may be an escaped data character or it may be a
              character type. */
-                
+
             case '\\':
                 c = check_escape(&ptr, patternEnd, &errorcode, bracount, false);
                 if (errorcode != 0)
@@ -2278,8 +2283,8 @@ static int calculateCompiledPatternLengthAndFlags(const UChar* pattern, int patt
                     if (refnum > compile_block.top_backref)
                         compile_block.top_backref = refnum;
                     length += 2;   /* For single back reference */
-                    if (ptr[1] == '{' && is_counted_repeat(ptr+2, patternEnd)) {
-                        ptr = read_repeat_counts(ptr+2, &minRepeats, &maxRepeats, &errorcode);
+                    if (safelyCheckNextChar(ptr, patternEnd, '{') && is_counted_repeat(ptr + 2, patternEnd)) {
+                        ptr = read_repeat_counts(ptr + 2, &minRepeats, &maxRepeats, &errorcode);
                         if (errorcode)
                             return -1;
                         if ((minRepeats == 0 && (maxRepeats == 1 || maxRepeats == -1)) ||
@@ -2287,7 +2292,7 @@ static int calculateCompiledPatternLengthAndFlags(const UChar* pattern, int patt
                             length++;
                         else
                             length += 5;
-                        if (ptr[1] == '?')
+                        if (safelyCheckNextChar(ptr, patternEnd, '?'))
                             ptr++;
                     }
                 }
@@ -2308,7 +2313,7 @@ static int calculateCompiledPatternLengthAndFlags(const UChar* pattern, int patt
                 
             /* This covers the cases of braced repeats after a single char, metachar,
              class, or back reference. */
-            
+
             case '{':
                 if (!is_counted_repeat(ptr+1, patternEnd))
                     goto NORMAL_CHAR;
@@ -2333,11 +2338,11 @@ static int calculateCompiledPatternLengthAndFlags(const UChar* pattern, int patt
                     length += lastitemlength + ((maxRepeats > 0)? 3 : 1);
                 }
                 
-                if (ptr[1] == '?')
+                if (safelyCheckNextChar(ptr, patternEnd, '?'))
                     ptr++;      /* Needs no extra length */
-                
+
             POSSESSIVE:                     /* Test for possessive quantifier */
-                if (ptr[1] == '+') {
+                if (safelyCheckNextChar(ptr, patternEnd, '+')) {
                     ptr++;
                     length += 2 + 2 * LINK_SIZE;   /* Allow for atomic brackets */
                 }
@@ -2411,9 +2416,9 @@ static int calculateCompiledPatternLengthAndFlags(const UChar* pattern, int patt
                         class_optcount++;
                         
                         int d = -1;
-                        if (ptr + 1 < patternEnd && ptr[1] == '-') {
+                        if (safelyCheckNextChar(ptr, patternEnd, '-')) {
                             UChar const *hyptr = ptr++;
-                            if (ptr + 1 < patternEnd && ptr[1] == '\\') {
+                            if (safelyCheckNextChar(ptr, patternEnd, '\\')) {
                                 ptr++;
                                 d = check_escape(&ptr, patternEnd, &errorcode, bracount, true);
                                 if (errorcode != 0)
@@ -2421,7 +2426,7 @@ static int calculateCompiledPatternLengthAndFlags(const UChar* pattern, int patt
                                 if (-d == ESC_b)
                                     d = '\b';        /* backspace */
                             }
-                            else if (ptr + 1 < patternEnd && ptr[1] != ']')
+                            else if ((ptr + 1 < patternEnd) && ptr[1] != ']')
                                 d = *++ptr;
                             if (d < 0)
                                 ptr = hyptr;      /* go back to hyphen as data */
@@ -2521,7 +2526,7 @@ static int calculateCompiledPatternLengthAndFlags(const UChar* pattern, int patt
                     /* A repeat needs either 1 or 5 bytes. If it is a possessive quantifier,
                      we also need extra for wrapping the whole thing in a sub-pattern. */
                     
-                    if (ptr + 1 < patternEnd && ptr[1] == '{' && is_counted_repeat(ptr+2, patternEnd)) {
+                    if (safelyCheckNextChar(ptr, patternEnd, '{') && is_counted_repeat(ptr+2, patternEnd)) {
                         ptr = read_repeat_counts(ptr+2, &minRepeats, &maxRepeats, &errorcode);
                         if (errorcode != 0)
                             return -1;
@@ -2530,10 +2535,10 @@ static int calculateCompiledPatternLengthAndFlags(const UChar* pattern, int patt
                             length++;
                         else
                             length += 5;
-                        if (ptr + 1 < patternEnd && ptr[1] == '+') {
+                        if (safelyCheckNextChar(ptr, patternEnd, '+')) {
                             ptr++;
                             length += 2 + 2 * LINK_SIZE;
-                        } else if (ptr + 1 < patternEnd && ptr[1] == '?')
+                        } else if (safelyCheckNextChar(ptr, patternEnd, '?'))
                             ptr++;
                     }
                 }
@@ -2549,7 +2554,7 @@ static int calculateCompiledPatternLengthAndFlags(const UChar* pattern, int patt
                 
                 /* Handle special forms of bracket, which all start (? */
                 
-                if (ptr + 1 < patternEnd && ptr[1] == '?') {
+                if (safelyCheckNextChar(ptr, patternEnd, '?')) {
                     switch (c = (ptr + 2 < patternEnd ? ptr[2] : 0)) {
                             /* Non-referencing groups and lookaheads just move the pointer on, and
                              then behave like a non-special bracket, except that they don't increment
@@ -2621,7 +2626,7 @@ static int calculateCompiledPatternLengthAndFlags(const UChar* pattern, int patt
                 /* Leave ptr at the final char; for read_repeat_counts this happens
                  automatically; for the others we need an increment. */
                 
-                if (ptr + 1 < patternEnd && (c = ptr[1]) == '{' && is_counted_repeat(ptr+2, patternEnd)) {
+                if ((ptr + 1 < patternEnd) && (c = ptr[1]) == '{' && is_counted_repeat(ptr+2, patternEnd)) {
                     ptr = read_repeat_counts(ptr+2, &minRepeats, &maxRepeats, &errorcode);
                     if (errorcode)
                         return -1;
@@ -2667,7 +2672,7 @@ static int calculateCompiledPatternLengthAndFlags(const UChar* pattern, int patt
                 
                 /* Allow space for once brackets for "possessive quantifier" */
                 
-                if (ptr + 1 < patternEnd && ptr[1] == '+') {
+                if (safelyCheckNextChar(ptr, patternEnd, '+')) {
                     ptr++;
                     length += 2 + 2 * LINK_SIZE;
                 }
index 3e39d8674beec5f374772819e3285e530eb3050c..a18f7989b7d00c44ba4d5d55b0d6806c2c14dcf3 100644 (file)
@@ -1,3 +1,13 @@
+2007-11-30  Eric Seidel  <eric@webkit.org>
+
+        Reviewed by darin.
+        
+        Test case for:
+        http://bugs.webkit.org/show_bug.cgi?id=16127
+
+        * fast/js/regexp-compile-crash-expected.txt: Added.
+        * fast/js/regexp-compile-crash.html: Added.
+
 2007-11-30  Adam Roben  <aroben@apple.com>
 
         Copy some cross-platform results into platform/win to avoid picking up the platform/mac versions
diff --git a/LayoutTests/fast/js/regexp-compile-crash-expected.txt b/LayoutTests/fast/js/regexp-compile-crash-expected.txt
new file mode 100644 (file)
index 0000000..1ecee2f
--- /dev/null
@@ -0,0 +1,16 @@
+Test regexp compiling to make sure it doens't crash like bug 16127
+
+On success, you will see a series of "PASS" messages, followed by "TEST COMPLETE".
+
+
+PASS !!/\)[;s]+/ is true
+PASS /[/ threw exception SyntaxError: Parse error.
+PASS /[a/ threw exception SyntaxError: Parse error.
+PASS /[-/ threw exception SyntaxError: Parse error.
+PASS !!/(a)\ 1/ is true
+PASS !!/(a)\ 1{1,3}/ is true
+PASS No crashes, yay!
+PASS successfullyParsed is true
+
+TEST COMPLETE
+
diff --git a/LayoutTests/fast/js/regexp-compile-crash.html b/LayoutTests/fast/js/regexp-compile-crash.html
new file mode 100644 (file)
index 0000000..e5bfe90
--- /dev/null
@@ -0,0 +1,13 @@
+<!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML//EN">
+<html>
+<head>
+<link rel="stylesheet" href="resources/js-test-style.css">
+<script src="resources/js-test-pre.js"></script>
+</head>
+<body>
+<p id="description"></p>
+<div id="console"></div>
+<script src="resources/regexp-compile-crash.js"></script>
+<script src="resources/js-test-post.js"></script>
+</body>
+</html>
diff --git a/LayoutTests/fast/js/resources/regexp-compile-crash.js b/LayoutTests/fast/js/resources/regexp-compile-crash.js
new file mode 100644 (file)
index 0000000..7c7817a
--- /dev/null
@@ -0,0 +1,11 @@
+description("Test regexp compiling to make sure it doens't crash like bug 16127");
+
+shouldBeTrue('!!/\\)[;\s]+/');
+shouldThrow('/[/');
+shouldThrow('/[a/');
+shouldThrow('/[-/');
+shouldBeTrue('!!/(a)\1/');
+shouldBeTrue('!!/(a)\1{1,3}/');
+
+testPassed("No crashes, yay!")
+successfullyParsed = true;
index 2dd155eb4d9152cffd739ccd616dea1240c0f2cb..8a16febd4bb54c3db6ae160c157bfbff06398aed 100644 (file)
                0867D690FE84028FC02AAC07 /* Project object */ = {
                        isa = PBXProject;
                        buildConfigurationList = 149C284308902B11008A9EFC /* Build configuration list for PBXProject "WebCore" */;
+                       compatibilityVersion = "Xcode 2.4";
                        hasScannedForEncodings = 1;
                        knownRegions = (
                                English,