JavaScriptCore:
authordarin <darin@268f45cc-cd09-0410-ab3c-d52691b4dbfc>
Sat, 15 Jul 2006 15:30:03 +0000 (15:30 +0000)
committerdarin <darin@268f45cc-cd09-0410-ab3c-d52691b4dbfc>
Sat, 15 Jul 2006 15:30:03 +0000 (15:30 +0000)
        Reviewed by Geoff.

        - fix http://bugzilla.opendarwin.org/show_bug.cgi?id=8395
          <rdar://problem/4613467>
          REGRESSION: RegEx seems broken for hex escaped non breaking space

        Test: fast/js/regexp-extended-characters-more.html

        * pcre/pcre_exec.c:
        (match): Got rid of utf16Length local variable to guarantee there's no
        extra stack usage in recursive calls. Fixed two places in the PCRE_UTF16
        code that were using the length variable, which is the UTF-8 length of
        a character in the pattern, to move in the UTF-16 subject string. Instead
        they hardcode lengths of 1 and 2 since the code already handles BMP
        characters and surrogate pairs separately. Also fixed some DPRINTF so
        I could compile with DEBUG on.
        (pcre_exec): Changed a place that was checking for multibyte characters
        in the subject string to use ISMIDCHAR. Previously it was using hardcoded
        logic that was right for UTF-8 but wrong for UTF-16.

        * pcre/pcre_compile.c: (pcre_compile2): Fixed a DPRINTF so I could compile
        with DEBUG on.

LayoutTests:

        Reviewed by Geoff.

        - test for http://bugzilla.opendarwin.org/show_bug.cgi?id=8395
          <rdar://problem/4613467>
          REGRESSION: RegEx seems broken for hex escaped non breaking space

        * fast/js/regexp-extended-characters-more-expected.txt: Added.
        * fast/js/regexp-extended-characters-more.html: Added.
        * fast/js/resources/regexp-extended-characters-more.js: Added.

git-svn-id: https://svn.webkit.org/repository/webkit/trunk@15455 268f45cc-cd09-0410-ab3c-d52691b4dbfc

JavaScriptCore/ChangeLog
JavaScriptCore/pcre/pcre_compile.c
JavaScriptCore/pcre/pcre_exec.c
LayoutTests/ChangeLog
LayoutTests/fast/js/regexp-extended-characters-more-expected.txt [new file with mode: 0644]
LayoutTests/fast/js/regexp-extended-characters-more.html [new file with mode: 0644]
LayoutTests/fast/js/resources/regexp-extended-characters-more.js [new file with mode: 0644]

index 4307ce3be0818de80fd6c870fded58283b67a32c..2649c1d9db0b206a6bac1a9d43f1c90189374dda 100644 (file)
@@ -1,3 +1,28 @@
+2006-07-15  Darin Adler  <darin@apple.com>
+
+        Reviewed by Geoff.
+
+        - fix http://bugzilla.opendarwin.org/show_bug.cgi?id=8395
+          <rdar://problem/4613467>
+          REGRESSION: RegEx seems broken for hex escaped non breaking space
+
+        Test: fast/js/regexp-extended-characters-more.html
+
+        * pcre/pcre_exec.c:
+        (match): Got rid of utf16Length local variable to guarantee there's no
+        extra stack usage in recursive calls. Fixed two places in the PCRE_UTF16
+        code that were using the length variable, which is the UTF-8 length of
+        a character in the pattern, to move in the UTF-16 subject string. Instead
+        they hardcode lengths of 1 and 2 since the code already handles BMP
+        characters and surrogate pairs separately. Also fixed some DPRINTF so
+        I could compile with DEBUG on.
+        (pcre_exec): Changed a place that was checking for multibyte characters
+        in the subject string to use ISMIDCHAR. Previously it was using hardcoded
+        logic that was right for UTF-8 but wrong for UTF-16.
+
+        * pcre/pcre_compile.c: (pcre_compile2): Fixed a DPRINTF so I could compile
+        with DEBUG on.
+
 2006-07-14  Geoffrey Garen  <ggaren@apple.com>
 
         RS by Maciej.
index 98b484e90fd12a4c3fdd731ca968e1e82e4cbb0e..11d7aeb9b5364ec24bedbd0b02e8d485ce4fe9d7 100644 (file)
@@ -3995,7 +3995,9 @@ compile_block.backref_map = 0;
 /* Reflect pattern for debugging output */
 
 DPRINTF(("------------------------------------------------------------------\n"));
+#if !PCRE_UTF16
 DPRINTF(("%s\n", pattern));
+#endif
 
 /* The first thing to do is to make a pass over the pattern to compute the
 amount of store required to hold the compiled code. This does not have to be
index aac2a34233d4dc95b2f840c3cfbfc61b15dea1ed..52c0373e202677344c58fb84d6d8a5f3f4125644 100644 (file)
@@ -1951,12 +1951,10 @@ for (;;)
       length = 1;
       GETUTF8CHARLEN(fc, ecode, length);
       {
-      int utf16Length; /* don't initialize on this line as workaround for Win32 compile problem */
-      utf16Length = fc > 0xFFFF ? 2 : 1;
-      if (min * utf16Length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
+      if (min * (fc > 0xFFFF ? 2 : 1) > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
       ecode += length;
 
-      if (utf16Length == 1)
+      if (fc <= 0xFFFF)
         {
 #ifdef SUPPORT_UCP
         int othercase;
@@ -1990,7 +1988,7 @@ for (;;)
           pp = eptr;
           for (i = min; i < max; i++)
             {
-            if (eptr > md->end_subject - length) break;
+            if (eptr >= md->end_subject) break;
             if (*eptr != fc && *eptr != othercase) break;
             ++eptr;
             }
@@ -2038,7 +2036,7 @@ for (;;)
           for (i = min; i < max; i++)
             {
             int nc;
-            if (eptr > md->end_subject - length) break;
+            if (eptr > md->end_subject - 2) break;
             GETCHAR(nc, eptr);
             if (*eptr != fc) break;
             eptr += 2;
@@ -2161,8 +2159,12 @@ for (;;)
     matching character if failing, up to the maximum. Alternatively, if
     maximizing, find the maximum number of characters and work backwards. */
 
+#if PCRE_UTF16
+    DPRINTF(("matching %c{%d,%d}\n", fc, min, max));
+#else
     DPRINTF(("matching %c{%d,%d} against subject %.*s\n", fc, min, max,
       max, eptr));
+#endif
 
     if ((ims & PCRE_CASELESS) != 0)
       {
@@ -2306,8 +2308,12 @@ for (;;)
     maximum. Alternatively, if maximizing, find the maximum number of
     characters and work backwards. */
 
+#if PCRE_UTF16
+    DPRINTF(("negative matching %c{%d,%d}\n", fc, min, max));
+#else
     DPRINTF(("negative matching %c{%d,%d} against subject %.*s\n", fc, min, max,
       max, eptr));
+#endif
 
     if ((ims & PCRE_CASELESS) != 0)
       {
@@ -3732,7 +3738,7 @@ do
     start_match++;
 #ifdef SUPPORT_UTF8
     if (match_block.utf8)
-      while(start_match < end_subject && (*start_match & 0xc0) == 0x80)
+      while(start_match < end_subject && ISMIDCHAR(*start_match))
         start_match++;
 #endif
     continue;
index 022832e38ccc082f5256300e5011c89b830c852f..4dd196bde9cc0ad92381a0c71d2db454aa03b96a 100644 (file)
@@ -1,3 +1,15 @@
+2006-07-15  Darin Adler  <darin@apple.com>
+
+        Reviewed by Geoff.
+
+        - test for http://bugzilla.opendarwin.org/show_bug.cgi?id=8395
+          <rdar://problem/4613467>
+          REGRESSION: RegEx seems broken for hex escaped non breaking space
+
+        * fast/js/regexp-extended-characters-more-expected.txt: Added.
+        * fast/js/regexp-extended-characters-more.html: Added.
+        * fast/js/resources/regexp-extended-characters-more.js: Added.
+
 2006-07-15  Darin Adler  <darin@apple.com>
 
         Reviewed by Adele.
diff --git a/LayoutTests/fast/js/regexp-extended-characters-more-expected.txt b/LayoutTests/fast/js/regexp-extended-characters-more-expected.txt
new file mode 100644 (file)
index 0000000..dbf6e89
--- /dev/null
@@ -0,0 +1,12 @@
+This test checks a few cases of extended (> 127) characters in repeat regular expressions.
+
+On success, you will see a series of "PASS" messages, followed by "TEST COMPLETE".
+
+
+PASS "foo\xa0\xa0\xa0".replace(/\xa0*/, "") is "foo\xa0\xa0\xa0"
+PASS "foo\xa0\xa0\xa0".replace(/\xa0+/, "") is "foo"
+PASS "foo\xa0\xa0\xa0".replace(/\xa0*$/, "") is "foo"
+PASS successfullyParsed is true
+
+TEST COMPLETE
+
diff --git a/LayoutTests/fast/js/regexp-extended-characters-more.html b/LayoutTests/fast/js/regexp-extended-characters-more.html
new file mode 100644 (file)
index 0000000..9f20481
--- /dev/null
@@ -0,0 +1,13 @@
+<!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML//EN">
+<html>
+<head>
+<link rel="stylesheet" href="resources/js-test-style.css">
+<script src="resources/js-test-pre.js"></script>
+</head>
+<body>
+<p id="description"></p>
+<div id="console"></div>
+<script src="resources/regexp-extended-characters-more.js"></script>
+<script src="resources/js-test-post.js"></script>
+</body>
+</html>
diff --git a/LayoutTests/fast/js/resources/regexp-extended-characters-more.js b/LayoutTests/fast/js/resources/regexp-extended-characters-more.js
new file mode 100644 (file)
index 0000000..9734f6f
--- /dev/null
@@ -0,0 +1,9 @@
+description(
+"This test checks a few cases of extended (> 127) characters in repeat regular expressions."
+);
+
+shouldBe('"foo\\xa0\\xa0\\xa0".replace(/\\xa0*/, "")', '"foo\\xa0\\xa0\\xa0"');
+shouldBe('"foo\\xa0\\xa0\\xa0".replace(/\\xa0+/, "")', '"foo"');
+shouldBe('"foo\\xa0\\xa0\\xa0".replace(/\\xa0*$/, "")', '"foo"');
+
+var successfullyParsed = true;