Temporarily rolling out r60267, I appear to have hoesed perf at the last minute....
[WebKit-https.git] / JavaScriptCore / yarr / RegexCompiler.cpp
index eee8845..9fbe213 100644 (file)
@@ -36,6 +36,8 @@ using namespace WTF;
 
 namespace JSC { namespace Yarr {
 
+#include "RegExpJitTables.h"
+
 class CharacterClassConstructor {
 public:
     CharacterClassConstructor(bool isCaseInsensitive = false)
@@ -108,11 +110,18 @@ public:
             }
         }
         if (hi >= 0x80) {
-            UChar unicodeCurr = std::max(lo, (UChar)0x80);
+            uint32_t unicodeCurr = std::max(lo, (UChar)0x80);
             addSortedRange(m_rangesUnicode, unicodeCurr, hi);
             
             if (m_isCaseInsensitive) {
                 while (unicodeCurr <= hi) {
+                    // If the upper bound of the range (hi) is 0xffff, the increments to
+                    // unicodeCurr in this loop may take it to 0x10000.  This is fine
+                    // (if so we won't re-enter the loop, since the loop condition above
+                    // will definitely fail) - but this does mean we cannot use a UChar
+                    // to represent unicodeCurr, we must use a 32-bit value instead.
+                    ASSERT(unicodeCurr <= 0xffff);
+
                     if (isUnicodeUpper(unicodeCurr)) {
                         UChar lowerCaseRangeBegin = Unicode::toLower(unicodeCurr);
                         UChar lowerCaseRangeEnd = lowerCaseRangeBegin;
@@ -134,7 +143,7 @@ public:
 
     CharacterClass* charClass()
     {
-        CharacterClass* characterClass = new CharacterClass();
+        CharacterClass* characterClass = new CharacterClass(0);
 
         characterClass->m_matches.append(m_matches);
         characterClass->m_ranges.append(m_ranges);
@@ -226,105 +235,6 @@ private:
     Vector<CharacterRange> m_rangesUnicode;
 };
 
-
-CharacterClass* newlineCreate()
-{
-    CharacterClass* characterClass = new CharacterClass();
-
-    characterClass->m_matches.append('\n');
-    characterClass->m_matches.append('\r');
-    characterClass->m_matchesUnicode.append(0x2028);
-    characterClass->m_matchesUnicode.append(0x2029);
-    
-    return characterClass;
-}
-
-CharacterClass* digitsCreate()
-{
-    CharacterClass* characterClass = new CharacterClass();
-
-    characterClass->m_ranges.append(CharacterRange('0', '9'));
-    
-    return characterClass;
-}
-
-CharacterClass* spacesCreate()
-{
-    CharacterClass* characterClass = new CharacterClass();
-
-    characterClass->m_matches.append(' ');
-    characterClass->m_ranges.append(CharacterRange('\t', '\r'));
-    characterClass->m_matchesUnicode.append(0x00a0);
-    characterClass->m_matchesUnicode.append(0x1680);
-    characterClass->m_matchesUnicode.append(0x180e);
-    characterClass->m_matchesUnicode.append(0x2028);
-    characterClass->m_matchesUnicode.append(0x2029);
-    characterClass->m_matchesUnicode.append(0x202f);
-    characterClass->m_matchesUnicode.append(0x205f);
-    characterClass->m_matchesUnicode.append(0x3000);
-    characterClass->m_rangesUnicode.append(CharacterRange(0x2000, 0x200a));
-    
-    return characterClass;
-}
-
-CharacterClass* wordcharCreate()
-{
-    CharacterClass* characterClass = new CharacterClass();
-
-    characterClass->m_matches.append('_');
-    characterClass->m_ranges.append(CharacterRange('0', '9'));
-    characterClass->m_ranges.append(CharacterRange('A', 'Z'));
-    characterClass->m_ranges.append(CharacterRange('a', 'z'));
-    
-    return characterClass;
-}
-
-CharacterClass* nondigitsCreate()
-{
-    CharacterClass* characterClass = new CharacterClass();
-
-    characterClass->m_ranges.append(CharacterRange(0, '0' - 1));
-    characterClass->m_ranges.append(CharacterRange('9' + 1, 0x7f));
-    characterClass->m_rangesUnicode.append(CharacterRange(0x80, 0xffff));
-    
-    return characterClass;
-}
-
-CharacterClass* nonspacesCreate()
-{
-    CharacterClass* characterClass = new CharacterClass();
-
-    characterClass->m_ranges.append(CharacterRange(0, '\t' - 1));
-    characterClass->m_ranges.append(CharacterRange('\r' + 1, ' ' - 1));
-    characterClass->m_ranges.append(CharacterRange(' ' + 1, 0x7f));
-    characterClass->m_rangesUnicode.append(CharacterRange(0x0080, 0x009f));
-    characterClass->m_rangesUnicode.append(CharacterRange(0x00a1, 0x167f));
-    characterClass->m_rangesUnicode.append(CharacterRange(0x1681, 0x180d));
-    characterClass->m_rangesUnicode.append(CharacterRange(0x180f, 0x1fff));
-    characterClass->m_rangesUnicode.append(CharacterRange(0x200b, 0x2027));
-    characterClass->m_rangesUnicode.append(CharacterRange(0x202a, 0x202e));
-    characterClass->m_rangesUnicode.append(CharacterRange(0x2030, 0x205e));
-    characterClass->m_rangesUnicode.append(CharacterRange(0x2060, 0x2fff));
-    characterClass->m_rangesUnicode.append(CharacterRange(0x3001, 0xffff));
-    
-    return characterClass;
-}
-
-CharacterClass* nonwordcharCreate()
-{
-    CharacterClass* characterClass = new CharacterClass();
-
-    characterClass->m_matches.append('`');
-    characterClass->m_ranges.append(CharacterRange(0, '0' - 1));
-    characterClass->m_ranges.append(CharacterRange('9' + 1, 'A' - 1));
-    characterClass->m_ranges.append(CharacterRange('Z' + 1, '_' - 1));
-    characterClass->m_ranges.append(CharacterRange('z' + 1, 0x7f));
-    characterClass->m_rangesUnicode.append(CharacterRange(0x80, 0xffff));
-
-    return characterClass;
-}
-
-
 class RegexPatternConstructor {
 public:
     RegexPatternConstructor(RegexPattern& pattern)
@@ -462,10 +372,13 @@ public:
     void atomBackReference(unsigned subpatternId)
     {
         ASSERT(subpatternId);
+        m_pattern.m_shouldFallBack = true;
         m_pattern.m_maxBackReference = std::max(m_pattern.m_maxBackReference, subpatternId);
 
-        if (subpatternId > m_pattern.m_numSubpatterns)
+        if (subpatternId > m_pattern.m_numSubpatterns) {
+            m_alternative->m_terms.append(PatternTerm::ForwardReference());
             return;
+        }
 
         PatternAlternative* currentAlternative = m_alternative;
         ASSERT(currentAlternative);
@@ -475,8 +388,10 @@ public:
             PatternTerm& term = currentAlternative->lastTerm();
             ASSERT((term.type == PatternTerm::TypeParenthesesSubpattern) || (term.type == PatternTerm::TypeParentheticalAssertion));
 
-            if ((term.type == PatternTerm::TypeParenthesesSubpattern) && term.invertOrCapture && (subpatternId == term.subpatternId))
+            if ((term.type == PatternTerm::TypeParenthesesSubpattern) && term.invertOrCapture && (subpatternId == term.subpatternId)) {
+                m_alternative->m_terms.append(PatternTerm::ForwardReference());
                 return;
+            }
         }
 
         m_alternative->m_terms.append(PatternTerm(subpatternId));
@@ -494,6 +409,7 @@ public:
                 newAlternative->m_terms.append(copyTerm(alternative->m_terms[i]));
         }
 
+        m_pattern.m_disjunctions.append(newDisjunction);
         return newDisjunction;
     }
 
@@ -532,6 +448,9 @@ public:
             return;
         }
 
+        if (max > 1 && term.type == PatternTerm::TypeParenthesesSubpattern)
+            m_pattern.m_shouldFallBack = true;
+
         if (min == 0)
             term.quantify(max, greedy   ? QuantifierGreedy : QuantifierNonGreedy);
         else if (min == max)
@@ -586,6 +505,9 @@ public:
                 alternative->m_hasFixedSize = false;
                 break;
 
+            case PatternTerm::TypeForwardReference:
+                break;
+
             case PatternTerm::TypePatternCharacter:
                 term.inputPosition = currentInputPosition;
                 if (term.quantityType != QuantifierFixedCount) {
@@ -630,7 +552,7 @@ public:
             case PatternTerm::TypeParentheticalAssertion:
                 term.inputPosition = currentInputPosition;
                 term.frameLocation = currentCallFrameSize;
-                currentCallFrameSize = setupDisjunctionOffsets(term.parentheses.disjunction, currentCallFrameSize + RegexStackSpaceForBackTrackInfoParentheticalAssertionOnce, currentInputPosition);
+                currentCallFrameSize = setupDisjunctionOffsets(term.parentheses.disjunction, currentCallFrameSize + RegexStackSpaceForBackTrackInfoParentheticalAssertion, currentInputPosition);
                 break;
             }
         }
@@ -693,7 +615,7 @@ const char* compileRegex(const UString& patternString, RegexPattern& pattern)
         unsigned numSubpatterns = pattern.m_numSubpatterns;
 
         constructor.reset();
-#ifndef NDEBUG
+#if !ASSERT_DISABLED
         const char* error =
 #endif
             parse(constructor, patternString, numSubpatterns);