[JSC] Shrink sizeof(RegExp)
authorysuzuki@apple.com <ysuzuki@apple.com@268f45cc-cd09-0410-ab3c-d52691b4dbfc>
Sat, 23 Mar 2019 07:58:07 +0000 (07:58 +0000)
committerysuzuki@apple.com <ysuzuki@apple.com@268f45cc-cd09-0410-ab3c-d52691b4dbfc>
Sat, 23 Mar 2019 07:58:07 +0000 (07:58 +0000)
https://bugs.webkit.org/show_bug.cgi?id=196133

Reviewed by Mark Lam.

Some applications have many RegExp cells. But RegExp cells are very large (144B).
This patch reduces the size from 144B to 48B by,

1. Allocate Yarr::YarrCodeBlock in non-GC heap. We can avoid this allocation if JIT is disabled.
2. m_captureGroupNames and m_namedGroupToParenIndex are moved to RareData. They are only used when RegExp has named capture groups.

* runtime/RegExp.cpp:
(JSC::RegExp::finishCreation):
(JSC::RegExp::estimatedSize):
(JSC::RegExp::compile):
(JSC::RegExp::matchConcurrently):
(JSC::RegExp::compileMatchOnly):
(JSC::RegExp::deleteCode):
(JSC::RegExp::printTraceData):
* runtime/RegExp.h:
* runtime/RegExpInlines.h:
(JSC::RegExp::hasCodeFor):
(JSC::RegExp::matchInline):
(JSC::RegExp::hasMatchOnlyCodeFor):

git-svn-id: https://svn.webkit.org/repository/webkit/trunk@243408 268f45cc-cd09-0410-ab3c-d52691b4dbfc

Source/JavaScriptCore/ChangeLog
Source/JavaScriptCore/runtime/RegExp.cpp
Source/JavaScriptCore/runtime/RegExp.h
Source/JavaScriptCore/runtime/RegExpInlines.h

index 47abcfa..aa4d210 100644 (file)
@@ -1,3 +1,30 @@
+2019-03-23  Yusuke Suzuki  <ysuzuki@apple.com>
+
+        [JSC] Shrink sizeof(RegExp)
+        https://bugs.webkit.org/show_bug.cgi?id=196133
+
+        Reviewed by Mark Lam.
+
+        Some applications have many RegExp cells. But RegExp cells are very large (144B).
+        This patch reduces the size from 144B to 48B by,
+
+        1. Allocate Yarr::YarrCodeBlock in non-GC heap. We can avoid this allocation if JIT is disabled.
+        2. m_captureGroupNames and m_namedGroupToParenIndex are moved to RareData. They are only used when RegExp has named capture groups.
+
+        * runtime/RegExp.cpp:
+        (JSC::RegExp::finishCreation):
+        (JSC::RegExp::estimatedSize):
+        (JSC::RegExp::compile):
+        (JSC::RegExp::matchConcurrently):
+        (JSC::RegExp::compileMatchOnly):
+        (JSC::RegExp::deleteCode):
+        (JSC::RegExp::printTraceData):
+        * runtime/RegExp.h:
+        * runtime/RegExpInlines.h:
+        (JSC::RegExp::hasCodeFor):
+        (JSC::RegExp::matchInline):
+        (JSC::RegExp::hasMatchOnlyCodeFor):
+
 2019-03-22  Keith Rollin  <krollin@apple.com>
 
         Enable ThinLTO support in Production builds
index 93ad2e9..e61b9aa 100644 (file)
@@ -171,12 +171,16 @@ void RegExp::finishCreation(VM& vm)
 {
     Base::finishCreation(vm);
     Yarr::YarrPattern pattern(m_patternString, m_flags, m_constructionErrorCode, vm.stackLimit());
-    if (!isValid())
+    if (!isValid()) {
         m_state = ParseError;
-    else {
-        m_numSubpatterns = pattern.m_numSubpatterns;
-        m_captureGroupNames.swap(pattern.m_captureGroupNames);
-        m_namedGroupToParenIndex.swap(pattern.m_namedGroupToParenIndex);
+        return;
+    }
+
+    m_numSubpatterns = pattern.m_numSubpatterns;
+    if (!pattern.m_captureGroupNames.isEmpty() || !pattern.m_namedGroupToParenIndex.isEmpty()) {
+        m_rareData = std::make_unique<RareData>();
+        m_rareData->m_captureGroupNames.swap(pattern.m_captureGroupNames);
+        m_rareData->m_namedGroupToParenIndex.swap(pattern.m_namedGroupToParenIndex);
     }
 }
 
@@ -194,7 +198,8 @@ size_t RegExp::estimatedSize(JSCell* cell, VM& vm)
     RegExp* thisObject = static_cast<RegExp*>(cell);
     size_t regexDataSize = thisObject->m_regExpBytecode ? thisObject->m_regExpBytecode->estimatedSizeInBytes() : 0;
 #if ENABLE(YARR_JIT)
-    regexDataSize += thisObject->m_regExpJITCode.size();
+    if (auto* jitCode = thisObject->m_regExpJITCode.get())
+        regexDataSize += jitCode->size();
 #endif
     return Base::estimatedSize(cell, vm) + regexDataSize;
 }
@@ -237,7 +242,7 @@ void RegExp::byteCodeCompileIfNecessary(VM* vm)
 
 void RegExp::compile(VM* vm, Yarr::YarrCharSize charSize)
 {
-    ConcurrentJSLocker locker(m_lock);
+    auto locker = holdLock(cellLock());
     
     Yarr::YarrPattern pattern(m_patternString, m_flags, m_constructionErrorCode, vm->stackLimit());
     if (hasError(m_constructionErrorCode)) {
@@ -258,8 +263,9 @@ void RegExp::compile(VM* vm, Yarr::YarrCharSize charSize)
         && !pattern.m_containsBackreferences
 #endif
         ) {
-        Yarr::jitCompile(pattern, m_patternString, charSize, vm, m_regExpJITCode);
-        if (!m_regExpJITCode.failureReason()) {
+        auto& jitCode = ensureRegExpJITCode();
+        Yarr::jitCompile(pattern, m_patternString, charSize, vm, jitCode);
+        if (!jitCode.failureReason()) {
             m_state = JITCode;
             return;
         }
@@ -283,7 +289,7 @@ int RegExp::match(VM& vm, const String& s, unsigned startOffset, Vector<int>& ov
 bool RegExp::matchConcurrently(
     VM& vm, const String& s, unsigned startOffset, int& position, Vector<int>& ovector)
 {
-    ConcurrentJSLocker locker(m_lock);
+    auto locker = holdLock(cellLock());
 
     if (!hasCodeFor(s.is8Bit() ? Yarr::Char8 : Yarr::Char16))
         return false;
@@ -294,7 +300,7 @@ bool RegExp::matchConcurrently(
 
 void RegExp::compileMatchOnly(VM* vm, Yarr::YarrCharSize charSize)
 {
-    ConcurrentJSLocker locker(m_lock);
+    auto locker = holdLock(cellLock());
     
     Yarr::YarrPattern pattern(m_patternString, m_flags, m_constructionErrorCode, vm->stackLimit());
     if (hasError(m_constructionErrorCode)) {
@@ -315,8 +321,9 @@ void RegExp::compileMatchOnly(VM* vm, Yarr::YarrCharSize charSize)
         && !pattern.m_containsBackreferences
 #endif
         ) {
-        Yarr::jitCompile(pattern, m_patternString, charSize, vm, m_regExpJITCode, Yarr::MatchOnly);
-        if (!m_regExpJITCode.failureReason()) {
+        auto& jitCode = ensureRegExpJITCode();
+        Yarr::jitCompile(pattern, m_patternString, charSize, vm, jitCode, Yarr::MatchOnly);
+        if (!jitCode.failureReason()) {
             m_state = JITCode;
             return;
         }
@@ -339,7 +346,7 @@ MatchResult RegExp::match(VM& vm, const String& s, unsigned startOffset)
 
 bool RegExp::matchConcurrently(VM& vm, const String& s, unsigned startOffset, MatchResult& result)
 {
-    ConcurrentJSLocker locker(m_lock);
+    auto locker = holdLock(cellLock());
 
     if (!hasMatchOnlyCodeFor(s.is8Bit() ? Yarr::Char8 : Yarr::Char16))
         return false;
@@ -350,13 +357,14 @@ bool RegExp::matchConcurrently(VM& vm, const String& s, unsigned startOffset, Ma
 
 void RegExp::deleteCode()
 {
-    ConcurrentJSLocker locker(m_lock);
+    auto locker = holdLock(cellLock());
     
     if (!hasCode())
         return;
     m_state = NotCompiled;
 #if ENABLE(YARR_JIT)
-    m_regExpJITCode.clear();
+    if (m_regExpJITCode)
+        m_regExpJITCode->clear();
 #endif
     m_regExpBytecode = nullptr;
 }
@@ -426,23 +434,29 @@ void RegExp::matchCompareWithInterpreter(const String& s, int startOffset, int*
         snprintf(formattedPattern, 41, (pattLen <= 38) ? "/%.38s/" : "/%.36s...", rawPattern);
 
 #if ENABLE(YARR_JIT)
-        Yarr::YarrCodeBlock& codeBlock = m_regExpJITCode;
-
         const size_t jitAddrSize = 20;
-        char jit8BitMatchOnlyAddr[jitAddrSize];
-        char jit16BitMatchOnlyAddr[jitAddrSize];
-        char jit8BitMatchAddr[jitAddrSize];
-        char jit16BitMatchAddr[jitAddrSize];
-        if (m_state == ByteCode) {
+        char jit8BitMatchOnlyAddr[jitAddrSize] { };
+        char jit16BitMatchOnlyAddr[jitAddrSize] { };
+        char jit8BitMatchAddr[jitAddrSize] { };
+        char jit16BitMatchAddr[jitAddrSize] { };
+        switch (m_state) {
+        case ParseError:
+        case NotCompiled:
+            break;
+        case ByteCode:
             snprintf(jit8BitMatchOnlyAddr, jitAddrSize, "fallback    ");
             snprintf(jit16BitMatchOnlyAddr, jitAddrSize, "----      ");
             snprintf(jit8BitMatchAddr, jitAddrSize, "fallback    ");
             snprintf(jit16BitMatchAddr, jitAddrSize, "----      ");
-        } else {
+            break;
+        case JITCode: {
+            Yarr::YarrCodeBlock& codeBlock = *m_regExpJITCode.get();
             snprintf(jit8BitMatchOnlyAddr, jitAddrSize, "0x%014lx", reinterpret_cast<uintptr_t>(codeBlock.get8BitMatchOnlyAddr()));
             snprintf(jit16BitMatchOnlyAddr, jitAddrSize, "0x%014lx", reinterpret_cast<uintptr_t>(codeBlock.get16BitMatchOnlyAddr()));
             snprintf(jit8BitMatchAddr, jitAddrSize, "0x%014lx", reinterpret_cast<uintptr_t>(codeBlock.get8BitMatchAddr()));
             snprintf(jit16BitMatchAddr, jitAddrSize, "0x%014lx", reinterpret_cast<uintptr_t>(codeBlock.get16BitMatchAddr()));
+            break;
+        }
         }
 #else
         const char* jit8BitMatchOnlyAddr = "JIT Off";
index 7b08ab7..39cdd91 100644 (file)
@@ -88,20 +88,23 @@ public:
 
     bool hasNamedCaptures()
     {
-        return !m_captureGroupNames.isEmpty();
+        return m_rareData && !m_rareData->m_captureGroupNames.isEmpty();
     }
 
     String getCaptureGroupName(unsigned i)
     {
-        if (!i || m_captureGroupNames.size() <= i)
+        if (!i || !m_rareData || m_rareData->m_captureGroupNames.size() <= i)
             return String();
-        return m_captureGroupNames[i];
+        ASSERT(m_rareData);
+        return m_rareData->m_captureGroupNames[i];
     }
 
     unsigned subpatternForName(String groupName)
     {
-        auto it = m_namedGroupToParenIndex.find(groupName);
-        if (it == m_namedGroupToParenIndex.end())
+        if (!m_rareData)
+            return 0;
+        auto it = m_rareData->m_namedGroupToParenIndex.find(groupName);
+        if (it == m_rareData->m_namedGroupToParenIndex.end())
             return 0;
         return it->value;
     }
@@ -157,15 +160,31 @@ private:
     void matchCompareWithInterpreter(const String&, int startOffset, int* offsetVector, int jitResult);
 #endif
 
+#if ENABLE(YARR_JIT)
+    Yarr::YarrCodeBlock& ensureRegExpJITCode()
+    {
+        if (!m_regExpJITCode)
+            m_regExpJITCode = std::make_unique<Yarr::YarrCodeBlock>();
+        return *m_regExpJITCode.get();
+    }
+#endif
+
+    struct RareData {
+        WTF_MAKE_STRUCT_FAST_ALLOCATED;
+        Vector<String> m_captureGroupNames;
+        HashMap<String, unsigned> m_namedGroupToParenIndex;
+    };
+
     String m_patternString;
     RegExpState m_state { NotCompiled };
     OptionSet<Yarr::Flags> m_flags;
-    ConcurrentJSLock m_lock;
     Yarr::ErrorCode m_constructionErrorCode { Yarr::ErrorCode::NoError };
     unsigned m_numSubpatterns { 0 };
-    Vector<String> m_captureGroupNames;
-    HashMap<String, unsigned> m_namedGroupToParenIndex;
     std::unique_ptr<Yarr::BytecodePattern> m_regExpBytecode;
+#if ENABLE(YARR_JIT)
+    std::unique_ptr<Yarr::YarrCodeBlock> m_regExpJITCode;
+#endif
+    std::unique_ptr<RareData> m_rareData;
 #if ENABLE(REGEXP_TRACING)
     double m_rtMatchOnlyTotalSubjectStringLen { 0.0 };
     double m_rtMatchTotalSubjectStringLen { 0.0 };
@@ -174,10 +193,6 @@ private:
     unsigned m_rtMatchCallCount { 0 };
     unsigned m_rtMatchFoundCount { 0 };
 #endif
-
-#if ENABLE(YARR_JIT)
-    Yarr::YarrCodeBlock m_regExpJITCode;
-#endif
 };
 
 } // namespace JSC
index 54b11b9..5cb330f 100644 (file)
@@ -73,9 +73,10 @@ ALWAYS_INLINE bool RegExp::hasCodeFor(Yarr::YarrCharSize charSize)
 #if ENABLE(YARR_JIT)
         if (m_state != JITCode)
             return true;
-        if ((charSize == Yarr::Char8) && (m_regExpJITCode.has8BitCode()))
+        ASSERT(m_regExpJITCode);
+        if ((charSize == Yarr::Char8) && (m_regExpJITCode->has8BitCode()))
             return true;
-        if ((charSize == Yarr::Char16) && (m_regExpJITCode.has16BitCode()))
+        if ((charSize == Yarr::Char16) && (m_regExpJITCode->has16BitCode()))
             return true;
 #else
         UNUSED_PARAM(charSize);
@@ -156,8 +157,9 @@ ALWAYS_INLINE int RegExp::matchInline(VM& vm, const String& s, unsigned startOff
 #if ENABLE(YARR_JIT)
     if (m_state == JITCode) {
         {
+            ASSERT(m_regExpJITCode);
 #if ENABLE(YARR_JIT_ALL_PARENS_EXPRESSIONS)
-            PatternContextBufferHolder patternContextBufferHolder(vm, m_regExpJITCode.usesPatternContextBuffer());
+            PatternContextBufferHolder patternContextBufferHolder(vm, m_regExpJITCode->usesPatternContextBuffer());
 
 #define EXTRA_JIT_PARAMS  , patternContextBufferHolder.buffer(), patternContextBufferHolder.size()
 #else
@@ -165,9 +167,9 @@ ALWAYS_INLINE int RegExp::matchInline(VM& vm, const String& s, unsigned startOff
 #endif
 
             if (s.is8Bit())
-                result = m_regExpJITCode.execute(s.characters8(), startOffset, s.length(), offsetVector EXTRA_JIT_PARAMS).start;
+                result = m_regExpJITCode->execute(s.characters8(), startOffset, s.length(), offsetVector EXTRA_JIT_PARAMS).start;
             else
-                result = m_regExpJITCode.execute(s.characters16(), startOffset, s.length(), offsetVector EXTRA_JIT_PARAMS).start;
+                result = m_regExpJITCode->execute(s.characters16(), startOffset, s.length(), offsetVector EXTRA_JIT_PARAMS).start;
 
 #undef EXTRA_JIT_PARAMS
         }
@@ -230,9 +232,10 @@ ALWAYS_INLINE bool RegExp::hasMatchOnlyCodeFor(Yarr::YarrCharSize charSize)
 #if ENABLE(YARR_JIT)
         if (m_state != JITCode)
             return true;
-        if ((charSize == Yarr::Char8) && (m_regExpJITCode.has8BitCodeMatchOnly()))
+        ASSERT(m_regExpJITCode);
+        if ((charSize == Yarr::Char8) && (m_regExpJITCode->has8BitCodeMatchOnly()))
             return true;
-        if ((charSize == Yarr::Char16) && (m_regExpJITCode.has16BitCodeMatchOnly()))
+        if ((charSize == Yarr::Char16) && (m_regExpJITCode->has16BitCodeMatchOnly()))
             return true;
 #else
         UNUSED_PARAM(charSize);
@@ -277,8 +280,9 @@ ALWAYS_INLINE MatchResult RegExp::matchInline(VM& vm, const String& s, unsigned
 
     if (m_state == JITCode) {
         {
+            ASSERT(m_regExpJITCode);
 #if ENABLE(YARR_JIT_ALL_PARENS_EXPRESSIONS)
-            PatternContextBufferHolder patternContextBufferHolder(vm, m_regExpJITCode.usesPatternContextBuffer());
+            PatternContextBufferHolder patternContextBufferHolder(vm, m_regExpJITCode->usesPatternContextBuffer());
 
 #define EXTRA_JIT_PARAMS  , patternContextBufferHolder.buffer(), patternContextBufferHolder.size()
 #else
@@ -286,9 +290,9 @@ ALWAYS_INLINE MatchResult RegExp::matchInline(VM& vm, const String& s, unsigned
 #endif
 
             if (s.is8Bit())
-                result = m_regExpJITCode.execute(s.characters8(), startOffset, s.length() EXTRA_JIT_PARAMS);
+                result = m_regExpJITCode->execute(s.characters8(), startOffset, s.length() EXTRA_JIT_PARAMS);
             else
-                result = m_regExpJITCode.execute(s.characters16(), startOffset, s.length() EXTRA_JIT_PARAMS);
+                result = m_regExpJITCode->execute(s.characters16(), startOffset, s.length() EXTRA_JIT_PARAMS);
 
 #undef EXTRA_JIT_PARAMS
         }