Reduce allocations and memory usage when compiling content extensions.
authorachristensen@apple.com <achristensen@apple.com@268f45cc-cd09-0410-ab3c-d52691b4dbfc>
Mon, 27 Apr 2015 22:34:47 +0000 (22:34 +0000)
committerachristensen@apple.com <achristensen@apple.com@268f45cc-cd09-0410-ab3c-d52691b4dbfc>
Mon, 27 Apr 2015 22:34:47 +0000 (22:34 +0000)
https://bugs.webkit.org/show_bug.cgi?id=144277

Reviewed by Benjamin Poulain.

Covered by existing tests.

* contentextensions/Term.h:
(WebCore::ContentExtensions::Term::CharacterSet::set):
(WebCore::ContentExtensions::Term::CharacterSet::get):
(WebCore::ContentExtensions::Term::CharacterSet::invert):
(WebCore::ContentExtensions::Term::CharacterSet::inverted):
(WebCore::ContentExtensions::Term::CharacterSet::bitCount):
(WebCore::ContentExtensions::Term::CharacterSet::operator==):
(WebCore::ContentExtensions::Term::CharacterSet::hash):
(WebCore::ContentExtensions::Term::Term):
(WebCore::ContentExtensions::Term::addCharacter):
(WebCore::ContentExtensions::Term::isEndOfLineAssertion):
(WebCore::ContentExtensions::Term::isUniversalTransition):
(WebCore::ContentExtensions::Term::generateSubgraphForAtom):
Use two uint64_t's instead of a BitVector with a capacity of 128 bits.

git-svn-id: https://svn.webkit.org/repository/webkit/trunk@183433 268f45cc-cd09-0410-ab3c-d52691b4dbfc

Source/WebCore/ChangeLog
Source/WebCore/contentextensions/Term.h

index 1e3285f..0482e74 100644 (file)
@@ -1,3 +1,27 @@
+2015-04-27  Alex Christensen  <achristensen@webkit.org>
+
+        Reduce allocations and memory usage when compiling content extensions.
+        https://bugs.webkit.org/show_bug.cgi?id=144277
+
+        Reviewed by Benjamin Poulain.
+
+        Covered by existing tests.
+
+        * contentextensions/Term.h:
+        (WebCore::ContentExtensions::Term::CharacterSet::set):
+        (WebCore::ContentExtensions::Term::CharacterSet::get):
+        (WebCore::ContentExtensions::Term::CharacterSet::invert):
+        (WebCore::ContentExtensions::Term::CharacterSet::inverted):
+        (WebCore::ContentExtensions::Term::CharacterSet::bitCount):
+        (WebCore::ContentExtensions::Term::CharacterSet::operator==):
+        (WebCore::ContentExtensions::Term::CharacterSet::hash):
+        (WebCore::ContentExtensions::Term::Term):
+        (WebCore::ContentExtensions::Term::addCharacter):
+        (WebCore::ContentExtensions::Term::isEndOfLineAssertion):
+        (WebCore::ContentExtensions::Term::isUniversalTransition):
+        (WebCore::ContentExtensions::Term::generateSubgraphForAtom):
+        Use two uint64_t's instead of a BitVector with a capacity of 128 bits.
+
 2015-04-27  Michael Catanzaro  <mcatanzaro@igalia.com>
 
         Rename WTF_USE_3D_GRAPHICS to ENABLE_GRAPHICS_CONTEXT_3D
index 1e564b6..4fd6a37 100644 (file)
@@ -31,7 +31,6 @@
 #include "NFA.h"
 #include <unicode/utypes.h>
 #include <wtf/ASCIICType.h>
-#include <wtf/BitVector.h>
 #include <wtf/HashMap.h>
 #include <wtf/Vector.h>
 
@@ -126,19 +125,47 @@ private:
     TermType m_termType { TermType::Empty };
     AtomQuantifier m_quantifier { AtomQuantifier::One };
 
-    struct CharacterSet {
-        bool inverted { false };
-        BitVector characters { 128 };
-
+    class CharacterSet {
+    public:
+        void set(UChar character)
+        {
+            RELEASE_ASSERT(character < 128);
+            m_characters[character / 64] |= (uint64_t(1) << (character % 64));
+        }
+        
+        bool get(UChar character) const
+        {
+            RELEASE_ASSERT(character < 128);
+            return m_characters[character / 64] & (uint64_t(1) << (character % 64));
+        }
+        
+        void invert()
+        {
+            ASSERT(!m_inverted);
+            m_inverted = true;
+        }
+        
+        bool inverted() const { return m_inverted; }
+        
+        unsigned bitCount() const
+        {
+            return WTF::bitCount(m_characters[0]) + WTF::bitCount(m_characters[1]);
+        }
+        
         bool operator==(const CharacterSet& other) const
         {
-            return other.inverted == inverted && other.characters == characters;
+            return other.m_inverted == m_inverted
+                && other.m_characters[0] == m_characters[0]
+                && other.m_characters[1] == m_characters[1];
         }
 
         unsigned hash() const
         {
-            return WTF::pairIntHash(inverted, characters.hash());
+            return WTF::pairIntHash(WTF::pairIntHash(WTF::intHash(m_characters[0]), WTF::intHash(m_characters[1])), m_inverted);
         }
+    private:
+        bool m_inverted { false };
+        uint64_t m_characters[2] { 0, 0 };
     };
 
     struct Group {
@@ -195,20 +222,20 @@ inline Term::Term(char character, bool isCaseSensitive)
     addCharacter(character, isCaseSensitive);
 }
 
-enum UniversalTransitionTag { UniversalTransition };
 inline Term::Term(UniversalTransitionTag)
     : m_termType(TermType::CharacterSet)
 {
     new (NotNull, &m_atomData.characterSet) CharacterSet();
-    for (unsigned i = 0; i < 128; ++i)
-        m_atomData.characterSet.characters.set(i);
+    for (UChar i = 0; i < 128; ++i)
+        m_atomData.characterSet.set(i);
 }
 
 inline Term::Term(CharacterSetTermTag, bool isInverted)
     : m_termType(TermType::CharacterSet)
 {
     new (NotNull, &m_atomData.characterSet) CharacterSet();
-    m_atomData.characterSet.inverted = isInverted;
+    if (isInverted)
+        m_atomData.characterSet.invert();
 }
 
 inline Term::Term(GroupTermTag)
@@ -220,7 +247,7 @@ inline Term::Term(GroupTermTag)
 inline Term::Term(EndOfLineAssertionTermTag)
     : Term(CharacterSetTerm, false)
 {
-    m_atomData.characterSet.characters.set(0);
+    m_atomData.characterSet.set(0);
 }
 
 inline Term::Term(const Term& other)
@@ -287,10 +314,10 @@ inline void Term::addCharacter(UChar character, bool isCaseSensitive)
         return;
 
     if (isCaseSensitive || !isASCIIAlpha(character))
-        m_atomData.characterSet.characters.set(character);
+        m_atomData.characterSet.set(character);
     else {
-        m_atomData.characterSet.characters.set(toASCIIUpper(character));
-        m_atomData.characterSet.characters.set(toASCIILower(character));
+        m_atomData.characterSet.set(toASCIIUpper(character));
+        m_atomData.characterSet.set(toASCIILower(character));
     }
 }
 
@@ -356,7 +383,7 @@ inline unsigned Term::Term::generateGraph(NFA& nfa, unsigned start, const Action
 
 inline bool Term::isEndOfLineAssertion() const
 {
-    return m_termType == TermType::CharacterSet && m_atomData.characterSet.characters.bitCount() == 1 && m_atomData.characterSet.characters.get(0);
+    return m_termType == TermType::CharacterSet && m_atomData.characterSet.bitCount() == 1 && m_atomData.characterSet.get(0);
 }
 
 inline bool Term::matchesAtLeastOneCharacter() const
@@ -517,8 +544,8 @@ inline bool Term::isUniversalTransition() const
         ASSERT_NOT_REACHED();
         break;
     case TermType::CharacterSet:
-        return (m_atomData.characterSet.inverted && !m_atomData.characterSet.characters.bitCount())
-            || (!m_atomData.characterSet.inverted && m_atomData.characterSet.characters.bitCount() == 128);
+        return (m_atomData.characterSet.inverted() && !m_atomData.characterSet.bitCount())
+            || (!m_atomData.characterSet.inverted() && m_atomData.characterSet.bitCount() == 128);
     case TermType::Group:
         return m_atomData.group.terms.size() == 1 && m_atomData.group.terms.first().isUniversalTransition();
     }
@@ -537,14 +564,15 @@ inline unsigned Term::generateSubgraphForAtom(NFA& nfa, unsigned source) const
         if (isUniversalTransition())
             nfa.addTransitionsOnAnyCharacter(source, target);
         else {
-            if (!m_atomData.characterSet.inverted) {
-                for (const auto& characterIterator : m_atomData.characterSet.characters.setBits())
-                    nfa.addTransition(source, target, static_cast<char>(characterIterator));
+            if (!m_atomData.characterSet.inverted()) {
+                for (UChar i = 0; i < 128; ++i) {
+                    if (m_atomData.characterSet.get(i))
+                        nfa.addTransition(source, target, static_cast<char>(i));
+                }
             } else {
-                for (unsigned i = 1; i < m_atomData.characterSet.characters.size(); ++i) {
-                    if (m_atomData.characterSet.characters.get(i))
-                        continue;
-                    nfa.addTransition(source, target, static_cast<char>(i));
+                for (UChar i = 1; i < 128; ++i) {
+                    if (!m_atomData.characterSet.get(i))
+                        nfa.addTransition(source, target, static_cast<char>(i));
                 }
             }
         }