Use bloom filter for descendant selector filtering
authorantti@apple.com <antti@apple.com@268f45cc-cd09-0410-ab3c-d52691b4dbfc>
Sun, 6 Feb 2011 20:43:41 +0000 (20:43 +0000)
committerantti@apple.com <antti@apple.com@268f45cc-cd09-0410-ab3c-d52691b4dbfc>
Sun, 6 Feb 2011 20:43:41 +0000 (20:43 +0000)
https://bugs.webkit.org/show_bug.cgi?id=53880

Reviewed by Maciej Stachowiak.

Source/JavaScriptCore:

Implement a bloom filter with k=2 and 8 bit counting.

* GNUmakefile.am:
* JavaScriptCore.gypi:
* JavaScriptCore.vcproj/WTF/WTF.vcproj:
* JavaScriptCore.xcodeproj/project.pbxproj:
* wtf/BloomFilter.h: Added.
(WTF::BloomFilter::maximumCount):
(WTF::BloomFilter::BloomFilter):
(WTF::BloomFilter::mayContain):
(WTF::BloomFilter::add):
(WTF::BloomFilter::remove):
(WTF::BloomFilter::firstSlot):
(WTF::BloomFilter::secondSlot):
(WTF::::add):
(WTF::::remove):
(WTF::::clear):
(WTF::::likelyEmpty):
(WTF::::isClear):

Source/WebCore:

Bloom filter is faster than a hash set in this kind of use.

Shark thinks this speeds up style matching by ~30% on sites
with lots of descendant selectors.

* ForwardingHeaders/wtf/BloomFilter.h: Added.
* css/CSSStyleSelector.cpp:
(WebCore::collectElementIdentifierHashes):
(WebCore::CSSStyleSelector::pushParent):
(WebCore::CSSStyleSelector::popParent):
(WebCore::CSSStyleSelector::fastRejectSelector):
(WebCore::RuleData::collectDescendantSelectorIdentifierHashes):
* css/CSSStyleSelector.h:

git-svn-id: http://svn.webkit.org/repository/webkit/trunk@77777 268f45cc-cd09-0410-ab3c-d52691b4dbfc

Source/JavaScriptCore/ChangeLog
Source/JavaScriptCore/GNUmakefile.am
Source/JavaScriptCore/JavaScriptCore.gypi
Source/JavaScriptCore/JavaScriptCore.vcproj/WTF/WTF.vcproj
Source/JavaScriptCore/JavaScriptCore.xcodeproj/project.pbxproj
Source/JavaScriptCore/wtf/BloomFilter.h [new file with mode: 0644]
Source/WebCore/ChangeLog
Source/WebCore/ForwardingHeaders/wtf/BloomFilter.h [new file with mode: 0644]
Source/WebCore/css/CSSStyleSelector.cpp
Source/WebCore/css/CSSStyleSelector.h

index a54012a352616847427cb63c9c7ce914c04d3713..3fe0e8bb7beb56e861ff5cbe8661fbabf3bbc879 100644 (file)
@@ -1,3 +1,30 @@
+2011-02-06  Antti Koivisto  <antti@apple.com>
+
+        Reviewed by Maciej Stachowiak.
+
+        Use bloom filter for descendant selector filtering
+        https://bugs.webkit.org/show_bug.cgi?id=53880
+        
+        Implement a bloom filter with k=2 and 8 bit counting.
+
+        * GNUmakefile.am:
+        * JavaScriptCore.gypi:
+        * JavaScriptCore.vcproj/WTF/WTF.vcproj:
+        * JavaScriptCore.xcodeproj/project.pbxproj:
+        * wtf/BloomFilter.h: Added.
+        (WTF::BloomFilter::maximumCount):
+        (WTF::BloomFilter::BloomFilter):
+        (WTF::BloomFilter::mayContain):
+        (WTF::BloomFilter::add):
+        (WTF::BloomFilter::remove):
+        (WTF::BloomFilter::firstSlot):
+        (WTF::BloomFilter::secondSlot):
+        (WTF::::add):
+        (WTF::::remove):
+        (WTF::::clear):
+        (WTF::::likelyEmpty):
+        (WTF::::isClear):
+
 2011-02-04  Geoffrey Garen  <ggaren@apple.com>
 
         Reviewed by Oliver Hunt.
index aadcd0980a79710fa160cdb1f2b97af204abfec5..e48fdce69bcb13660643022e0bc1d5c4f48b8b1c 100644 (file)
@@ -438,6 +438,7 @@ javascriptcore_sources += \
        Source/JavaScriptCore/wtf/Atomics.h \
        Source/JavaScriptCore/wtf/AVLTree.h \
        Source/JavaScriptCore/wtf/Bitmap.h \
+       Source/JavaScriptCore/wtf/BloomFilter.h \
        Source/JavaScriptCore/wtf/BumpPointerAllocator.h \
        Source/JavaScriptCore/wtf/ByteArray.cpp \
        Source/JavaScriptCore/wtf/ByteArray.h \
index 85a413c05df7c69069bd8295192382da1b127c46..41786838d60aae0b6e1f8e9744282c253417babf 100644 (file)
             'wtf/Atomics.h',
             'wtf/AVLTree.h',
             'wtf/Bitmap.h',
+            'wtf/BloomFilter.h',
             'wtf/ByteArray.cpp',
             'wtf/ByteArray.h',
             'wtf/chromium/ChromiumThreading.h',
index 66c69e05c477614a351a6b0d2acbdf2314458319..d2b81a847e461aa6cb763472acd51e9b888dbe1b 100644 (file)
                        RelativePath="..\..\wtf\Bitmap.h"
                        >
                </File>
+               <File
+                       RelativePath="..\..\wtf\BloomFilter.h"
+                       >
+               </File>
                <File
                        RelativePath="..\..\wtf\BumpPointerAllocator.h"
                        >
index fa4e240c8ad8692da965a5413385a9ffed7bb9c2..de6106cab704d9fc25fd74b9cc5fe96b44befa6d 100644 (file)
                E49DC16B12EF293E00184A1F /* SourceProviderCache.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E49DC15512EF277200184A1F /* SourceProviderCache.cpp */; };
                E49DC16C12EF294E00184A1F /* SourceProviderCache.h in Headers */ = {isa = PBXBuildFile; fileRef = E49DC15112EF272200184A1F /* SourceProviderCache.h */; settings = {ATTRIBUTES = (Private, ); }; };
                E49DC16D12EF295300184A1F /* SourceProviderCacheItem.h in Headers */ = {isa = PBXBuildFile; fileRef = E49DC14912EF261A00184A1F /* SourceProviderCacheItem.h */; };
+               E4D8CEFB12FC439600BC9F5A /* BloomFilter.h in Headers */ = {isa = PBXBuildFile; fileRef = E4D8CE9B12FC42E100BC9F5A /* BloomFilter.h */; settings = {ATTRIBUTES = (Private, ); }; };
                F3BD31ED126735770065467F /* TextPosition.h in Headers */ = {isa = PBXBuildFile; fileRef = F3BD31D0126730180065467F /* TextPosition.h */; settings = {ATTRIBUTES = (Private, ); }; };
                FDA15C1E12B0305C003A583A /* Complex.h in Headers */ = {isa = PBXBuildFile; fileRef = FDA15C1612B03028003A583A /* Complex.h */; settings = {ATTRIBUTES = (Private, ); }; };
                FE1B447A0ECCD73B004F4DD1 /* StdLibExtras.h in Headers */ = {isa = PBXBuildFile; fileRef = FE1B44790ECCD73B004F4DD1 /* StdLibExtras.h */; settings = {ATTRIBUTES = (Private, ); }; };
                E49DC14912EF261A00184A1F /* SourceProviderCacheItem.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = SourceProviderCacheItem.h; sourceTree = "<group>"; };
                E49DC15112EF272200184A1F /* SourceProviderCache.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = SourceProviderCache.h; sourceTree = "<group>"; };
                E49DC15512EF277200184A1F /* SourceProviderCache.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = SourceProviderCache.cpp; sourceTree = "<group>"; };
+               E4D8CE9B12FC42E100BC9F5A /* BloomFilter.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = BloomFilter.h; sourceTree = "<group>"; };
                F3BD31D0126730180065467F /* TextPosition.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = TextPosition.h; path = text/TextPosition.h; sourceTree = "<group>"; };
                F5BB2BC5030F772101FCFE1D /* Completion.h */ = {isa = PBXFileReference; fileEncoding = 30; indentWidth = 4; lastKnownFileType = sourcecode.c.h; path = Completion.h; sourceTree = "<group>"; tabWidth = 8; };
                F5C290E60284F98E018635CA /* JavaScriptCorePrefix.h */ = {isa = PBXFileReference; fileEncoding = 30; indentWidth = 4; lastKnownFileType = sourcecode.c.h; path = JavaScriptCorePrefix.h; sourceTree = "<group>"; tabWidth = 8; };
                                6580F795094070560082C219 /* PassRefPtr.h */,
                                65D6D87E09B5A32E0002E4D7 /* Platform.h */,
                                A7D649A91015224E009B2E1B /* PossiblyNull.h */,
+                               E4D8CE9B12FC42E100BC9F5A /* BloomFilter.h */,
                                088FA5B90EF76D4300578E6F /* RandomNumber.cpp */,
                                088FA5BA0EF76D4300578E6F /* RandomNumber.h */,
                                08E279E80EF83B10007DB523 /* RandomNumberSeed.h */,
                                E49DC16C12EF294E00184A1F /* SourceProviderCache.h in Headers */,
                                E49DC16D12EF295300184A1F /* SourceProviderCacheItem.h in Headers */,
                                14C824AD12F7C785008F35E0 /* MarkedBlock.h in Headers */,
+                               E4D8CEFB12FC439600BC9F5A /* BloomFilter.h in Headers */,
                        );
                        runOnlyForDeploymentPostprocessing = 0;
                };
diff --git a/Source/JavaScriptCore/wtf/BloomFilter.h b/Source/JavaScriptCore/wtf/BloomFilter.h
new file mode 100644 (file)
index 0000000..ae5b4a2
--- /dev/null
@@ -0,0 +1,137 @@
+/*
+ * Copyright (C) 2011 Apple Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS''
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef BloomFilter_h
+#define BloomFilter_h
+
+#include <wtf/AlwaysInline.h>
+#include <wtf/text/AtomicString.h>
+
+namespace WTF {
+
+// Counting bloom filter with k=2 and 8 bit counters. Uses 2^keyBits bytes of memory.
+// False positive rate is approximately (1-e^(-2n/m))^2, where n is the number of unique 
+// keys and m is the table size (==2^keyBits).
+template <unsigned keyBits>
+class BloomFilter {
+public:
+    COMPILE_ASSERT(keyBits <= 16, bloom_filter_key_size);
+
+    static const size_t tableSize = 1 << keyBits;
+    static const unsigned keyMask = (1 << keyBits) - 1;
+    static uint8_t maximumCount() { return std::numeric_limits<uint8_t>::max(); }
+    
+    BloomFilter() { clear(); }
+
+    void add(unsigned hash);
+    void remove(unsigned hash);
+
+    // The filter may give false positives (claim it may contain a key it doesn't)
+    // but never false negatives (claim it doesn't contain a key it does).
+    bool mayContain(unsigned hash) const { return firstSlot(hash) && secondSlot(hash); }
+    
+    // The filter must be cleared before reuse even if all keys are removed.
+    // Otherwise overflowed keys will stick around.
+    void clear();
+
+    void add(const AtomicString& string) { add(string.impl()->existingHash()); }
+    void add(const String& string) { add(string.impl()->hash()); }
+    void remove(const AtomicString& string) { remove(string.impl()->existingHash()); }
+    void remove(const String& string) { remove(string.impl()->hash()); }
+
+    bool mayContain(const AtomicString& string) const { return mayContain(string.impl()->existingHash()); }
+    bool mayContain(const String& string) const { return mayContain(string.impl()->hash()); }
+
+#ifndef ASSERTS_DISABLED
+    // Slow, for use in asserts only.
+    bool likelyEmpty() const;
+    bool isClear() const;
+#endif
+
+private:
+    uint8_t& firstSlot(unsigned hash) { return m_table[hash & keyMask]; }
+    uint8_t& secondSlot(unsigned hash) { return m_table[(hash >> 16) & keyMask]; }
+    const uint8_t& firstSlot(unsigned hash) const { return m_table[hash & keyMask]; }
+    const uint8_t& secondSlot(unsigned hash) const { return m_table[(hash >> 16) & keyMask]; }
+
+    uint8_t m_table[tableSize];
+};
+    
+template <unsigned keyBits>
+inline void BloomFilter<keyBits>::add(unsigned hash)
+{
+    uint8_t& first = firstSlot(hash);
+    uint8_t& second = secondSlot(hash);
+    if (LIKELY(first < maximumCount()))
+        ++first;
+    if (LIKELY(second < maximumCount()))
+        ++second;
+}
+
+template <unsigned keyBits>
+inline void BloomFilter<keyBits>::remove(unsigned hash)
+{
+    uint8_t& first = firstSlot(hash);
+    uint8_t& second = secondSlot(hash);
+    ASSERT(first);
+    ASSERT(second);
+    // In case of an overflow, the slot sticks in the table until clear().
+    if (LIKELY(first < maximumCount()))
+        --first;
+    if (LIKELY(second < maximumCount()))
+        --second;
+}
+    
+template <unsigned keyBits>
+inline void BloomFilter<keyBits>::clear()
+{
+    memset(m_table, 0, tableSize);
+}
+
+template <unsigned keyBits>
+bool BloomFilter<keyBits>::likelyEmpty() const
+{
+    for (size_t n = 0; n < tableSize; ++n) {
+        if (m_table[n] && m_table[n] != maximumCount())
+            return false;
+    }
+    return true;
+}
+
+template <unsigned keyBits>
+bool BloomFilter<keyBits>::isClear() const
+{
+    for (size_t n = 0; n < tableSize; ++n) {
+        if (m_table[n])
+            return false;
+    }
+    return true;
+}
+
+}
+
+using WTF::BloomFilter;
+
+#endif
index 232fa0791701ca6e6d6e717e5b555ccba0a3f14e..58eafc251350bd78630804f0ab541625d608297a 100644 (file)
@@ -1,3 +1,24 @@
+2011-02-06  Antti Koivisto  <antti@apple.com>
+
+        Reviewed by Maciej Stachowiak.
+
+        Use bloom filter for descendant selector filtering
+        https://bugs.webkit.org/show_bug.cgi?id=53880
+        
+        Bloom filter is faster than a hash set in this kind of use.
+        
+        Shark thinks this speeds up style matching by ~30% on sites
+        with lots of descendant selectors.
+
+        * ForwardingHeaders/wtf/BloomFilter.h: Added.
+        * css/CSSStyleSelector.cpp:
+        (WebCore::collectElementIdentifierHashes):
+        (WebCore::CSSStyleSelector::pushParent):
+        (WebCore::CSSStyleSelector::popParent):
+        (WebCore::CSSStyleSelector::fastRejectSelector):
+        (WebCore::RuleData::collectDescendantSelectorIdentifierHashes):
+        * css/CSSStyleSelector.h:
+
 2011-02-06  Maciej Stachowiak  <mjs@apple.com>
 
         Reviewed by Antti Koivisto.
diff --git a/Source/WebCore/ForwardingHeaders/wtf/BloomFilter.h b/Source/WebCore/ForwardingHeaders/wtf/BloomFilter.h
new file mode 100644 (file)
index 0000000..98a67b9
--- /dev/null
@@ -0,0 +1,4 @@
+#ifndef WebCore_FWD_BloomFilter_h
+#define WebCore_FWD_BloomFilter_h
+#include <JavaScriptCore/BloomFilter.h>
+#endif
index c8cd991803432f13abb3b40b92bf0a2239b371db..8a4ac3890eeeacf8539ffc3f8c7cb4eddf750dbc 100644 (file)
@@ -651,32 +651,36 @@ static void loadViewSourceStyle()
     defaultViewSourceStyle->addRulesFromSheet(parseUASheet(sourceUserAgentStyleSheet, sizeof(sourceUserAgentStyleSheet)), screenEval());
 }
     
-static inline void collectElementIdentifierHashes(Element* element, Vector<unsigned, 4>& identifierHashes)
+static inline void collectElementIdentifierHashes(const Element* element, Vector<unsigned, 4>& identifierHashes)
 {
     identifierHashes.append(element->localName().impl()->existingHash());
     if (element->hasID())
         identifierHashes.append(element->idForStyleResolution().impl()->existingHash());
-    StyledElement* styledElement = element->isStyledElement() ? static_cast<StyledElement*>(element) : 0;
+    const StyledElement* styledElement = element->isStyledElement() ? static_cast<const StyledElement*>(element) : 0;
     if (styledElement && styledElement->hasClass()) {
-        const SpaceSplitString& classNames = static_cast<StyledElement*>(element)->classNames();
+        const SpaceSplitString& classNames = styledElement->classNames();
         size_t count = classNames.size();
         for (size_t i = 0; i < count; ++i)
             identifierHashes.append(classNames[i].impl()->existingHash());
     }
 }
-    
+
 void CSSStyleSelector::pushParent(Element* parent)
 {
     // If we are not invoked consistently for each parent, just pause maintaining the stack.
     // There are all kinds of wacky special cases where the style recalc may temporarily branch to some random elements.
     // FIXME: Perhaps we should fix up the stack instead? There is some danger of getting into O(n^2) situations doing that.
     if (m_parentStack.isEmpty()) {
-        ASSERT(m_ancestorIdentifierFilter.isEmpty());
+        ASSERT(!m_ancestorIdentifierFilter);
         // We must start from the root.
         if (parent->parentElement())
             return;
-    } else if (m_parentStack.last().element != parent->parentElement())
-        return;
+        m_ancestorIdentifierFilter = adoptPtr(new BloomFilter<bloomFilterKeyBits>);
+    } else {
+        ASSERT(m_ancestorIdentifierFilter);
+        if (m_parentStack.last().element != parent->parentElement())
+            return;
+    }
 
     m_parentStack.append(ParentStackFrame(parent));
     ParentStackFrame& parentFrame = m_parentStack.last();
@@ -685,20 +689,24 @@ void CSSStyleSelector::pushParent(Element* parent)
     collectElementIdentifierHashes(parent, parentFrame.identifierHashes);
     size_t count = parentFrame.identifierHashes.size();
     for (size_t i = 0; i < count; ++i)
-        m_ancestorIdentifierFilter.add(parentFrame.identifierHashes[i]);
+        m_ancestorIdentifierFilter->add(parentFrame.identifierHashes[i]);
 }
 
 void CSSStyleSelector::popParent(Element* parent)
 {
     if (m_parentStack.isEmpty() || m_parentStack.last().element != parent)
         return;
+    ASSERT(m_ancestorIdentifierFilter);
 
     const ParentStackFrame& parentFrame = m_parentStack.last();
     size_t count = parentFrame.identifierHashes.size();
     for (size_t i = 0; i < count; ++i)
-        m_ancestorIdentifierFilter.remove(parentFrame.identifierHashes[i]);
+        m_ancestorIdentifierFilter->remove(parentFrame.identifierHashes[i]);
     m_parentStack.removeLast();
-    ASSERT(!m_parentStack.isEmpty() || m_ancestorIdentifierFilter.isEmpty());
+    if (m_parentStack.isEmpty()) {
+        ASSERT(m_ancestorIdentifierFilter->likelyEmpty());
+        m_ancestorIdentifierFilter.clear();
+    }
 }
 
 void CSSStyleSelector::addMatchedDeclaration(CSSMutableStyleDeclaration* decl)
@@ -753,9 +761,10 @@ void CSSStyleSelector::matchRules(RuleSet* rules, int& firstRuleIndex, int& last
 
 inline bool CSSStyleSelector::fastRejectSelector(const RuleData& ruleData) const
 {
+    ASSERT(m_ancestorIdentifierFilter);
     const unsigned* descendantSelectorIdentifierHashes = ruleData.descendantSelectorIdentifierHashes();
     for (unsigned n = 0; n < RuleData::maximumIdentifierCount && descendantSelectorIdentifierHashes[n]; ++n) {
-        if (!m_ancestorIdentifierFilter.contains(descendantSelectorIdentifierHashes[n]))
+        if (!m_ancestorIdentifierFilter->mayContain(descendantSelectorIdentifierHashes[n]))
             return true;
     }
     return false;
@@ -2937,7 +2946,7 @@ inline void RuleData::collectDescendantSelectorIdentifierHashes()
     }
     for (; selector; selector = selector->tagHistory()) {
         // Only collect identifiers that match direct ancestors.
-        // FIXME: Intead of just stopping, this should skip over sibling selectors.
+        // FIXME: Instead of just stopping, this should skip over sibling selectors.
         if (relation != CSSSelector::Descendant && relation != CSSSelector::Child && relation != CSSSelector::SubSelector)
             break;
         if ((selector->m_match == CSSSelector::Id || selector->m_match == CSSSelector::Class) && !selector->value().isEmpty())
index 18606234c79b80a23dd2c03a6a2a4df51b6a5735..a89c080bf733c311093736b66da3a5d7492ccb9e 100644 (file)
@@ -27,7 +27,7 @@
 #include "LinkHash.h"
 #include "MediaQueryExp.h"
 #include "RenderStyle.h"
-#include <wtf/HashCountedSet.h>
+#include <wtf/BloomFilter.h>
 #include <wtf/HashMap.h>
 #include <wtf/HashSet.h>
 #include <wtf/RefPtr.h>
@@ -216,8 +216,10 @@ public:
             Vector<unsigned, 4> identifierHashes;
         };
         Vector<ParentStackFrame> m_parentStack;
-        // FIXME: Replace this with a bloom filter.
-        HashCountedSet<unsigned, AlreadyHashed> m_ancestorIdentifierFilter;
+        
+        // With 100 unique strings in the filter, 2^12 slot table has false positive rate of ~0.2%.
+        static const unsigned bloomFilterKeyBits = 12;
+        OwnPtr<BloomFilter<bloomFilterKeyBits> > m_ancestorIdentifierFilter;
 
         bool m_hasUAAppearance;
         BorderData m_borderData;