[WTF] Try using 75% load factor for HashTable
author     ysuzuki@apple.com <ysuzuki@apple.com@268f45cc-cd09-0410-ab3c-d52691b4dbfc>
           Sat, 8 Feb 2020 21:50:02 +0000 (21:50 +0000)
committer  ysuzuki@apple.com <ysuzuki@apple.com@268f45cc-cd09-0410-ab3c-d52691b4dbfc>
           Sat, 8 Feb 2020 21:50:02 +0000 (21:50 +0000)
https://bugs.webkit.org/show_bug.cgi?id=207183

Reviewed by Mark Lam.

Source/WTF:

Hash tables are among the most memory-consuming data structures in WebKit.
A survey of production hash-table implementations[1] shows that many of
them use a 75% max load factor, while ours is 50%.

This patch raises the max load factor from 50% to 75%, but only for small
tables whose capacity is <= 1024, based on data collected through
micro-benchmarking. That data shows that the longer probe lengths caused
by a higher load factor hurt performance once the table grows large.

[1]: LLVM's DenseMap, Abseil's hash tables, Rust's, and so on.
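
For concreteness, here is the new policy in miniature (condensed from the
shouldExpand() logic added in this patch; the worked numbers are
illustrative, not from the benchmark data):

    // Small tables (capacity <= 1024) expand at 75% load; large ones stay at 50%.
    constexpr bool shouldExpand(uint64_t keyAndDeleteCount, uint64_t tableSize)
    {
        if (tableSize <= 1024)
            return keyAndDeleteCount * 4 >= tableSize * 3;
        return keyAndDeleteCount * 2 >= tableSize;
    }
    // A capacity-1024 table used to expand upon reaching 512 occupied slots;
    // now it holds up to 767 (768 * 4 >= 1024 * 3 triggers the doubling).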

* wtf/HashTable.h:
(WTF::HashTableCapacityForSize::shouldExpand):
(WTF::HashTableCapacityForSize::capacityForSize):
(WTF::HashTable::shouldExpand const):
(WTF::KeyTraits>::computeBestTableSize):

Tools:

* TestWebKitAPI/Tests/WTF/HashSet.cpp:
(TestWebKitAPI::testInitialCapacity):

git-svn-id: https://svn.webkit.org/repository/webkit/trunk@256093 268f45cc-cd09-0410-ab3c-d52691b4dbfc

Source/WTF/ChangeLog
Source/WTF/wtf/HashTable.h
Tools/ChangeLog
Tools/TestWebKitAPI/Tests/WTF/HashSet.cpp

Source/WTF/ChangeLog
index f0f0f02..be088e4 100644 (file)
@@ -1,3 +1,27 @@
+2020-02-08  Yusuke Suzuki  <ysuzuki@apple.com>
+
+        [WTF] Try using 75% load factor for HashTable
+        https://bugs.webkit.org/show_bug.cgi?id=207183
+
+        Reviewed by Mark Lam.
+
+        Hash tables are among the most memory-consuming data structures in WebKit.
+        A survey of production hash-table implementations[1] shows that many of
+        them use a 75% max load factor, while ours is 50%.
+
+        This patch raises the max load factor from 50% to 75%, but only for small
+        tables whose capacity is <= 1024, based on data collected through
+        micro-benchmarking. That data shows that the longer probe lengths caused
+        by a higher load factor hurt performance once the table grows large.
+
+        [1]: LLVM's DenseMap, Abseil's hash tables, Rust's, and so on.
+
+        * wtf/HashTable.h:
+        (WTF::HashTableCapacityForSize::shouldExpand):
+        (WTF::HashTableCapacityForSize::capacityForSize):
+        (WTF::HashTable::shouldExpand const):
+        (WTF::KeyTraits>::computeBestTableSize):
+
 2020-02-08  Sam Weinig  <weinig@apple.com>
 
         Move trivial definitions from FeatureDefines.xcconfig to PlatformEnableCocoa.h
Source/WTF/wtf/HashTable.h
index 8beea3f..d3ab119 100644 (file)
@@ -303,6 +303,41 @@ DECLARE_ALLOCATOR_WITH_HEAP_IDENTIFIER(HashTable);
         explicit operator bool() const { return isNewEntry; }
     };
 
+    // HashTableCapacityForSize computes the upper power of two capacity to hold the size parameter.
+    // This is done at compile time to initialize the HashTraits.
+    template<unsigned size>
+    struct HashTableCapacityForSize {
+        // Load-factor for small table is 75%.
+        static constexpr unsigned smallMaxLoadNumerator = 3;
+        static constexpr unsigned smallMaxLoadDenominator = 4;
+        // Load-factor for large table is 50%.
+        static constexpr unsigned largeMaxLoadNumerator = 1;
+        static constexpr unsigned largeMaxLoadDenominator = 2;
+        static constexpr unsigned maxSmallTableCapacity = 1024;
+        static constexpr unsigned minLoad = 6;
+
+        static constexpr bool shouldExpand(uint64_t keyAndDeleteCount, uint64_t tableSize)
+        {
+            if (tableSize <= maxSmallTableCapacity)
+                return keyAndDeleteCount * smallMaxLoadDenominator >= tableSize * smallMaxLoadNumerator;
+            return keyAndDeleteCount * largeMaxLoadDenominator >= tableSize * largeMaxLoadNumerator;
+        }
+
+        static constexpr unsigned capacityForSize(uint64_t sizeArg)
+        {
+            if (!sizeArg)
+                return 0;
+            uint64_t capacity = roundUpToPowerOfTwo(sizeArg);
+            if (shouldExpand(sizeArg, capacity))
+                return capacity * 2;
+            return capacity;
+        }
+
+        static constexpr unsigned value = capacityForSize(size);
+        static_assert(size > 0, "HashTableNonZeroMinimumCapacity");
+        static_assert(!static_cast<unsigned>(value >> 31), "HashTableNoCapacityOverflow");
+    };
+
     template<typename Key, typename Value, typename Extractor, typename HashFunctions, typename Traits, typename KeyTraits>
     class HashTable {
     public:
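
As a compile-time sanity check of the new constexpr machinery, assertions
like the following hold (illustrative, not part of the patch; they rely on
roundUpToPowerOfTwo() returning the next power of two >= its argument):

    static_assert(HashTableCapacityForSize<5>::value == 8, "5/8 stays below the 3/4 bound");
    static_assert(HashTableCapacityForSize<6>::value == 16, "6/8 reaches 3/4, so the capacity doubles");
    static_assert(HashTableCapacityForSize<12>::value == 32, "12/16 reaches 3/4, so the capacity doubles");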
@@ -314,6 +349,8 @@ DECLARE_ALLOCATOR_WITH_HEAP_IDENTIFIER(HashTable);
         typedef IdentityHashTranslator<ValueTraits, HashFunctions> IdentityTranslatorType;
         typedef HashTableAddResult<iterator> AddResult;
 
+        using HashTableSizePolicy = HashTableCapacityForSize<1>;
+
 #if DUMP_HASHTABLE_STATS_PER_TABLE
         struct Stats {
             WTF_MAKE_STRUCT_FAST_ALLOCATED;
@@ -487,7 +524,7 @@ DECLARE_ALLOCATOR_WITH_HEAP_IDENTIFIER(HashTable);
         void remove(ValueType*);
 
         static constexpr unsigned computeBestTableSize(unsigned keyCount);
-        bool shouldExpand() const { return (keyCount() + deletedCount()) * maxLoad >= tableSize(); }
+        bool shouldExpand() const { return HashTableSizePolicy::shouldExpand(keyCount() + deletedCount(), tableSize()); }
         bool mustRehashInPlace() const { return keyCount() * minLoad < tableSize() * 2; }
         bool shouldShrink() const { return keyCount() * minLoad < tableSize() && tableSize() > KeyTraits::minimumTableSize; }
         ValueType* expand(ValueType* entry = nullptr);
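
Because shouldExpand() is now a constexpr static on the policy struct, both
tiers of the threshold can be exercised directly; hypothetical assertions
(not in the patch):

    static_assert(!HashTableCapacityForSize<1>::shouldExpand(1023, 2048), "large table, just under 50%");
    static_assert(HashTableCapacityForSize<1>::shouldExpand(1024, 2048), "large table, 50% reached");
    static_assert(HashTableCapacityForSize<1>::shouldExpand(768, 1024), "small table, 75% reached");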
@@ -522,8 +559,14 @@ DECLARE_ALLOCATOR_WITH_HEAP_IDENTIFIER(HashTable);
         static void invalidateIterators() { }
 #endif
 
-        static constexpr unsigned maxLoad = 2;
-        static constexpr unsigned minLoad = 6;
+        // Load-factor for small table is 75%.
+        static constexpr unsigned smallMaxLoadNumerator = HashTableSizePolicy::smallMaxLoadNumerator;
+        static constexpr unsigned smallMaxLoadDenominator = HashTableSizePolicy::smallMaxLoadDenominator;
+        // Load-factor for large table is 50%.
+        static constexpr unsigned largeMaxLoadNumerator = HashTableSizePolicy::largeMaxLoadNumerator;
+        static constexpr unsigned largeMaxLoadDenominator = HashTableSizePolicy::largeMaxLoadDenominator;
+        static constexpr unsigned maxSmallTableCapacity = HashTableSizePolicy::maxSmallTableCapacity;
+        static constexpr unsigned minLoad = HashTableSizePolicy::minLoad;
 
         static constexpr int tableSizeOffset = -1;
         static constexpr int tableSizeMaskOffset = -2;
@@ -556,44 +599,6 @@ DECLARE_ALLOCATOR_WITH_HEAP_IDENTIFIER(HashTable);
 #endif
     };
 
-    // Set all the bits to one after the most significant bit: 00110101010 -> 00111111111.
-    template<unsigned size> struct OneifyLowBits;
-    template<>
-    struct OneifyLowBits<0> {
-        static constexpr unsigned value = 0;
-    };
-    template<unsigned number>
-    struct OneifyLowBits {
-        static constexpr unsigned value = number | OneifyLowBits<(number >> 1)>::value;
-    };
-    // Compute the first power of two integer that is an upper bound of the parameter 'number'.
-    template<unsigned number>
-    struct UpperPowerOfTwoBound {
-        static constexpr unsigned value = (OneifyLowBits<number - 1>::value + 1) * 2;
-    };
-
-    // Because power of two numbers are the limit of maxLoad, their capacity is twice the
-    // UpperPowerOfTwoBound, or 4 times their values.
-    template<unsigned size, bool isPowerOfTwo> struct HashTableCapacityForSizeSplitter;
-    template<unsigned size>
-    struct HashTableCapacityForSizeSplitter<size, true> {
-        static constexpr unsigned value = size * 4;
-    };
-    template<unsigned size>
-    struct HashTableCapacityForSizeSplitter<size, false> {
-        static constexpr unsigned value = UpperPowerOfTwoBound<size>::value;
-    };
-
-    // HashTableCapacityForSize computes the upper power of two capacity to hold the size parameter.
-    // This is done at compile time to initialize the HashTraits.
-    template<unsigned size>
-    struct HashTableCapacityForSize {
-        static constexpr unsigned value = HashTableCapacityForSizeSplitter<size, !(size & (size - 1))>::value;
-        COMPILE_ASSERT(size > 0, HashTableNonZeroMinimumCapacity);
-        COMPILE_ASSERT(!static_cast<unsigned>(value >> 31), HashTableNoCapacityOverflow);
-        COMPILE_ASSERT(value > (2 * size), HashTableCapacityHoldsContentSize);
-    };
-
     template<typename Key, typename Value, typename Extractor, typename HashFunctions, typename Traits, typename KeyTraits>
     inline HashTable<Key, Value, Extractor, HashFunctions, Traits, KeyTraits>::HashTable()
         : m_table(nullptr)
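
The deleted template machinery computed the same power-of-two rounding that
the constexpr WTF::roundUpToPowerOfTwo() call now performs directly. For
example, UpperPowerOfTwoBound<12>::value = (OneifyLowBits<11>::value + 1) * 2
= (15 + 1) * 2 = 32, which matches the new capacityForSize(12) == 32 under
the 3/4 policy.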
@@ -1236,15 +1241,34 @@ DECLARE_ALLOCATOR_WITH_HEAP_IDENTIFIER(HashTable);
     template<typename Key, typename Value, typename Extractor, typename HashFunctions, typename Traits, typename KeyTraits>
     constexpr unsigned HashTable<Key, Value, Extractor, HashFunctions, Traits, KeyTraits>::computeBestTableSize(unsigned keyCount)
     {
-        unsigned bestTableSize = WTF::roundUpToPowerOfTwo(keyCount) * 2;
+        unsigned bestTableSize = WTF::roundUpToPowerOfTwo(keyCount);
 
-        // With maxLoad at 1/2 and minLoad at 1/6, our average load is 2/6.
-        // If we are getting halfway between 2/6 and 1/2 (past 5/12), we double the size to avoid being too close to
-        // loadMax and bring the ratio close to 2/6. This give us a load in the bounds [3/12, 5/12).
-        bool aboveThreeQuarterLoad = keyCount * 12 >= bestTableSize * 5;
-        if (aboveThreeQuarterLoad)
+        if (HashTableSizePolicy::shouldExpand(keyCount, bestTableSize))
             bestTableSize *= 2;
 
+        auto aboveThresholdForEagerExpansion = [](double loadFactor, unsigned keyCount, unsigned tableSize)
+        {
+            // Here is the rationale behind this calculation, using the 3/4 load-factor.
+            // With maxLoad at 3/4 and minLoad at 1/6, our average load is 11/24.
+            // If we are getting halfway between 11/24 and 3/4, we double the size
+            // to avoid being too close to loadMax and bring the ratio close to 11/24.
+            // This gives us a load in the bounds [9/24, 15/24).
+            double maxLoadRatio = loadFactor;
+            double minLoadRatio = 1.0 / minLoad;
+            double averageLoadRatio = (maxLoadRatio + minLoadRatio) / 2;
+            double halfWayBetweenAverageAndMaxLoadRatio = (averageLoadRatio + maxLoadRatio) / 2;
+            return keyCount >= tableSize * halfWayBetweenAverageAndMaxLoadRatio;
+        };
+
+        if (bestTableSize <= maxSmallTableCapacity) {
+            constexpr double smallLoadFactor = static_cast<double>(smallMaxLoadNumerator) / smallMaxLoadDenominator;
+            if (aboveThresholdForEagerExpansion(smallLoadFactor, keyCount, bestTableSize))
+                bestTableSize *= 2;
+        } else {
+            constexpr double largeLoadFactor = static_cast<double>(largeMaxLoadNumerator) / largeMaxLoadDenominator;
+            if (aboveThresholdForEagerExpansion(largeLoadFactor, keyCount, bestTableSize))
+                bestTableSize *= 2;
+        }
         unsigned minimumTableSize = KeyTraits::minimumTableSize;
         return std::max(bestTableSize, minimumTableSize);
     }
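
A worked example of the small-table branch (hypothetical numbers, not from
the patch):

    keyCount = 700 -> bestTableSize = roundUpToPowerOfTwo(700) = 1024
    shouldExpand(700, 1024): 700 * 4 = 2800 < 3072 = 1024 * 3, so no doubling yet
    eager threshold: (11/24 + 3/4) / 2 = 29/48, and 1024 * 29/48 ~= 618.7
    700 >= 618.7, so bestTableSize doubles to 2048, landing near 34% load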
Tools/ChangeLog
index c263280..cc846c7 100644 (file)
@@ -1,3 +1,13 @@
+2020-02-08  Yusuke Suzuki  <ysuzuki@apple.com>
+
+        [WTF] Try using 75% load factor for HashTable
+        https://bugs.webkit.org/show_bug.cgi?id=207183
+
+        Reviewed by Mark Lam.
+
+        * TestWebKitAPI/Tests/WTF/HashSet.cpp:
+        (TestWebKitAPI::testInitialCapacity):
+
 2020-02-08  Sam Weinig  <weinig@apple.com>
 
         Move trivial definitions from FeatureDefines.xcconfig to PlatformEnableCocoa.h
Tools/TestWebKitAPI/Tests/WTF/HashSet.cpp
index c854f89..498e9ea 100644 (file)
@@ -55,8 +55,8 @@ void testInitialCapacity()
         ASSERT_EQ(initialCapacity, static_cast<unsigned>(testSet.capacity()));
     }
 
-    // Adding items up to less than half the capacity should not change the capacity.
-    unsigned capacityLimit = initialCapacity / 2 - 1;
+    // Adding items up to less than 3/4 of the capacity should not change the capacity.
+    unsigned capacityLimit = initialCapacity * 3 / 4 - 1;
     for (size_t i = size; i < capacityLimit; ++i) {
         testSet.add(i);
         ASSERT_EQ(initialCapacity, static_cast<unsigned>(testSet.capacity()));
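
A hypothetical follow-up assertion (not in the patch) would pin down the
crossing point: one more add reaches 3/4 of the capacity, since
(capacityLimit + 1) * 4 >= initialCapacity * 3 when initialCapacity is a
multiple of 4, so the table should double:

    testSet.add(capacityLimit);
    ASSERT_EQ(initialCapacity * 2, static_cast<unsigned>(testSet.capacity()));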