GC allocation fast path has too many operations.
authorcommit-queue@webkit.org <commit-queue@webkit.org@268f45cc-cd09-0410-ab3c-d52691b4dbfc>
Fri, 15 Jul 2011 01:40:25 +0000 (01:40 +0000)
committercommit-queue@webkit.org <commit-queue@webkit.org@268f45cc-cd09-0410-ab3c-d52691b4dbfc>
Fri, 15 Jul 2011 01:40:25 +0000 (01:40 +0000)
https://bugs.webkit.org/show_bug.cgi?id=64493

Patch by Filip Pizlo <fpizlo@apple.com> on 2011-07-14
Reviewed by Darin Adler.

Changed the timing of the lazy sweep so that it occurs when we land on
a previously-unswept block, rather than whenever we land on an unswept
cell.  After the per-block lazy sweep occurs, the block is turned into a
singly linked list of free cells.  The allocation fast path is now just a
load-branch-store to remove a cell from the head of the list.

Additionally, this changes the way new blocks are allocated.  Previously,
they would be populated with dummy cells.  With this patch, they are
turned into a free list, which means that there will never be destructor
calls for allocations in fresh blocks.

These changes result in a 1.9% speed-up on V8, and a 0.6% speed-up on
SunSpider.  There are no observed statistically significant slow-downs
on any individual benchmark.

* JavaScriptCore.exp:
* heap/Heap.cpp:
(JSC::Heap::allocateSlowCase):
(JSC::Heap::collect):
(JSC::Heap::canonicalizeBlocks):
(JSC::Heap::resetAllocator):
* heap/Heap.h:
(JSC::Heap::forEachProtectedCell):
(JSC::Heap::forEachCell):
(JSC::Heap::forEachBlock):
(JSC::Heap::allocate):
* heap/MarkedBlock.cpp:
(JSC::MarkedBlock::MarkedBlock):
(JSC::MarkedBlock::lazySweep):
(JSC::MarkedBlock::blessNewBlockForFastPath):
(JSC::MarkedBlock::blessNewBlockForSlowPath):
(JSC::MarkedBlock::canonicalizeBlock):
* heap/MarkedBlock.h:
* heap/NewSpace.cpp:
(JSC::NewSpace::addBlock):
(JSC::NewSpace::canonicalizeBlocks):
* heap/NewSpace.h:
(JSC::NewSpace::allocate):
(JSC::NewSpace::SizeClass::SizeClass):
(JSC::NewSpace::SizeClass::canonicalizeBlock):
* heap/OldSpace.cpp:
(JSC::OldSpace::addBlock):

git-svn-id: https://svn.webkit.org/repository/webkit/trunk@91039 268f45cc-cd09-0410-ab3c-d52691b4dbfc

Source/JavaScriptCore/ChangeLog
Source/JavaScriptCore/JavaScriptCore.exp
Source/JavaScriptCore/JavaScriptCore.vcproj/JavaScriptCore/JavaScriptCore.def
Source/JavaScriptCore/heap/Heap.cpp
Source/JavaScriptCore/heap/Heap.h
Source/JavaScriptCore/heap/MarkedBlock.cpp
Source/JavaScriptCore/heap/MarkedBlock.h
Source/JavaScriptCore/heap/MarkedBlockSet.h
Source/JavaScriptCore/heap/NewSpace.cpp
Source/JavaScriptCore/heap/NewSpace.h
Source/JavaScriptCore/heap/OldSpace.cpp

index 938ece9..47f1625 100644 (file)
@@ -1,5 +1,55 @@
 2011-07-14  Filip Pizlo  <fpizlo@apple.com>
 
+        GC allocation fast path has too many operations.
+        https://bugs.webkit.org/show_bug.cgi?id=64493
+
+        Reviewed by Darin Adler.
+        
+        Changed the timing of the lazy sweep so that it occurs when we land on
+        a previously-unswept block, rather than whenever we land on an unswept
+        cell.  After the per-block lazy sweep occurs, the block is turned into a
+        singly linked list of free cells.  The allocation fast path is now just a
+        load-branch-store to remove a cell from the head of the list.
+        
+        Additionally, this changes the way new blocks are allocated.  Previously,
+        they would be populated with dummy cells.  With this patch, they are
+        turned into a free list, which means that there will never be destructor
+        calls for allocations in fresh blocks.
+        
+        These changes result in a 1.9% speed-up on V8, and a 0.6% speed-up on
+        SunSpider.  There are no observed statistically significant slow-downs
+        on any individual benchmark.
+
+        * JavaScriptCore.exp:
+        * heap/Heap.cpp:
+        (JSC::Heap::allocateSlowCase):
+        (JSC::Heap::collect):
+        (JSC::Heap::canonicalizeBlocks):
+        (JSC::Heap::resetAllocator):
+        * heap/Heap.h:
+        (JSC::Heap::forEachProtectedCell):
+        (JSC::Heap::forEachCell):
+        (JSC::Heap::forEachBlock):
+        (JSC::Heap::allocate):
+        * heap/MarkedBlock.cpp:
+        (JSC::MarkedBlock::MarkedBlock):
+        (JSC::MarkedBlock::lazySweep):
+        (JSC::MarkedBlock::blessNewBlockForFastPath):
+        (JSC::MarkedBlock::blessNewBlockForSlowPath):
+        (JSC::MarkedBlock::canonicalizeBlock):
+        * heap/MarkedBlock.h:
+        * heap/NewSpace.cpp:
+        (JSC::NewSpace::addBlock):
+        (JSC::NewSpace::canonicalizeBlocks):
+        * heap/NewSpace.h:
+        (JSC::NewSpace::allocate):
+        (JSC::NewSpace::SizeClass::SizeClass):
+        (JSC::NewSpace::SizeClass::canonicalizeBlock):
+        * heap/OldSpace.cpp:
+        (JSC::OldSpace::addBlock):
+
+2011-07-14  Filip Pizlo  <fpizlo@apple.com>
+
         DFG JIT crashes on host constructor calls in debug mode.
         https://bugs.webkit.org/show_bug.cgi?id=64562
         
index 72a9f32..9500b8c 100644 (file)
@@ -224,6 +224,7 @@ __ZN3JSC35createInterruptedExecutionExceptionEPNS_12JSGlobalDataE
 __ZN3JSC41constructFunctionSkippingEvalEnabledCheckEPNS_9ExecStateEPNS_14JSGlobalObjectERKNS_7ArgListERKNS_10IdentifierERKNS_7UStringEi
 __ZN3JSC4Heap11objectCountEv
 __ZN3JSC4Heap16activityCallbackEv
+__ZN3JSC4Heap16allocateSlowCaseERNS_8NewSpace9SizeClassE
 __ZN3JSC4Heap16objectTypeCountsEv
 __ZN3JSC4Heap17collectAllGarbageEv
 __ZN3JSC4Heap17globalObjectCountEv
@@ -237,7 +238,6 @@ __ZN3JSC4Heap29reportExtraMemoryCostSlowCaseEm
 __ZN3JSC4Heap4sizeEv
 __ZN3JSC4Heap7destroyEv
 __ZN3JSC4Heap7protectENS_7JSValueE
-__ZN3JSC4Heap8allocateERNS_8NewSpace9SizeClassE
 __ZN3JSC4Heap8capacityEv
 __ZN3JSC4Heap9unprotectENS_7JSValueE
 __ZN3JSC4Yarr11YarrPatternC1ERKNS_7UStringEbbPPKc
index e1324f6..8b26a03 100644 (file)
@@ -56,8 +56,8 @@ EXPORTS
     ?addSlowCase@Identifier@JSC@@CA?AV?$PassRefPtr@VStringImpl@WTF@@@WTF@@PAVExecState@2@PAVStringImpl@4@@Z
     ?addStaticGlobals@JSGlobalObject@JSC@@IAEXPAUGlobalPropertyInfo@12@H@Z
     ?allocate@Heap@JSC@@QAEPAXAAUSizeClass@NewSpace@2@@Z
-    ?allocate@Heap@JSC@@QAEPAXI@Z
     ?allocatePropertyStorage@JSObject@JSC@@QAEXII@Z
+    ?allocateSlowCase@Heap@JSC@@AAEPAXAAUSizeClass@NewSpace@2@@Z
     ?append@StringBuilder@WTF@@QAEXPBDI@Z
     ?append@StringBuilder@WTF@@QAEXPB_WI@Z
     ?ascii@UString@JSC@@QBE?AVCString@WTF@@XZ
index b12df6e..a084c77 100644 (file)
@@ -102,15 +102,6 @@ inline void ClearMarks::operator()(MarkedBlock* block)
     block->clearMarks();
 }
 
-struct ResetAllocator : MarkedBlock::VoidFunctor {
-    void operator()(MarkedBlock*);
-};
-
-inline void ResetAllocator::operator()(MarkedBlock* block)
-{
-    block->resetAllocator();
-}
-
 struct Sweep : MarkedBlock::VoidFunctor {
     void operator()(MarkedBlock*);
 };
@@ -320,7 +311,7 @@ inline void* Heap::tryAllocate(NewSpace::SizeClass& sizeClass)
     return result;
 }
 
-void* Heap::allocate(NewSpace::SizeClass& sizeClass)
+void* Heap::allocateSlowCase(NewSpace::SizeClass& sizeClass)
 {
 #if COLLECT_ON_EVERY_ALLOCATION
     collectAllGarbage();
@@ -558,7 +549,9 @@ void Heap::collect(SweepToggle sweepToggle)
     ASSERT(globalData()->identifierTable == wtfThreadData().currentIdentifierTable());
     ASSERT(m_isSafeToCollect);
     JAVASCRIPTCORE_GC_BEGIN();
-
+    
+    canonicalizeBlocks();
+    
     markRoots();
     m_handleHeap.finalizeWeakHandles();
     m_globalData->smallStrings.finalizeSmallStrings();
@@ -588,11 +581,15 @@ void Heap::collect(SweepToggle sweepToggle)
     (*m_activityCallback)();
 }
 
+void Heap::canonicalizeBlocks()
+{
+    m_newSpace.canonicalizeBlocks();
+}
+
 void Heap::resetAllocator()
 {
     m_extraCost = 0;
     m_newSpace.resetAllocator();
-    forEachBlock<ResetAllocator>();
 }
 
 void Heap::setActivityCallback(PassOwnPtr<GCActivityCallback> activityCallback)
index d17e247..ee1528e 100644 (file)
 
 #include "HandleHeap.h"
 #include "HandleStack.h"
-#include "SlotVisitor.h"
+#include "MarkedBlock.h"
 #include "MarkedBlockSet.h"
 #include "NewSpace.h"
+#include "SlotVisitor.h"
 #include <wtf/Forward.h>
 #include <wtf/HashCountedSet.h>
 #include <wtf/HashSet.h>
@@ -124,8 +125,8 @@ namespace JSC {
         static const size_t maxExtraCost = 1024 * 1024;
 
         bool isValidAllocation(size_t);
-        void* allocateSlowCase(size_t);
         void reportExtraMemoryCostSlowCase(size_t);
+        void canonicalizeBlocks();
         void resetAllocator();
 
         MarkedBlock* allocateBlock(size_t cellSize);
@@ -137,6 +138,7 @@ namespace JSC {
         void markTempSortVectors(HeapRootVisitor&);
 
         void* tryAllocate(NewSpace::SizeClass&);
+        void* allocateSlowCase(NewSpace::SizeClass&);
         
         enum SweepToggle { DoNotSweep, DoSweep };
         void collect(SweepToggle);
@@ -242,6 +244,7 @@ namespace JSC {
 
     template<typename Functor> inline typename Functor::ReturnType Heap::forEachProtectedCell(Functor& functor)
     {
+        canonicalizeBlocks();
         ProtectCountSet::iterator end = m_protectedValues.end();
         for (ProtectCountSet::iterator it = m_protectedValues.begin(); it != end; ++it)
             functor(it->first);
@@ -258,6 +261,7 @@ namespace JSC {
 
     template<typename Functor> inline typename Functor::ReturnType Heap::forEachCell(Functor& functor)
     {
+        canonicalizeBlocks();
         BlockIterator end = m_blocks.set().end();
         for (BlockIterator it = m_blocks.set().begin(); it != end; ++it)
             (*it)->forEachCell(functor);
@@ -272,6 +276,7 @@ namespace JSC {
 
     template<typename Functor> inline typename Functor::ReturnType Heap::forEachBlock(Functor& functor)
     {
+        canonicalizeBlocks();
         BlockIterator end = m_blocks.set().end();
         for (BlockIterator it = m_blocks.set().begin(); it != end; ++it)
             functor(*it);
@@ -283,6 +288,17 @@ namespace JSC {
         Functor functor;
         return forEachBlock(functor);
     }
+    
+    inline void* Heap::allocate(NewSpace::SizeClass& sizeClass)
+    {
+        // This is a light-weight fast path to cover the most common case.
+        MarkedBlock::FreeCell* firstFreeCell = sizeClass.firstFreeCell;
+        if (UNLIKELY(!firstFreeCell))
+            return allocateSlowCase(sizeClass);
+        
+        sizeClass.firstFreeCell = firstFreeCell->next;
+        return firstFreeCell;
+    }
 
     inline void* Heap::allocate(size_t bytes)
     {
index 9393cbe..e3f4819 100644 (file)
@@ -49,17 +49,12 @@ void MarkedBlock::destroy(MarkedBlock* block)
 }
 
 MarkedBlock::MarkedBlock(const PageAllocationAligned& allocation, Heap* heap, size_t cellSize)
-    : m_nextAtom(firstAtom())
-    , m_inNewSpace(false)
+    : m_inNewSpace(false)
     , m_allocation(allocation)
     , m_heap(heap)
 {
     m_atomsPerCell = (cellSize + atomSize - 1) / atomSize;
     m_endAtom = atomsPerBlock - m_atomsPerCell + 1;
-
-    Structure* dummyMarkableCellStructure = heap->globalData()->dummyMarkableCellStructure.get();
-    for (size_t i = firstAtom(); i < m_endAtom; i += m_atomsPerCell)
-        new (&atoms()[i]) JSCell(*heap->globalData(), dummyMarkableCellStructure, JSCell::CreatingEarlyCell);
 }
 
 void MarkedBlock::sweep()
@@ -85,6 +80,64 @@ void MarkedBlock::sweep()
     }
 }
 
+MarkedBlock::FreeCell* MarkedBlock::lazySweep()
+{
+    // This returns a free list that is ordered in reverse through the block.
+    // This is fine, since the allocation code makes no assumptions about the
+    // order of the free list.
+    
+    FreeCell* result = 0;
+    
+    for (size_t i = firstAtom(); i < m_endAtom; i += m_atomsPerCell) {
+        if (!m_marks.testAndSet(i)) {
+            JSCell* cell = reinterpret_cast<JSCell*>(&atoms()[i]);
+            cell->~JSCell();
+            FreeCell* freeCell = reinterpret_cast<FreeCell*>(cell);
+            freeCell->next = result;
+            result = freeCell;
+        }
+    }
+    
+    return result;
+}
+
+MarkedBlock::FreeCell* MarkedBlock::blessNewBlockForFastPath()
+{
+    // This returns a free list that is ordered in reverse through the block,
+    // as in lazySweep() above.
+    
+    FreeCell* result = 0;
+    for (size_t i = firstAtom(); i < m_endAtom; i += m_atomsPerCell) {
+        m_marks.set(i);
+        FreeCell* freeCell = reinterpret_cast<FreeCell*>(&atoms()[i]);
+        freeCell->next = result;
+        result = freeCell;
+    }
+    return result;
+}
+
+void MarkedBlock::blessNewBlockForSlowPath()
+{
+    Structure* dummyMarkableCellStructure = m_heap->globalData()->dummyMarkableCellStructure.get();
+    for (size_t i = firstAtom(); i < m_endAtom; i += m_atomsPerCell)
+        new (&atoms()[i]) JSCell(*m_heap->globalData(), dummyMarkableCellStructure, JSCell::CreatingEarlyCell);
+}
+
+void MarkedBlock::canonicalizeBlock(FreeCell* firstFreeCell)
+{
+    Structure* dummyMarkableCellStructure = m_heap->globalData()->dummyMarkableCellStructure.get();
+    
+    for (FreeCell* current = firstFreeCell; current;) {
+        FreeCell* next = current->next;
+        size_t i = atomNumber(current);
+        
+        m_marks.clear(i);
+        new (static_cast<void*>(current)) JSCell(*m_heap->globalData(), dummyMarkableCellStructure, JSCell::CreatingEarlyCell);
+
+        current = next;
+    }
+}
+
 #if ENABLE(JSC_ZOMBIES)
 void MarkedBlock::clearMarks()
 {
index 82b98d2..e2d2149 100644 (file)
@@ -48,6 +48,10 @@ namespace JSC {
         static const size_t atomsPerBlock = blockSize / atomSize; // ~1.5% overhead
         static const size_t ownerSetsPerBlock = 8; // ~2% overhead.
 
+        struct FreeCell {
+            FreeCell* next;
+        };
+        
         struct VoidFunctor {
             typedef void ReturnType;
             void returnValue() { }
@@ -84,9 +88,22 @@ namespace JSC {
         void setInNewSpace(bool);
 
         void* allocate();
-        void resetAllocator();
         void sweep();
         
+        // This invokes destructors on all cells that are not marked, marks
+        // them, and returns a linked list of those cells.
+        FreeCell* lazySweep();
+        
+        // These should be called immediately after a block is created.
+        // Blessing for fast path creates a linked list, while blessing for
+        // slow path creates dummy cells.
+        FreeCell* blessNewBlockForFastPath();
+        void blessNewBlockForSlowPath();
+        
+        // This unmarks all cells on the free list, and allocates dummy JSCells
+        // in their place.
+        void canonicalizeBlock(FreeCell* firstFreeCell);
+        
         bool isEmpty();
 
         void clearMarks();
@@ -118,7 +135,6 @@ namespace JSC {
         size_t atomNumber(const void*);
         size_t ownerSetNumber(const JSCell*);
 
-        size_t m_nextAtom;
         size_t m_endAtom; // This is a fuzzy end. Always test for < m_endAtom.
         size_t m_atomsPerCell;
         WTF::Bitmap<blockSize / atomSize> m_marks;
@@ -165,11 +181,6 @@ namespace JSC {
         m_inNewSpace = inNewSpace;
     }
 
-    inline void MarkedBlock::resetAllocator()
-    {
-        m_nextAtom = firstAtom();
-    }
-
     inline bool MarkedBlock::isEmpty()
     {
         return m_marks.isEmpty();
@@ -235,22 +246,7 @@ namespace JSC {
             functor(reinterpret_cast<JSCell*>(&atoms()[i]));
         }
     }
-
-    inline void* MarkedBlock::allocate()
-    {
-        while (m_nextAtom < m_endAtom) {
-            if (!m_marks.testAndSet(m_nextAtom)) {
-                JSCell* cell = reinterpret_cast<JSCell*>(&atoms()[m_nextAtom]);
-                m_nextAtom += m_atomsPerCell;
-                destructor(cell);
-                return cell;
-            }
-            m_nextAtom += m_atomsPerCell;
-        }
-
-        return 0;
-    }
-
+    
     inline size_t MarkedBlock::ownerSetNumber(const JSCell* cell)
     {
         return (reinterpret_cast<Bits>(cell) - reinterpret_cast<Bits>(this)) * ownerSetsPerBlock / blockSize;
index 939575f..022a173 100644 (file)
@@ -28,6 +28,7 @@
 
 #include "MarkedBlock.h"
 #include "TinyBloomFilter.h"
+#include <wtf/HashSet.h>
 
 namespace JSC {
 
index b591d4b..29eee95 100644 (file)
@@ -48,6 +48,10 @@ void NewSpace::addBlock(SizeClass& sizeClass, MarkedBlock* block)
     block->setInNewSpace(true);
     sizeClass.nextBlock = block;
     sizeClass.blockList.append(block);
+    ASSERT(!sizeClass.currentBlock);
+    ASSERT(!sizeClass.firstFreeCell);
+    sizeClass.currentBlock = block;
+    sizeClass.firstFreeCell = block->blessNewBlockForFastPath();
 }
 
 void NewSpace::removeBlock(MarkedBlock* block)
@@ -69,4 +73,13 @@ void NewSpace::resetAllocator()
         sizeClassFor(cellSize).resetAllocator();
 }
 
+void NewSpace::canonicalizeBlocks()
+{
+    for (size_t cellSize = preciseStep; cellSize < preciseCutoff; cellSize += preciseStep)
+        sizeClassFor(cellSize).canonicalizeBlock();
+
+    for (size_t cellSize = impreciseStep; cellSize < impreciseCutoff; cellSize += impreciseStep)
+        sizeClassFor(cellSize).canonicalizeBlock();
+}
+
 } // namespace JSC
index 848caf6..62c8f5b 100644 (file)
@@ -50,7 +50,10 @@ namespace JSC {
         struct SizeClass {
             SizeClass();
             void resetAllocator();
+            void canonicalizeBlock();
 
+            MarkedBlock::FreeCell* firstFreeCell;
+            MarkedBlock* currentBlock;
             MarkedBlock* nextBlock;
             DoublyLinkedList<MarkedBlock> blockList;
             size_t cellSize;
@@ -64,6 +67,8 @@ namespace JSC {
 
         void addBlock(SizeClass&, MarkedBlock*);
         void removeBlock(MarkedBlock*);
+        
+        void canonicalizeBlocks();
 
         size_t waterMark();
         size_t highWaterMark();
@@ -115,14 +120,43 @@ namespace JSC {
 
     inline void* NewSpace::allocate(SizeClass& sizeClass)
     {
-        for (MarkedBlock*& block = sizeClass.nextBlock ; block; block = block->next()) {
-            if (void* result = block->allocate())
-                return result;
-
-            m_waterMark += block->capacity();
+        MarkedBlock::FreeCell* firstFreeCell = sizeClass.firstFreeCell;
+        if (!firstFreeCell) {
+            // There are two possibilities for why we got here:
+            // 1) We've exhausted the allocation cache for currentBlock, in which case
+            //    currentBlock == nextBlock, and we know that there is no reason to
+            //    repeat a lazy sweep of nextBlock because we won't find anything.
+            // 2) Allocation caches have been cleared, in which case nextBlock may
+            //    have (and most likely does have) free cells, so we almost certainly
+            //    should do a lazySweep for nextBlock. This also implies that
+            //    currentBlock == 0.
+            
+            if (sizeClass.currentBlock) {
+                ASSERT(sizeClass.currentBlock == sizeClass.nextBlock);
+                m_waterMark += sizeClass.nextBlock->capacity();
+                sizeClass.nextBlock = sizeClass.nextBlock->next();
+                sizeClass.currentBlock = 0;
+            }
+            
+            for (MarkedBlock*& block = sizeClass.nextBlock ; block; block = block->next()) {
+                firstFreeCell = block->lazySweep();
+                if (firstFreeCell) {
+                    sizeClass.firstFreeCell = firstFreeCell;
+                    sizeClass.currentBlock = block;
+                    break;
+                }
+                
+                m_waterMark += block->capacity();
+            }
+            
+            if (!firstFreeCell)
+                return 0;
         }
-
-        return 0;
+        
+        ASSERT(firstFreeCell);
+        
+        sizeClass.firstFreeCell = firstFreeCell->next;
+        return firstFreeCell;
     }
 
     template <typename Functor> inline typename Functor::ReturnType NewSpace::forEachBlock(Functor& functor)
@@ -155,7 +189,9 @@ namespace JSC {
     }
 
     inline NewSpace::SizeClass::SizeClass()
-        : nextBlock(0)
+        : firstFreeCell(0)
+        , currentBlock(0)
+        , nextBlock(0)
         , cellSize(0)
     {
     }
@@ -164,6 +200,19 @@ namespace JSC {
     {
         nextBlock = blockList.head();
     }
+    
+    inline void NewSpace::SizeClass::canonicalizeBlock()
+    {
+        if (currentBlock) {
+            currentBlock->canonicalizeBlock(firstFreeCell);
+            firstFreeCell = 0;
+        }
+        
+        ASSERT(!firstFreeCell);
+        
+        currentBlock = 0;
+        firstFreeCell = 0;
+    }
 
 } // namespace JSC
 
index 549ba93..47dd8a5 100644 (file)
@@ -36,6 +36,7 @@ OldSpace::OldSpace(Heap* heap)
 void OldSpace::addBlock(MarkedBlock* block)
 {
     m_blocks.append(block);
+    block->blessNewBlockForSlowPath();
 }
 
 void OldSpace::removeBlock(MarkedBlock* block)