Reviewed by Adam.
authormjs <mjs@268f45cc-cd09-0410-ab3c-d52691b4dbfc>
Fri, 19 Oct 2007 02:14:09 +0000 (02:14 +0000)
committermjs <mjs@268f45cc-cd09-0410-ab3c-d52691b4dbfc>
Fri, 19 Oct 2007 02:14:09 +0000 (02:14 +0000)
        - use __declspec(thread) for fast thread-local storage on Windows

        - 2.2% speedup on sunspider (on Windows)
        - 7% speedup on the string section
        - 6% speedup on JS iBench

        - fixed <rdar://problem/5473084> PLT on Windows got 2.5% slower between r25406 and r25422
        - fixed at least some of <rdar://5527965? i-Bench JS was 14% slower in 310A11 than 310A10

        * wtf/FastMalloc.cpp:
        (WTF::getThreadHeap):
        (WTF::setThreadHeap):
        (WTF::TCMalloc_ThreadCache::GetCache):
        (WTF::TCMalloc_ThreadCache::GetCacheIfPresent):
        (WTF::TCMalloc_ThreadCache::CreateCacheIfNecessary):

git-svn-id: https://svn.webkit.org/repository/webkit/trunk@26760 268f45cc-cd09-0410-ab3c-d52691b4dbfc

JavaScriptCore/ChangeLog
JavaScriptCore/wtf/FastMalloc.cpp

index fd67081..334d9d0 100644 (file)
@@ -1,3 +1,24 @@
+2007-10-18  Maciej Stachowiak  <mjs@apple.com>
+
+        Reviewed by Adam.
+        
+        - use __declspec(thread) for fast thread-local storage on Windows
+        
+        - 2.2% speedup on sunspider (on Windows)
+        - 7% speedup on the string section
+        - 6% speedup on JS iBench
+        
+        - fixed <rdar://problem/5473084> PLT on Windows got 2.5% slower between r25406 and r25422
+        - fixed at least some of <rdar://5527965? i-Bench JS was 14% slower in 310A11 than 310A10
+        
+        
+        * wtf/FastMalloc.cpp:
+        (WTF::getThreadHeap):
+        (WTF::setThreadHeap):
+        (WTF::TCMalloc_ThreadCache::GetCache):
+        (WTF::TCMalloc_ThreadCache::GetCacheIfPresent):
+        (WTF::TCMalloc_ThreadCache::CreateCacheIfNecessary):
+
 2007-10-17  Darin Adler  <darin@apple.com>
 
         Reviewed by Mark Rowe.
index 13f3261..4dc3f1c 100644 (file)
@@ -1184,7 +1184,7 @@ class TCMalloc_ThreadCache {
   static void                  InitTSD();
   static TCMalloc_ThreadCache* GetCache();
   static TCMalloc_ThreadCache* GetCacheIfPresent();
-  static void*                 CreateCacheIfNecessary();
+  static TCMalloc_ThreadCache* CreateCacheIfNecessary();
   static void                  DeleteCache(void* ptr);
   static void                  RecomputeThreadCacheSize();
 
@@ -1292,6 +1292,28 @@ static inline TCMalloc_PageHeap* getPageHeap()
 // Until then, we use a slow path to get the heap object.
 static bool tsd_inited = false;
 static pthread_key_t heap_key;
+#if COMPILER(MSVC)
+__declspec(thread) TCMalloc_ThreadCache* threadHeap;
+#endif
+
+static ALWAYS_INLINE TCMalloc_ThreadCache* getThreadHeap()
+{
+#if COMPILER(MSVC)
+    return threadHeap;
+#else
+    return static_cast<TCMalloc_ThreadCache*>(pthread_getspecific(heap_key));
+#endif
+}
+
+static ALWAYS_INLINE void setThreadHeap(TCMalloc_ThreadCache* heap)
+{
+    // still do pthread_setspecific when using MSVC fast TLS to
+    // benefit from the delete callback.
+    pthread_setspecific(heap_key, heap);
+#if COMPILER(MSVC)
+    threadHeap = heap;
+#endif
+}
 
 // Allocator for thread heaps
 static PageHeapAllocator<TCMalloc_ThreadCache> threadheap_allocator;
@@ -1557,14 +1579,14 @@ inline void TCMalloc_ThreadCache::Scavenge() {
 }
 
 ALWAYS_INLINE TCMalloc_ThreadCache* TCMalloc_ThreadCache::GetCache() {
-  void* ptr = NULL;
+  TCMalloc_ThreadCache* ptr = NULL;
   if (!tsd_inited) {
     InitModule();
   } else {
-    ptr = pthread_getspecific(heap_key);
+      ptr = getThreadHeap();
   }
   if (ptr == NULL) ptr = CreateCacheIfNecessary();
-  return static_cast<TCMalloc_ThreadCache*>(ptr);
+  return ptr;
 }
 
 // In deletion paths, we do not try to create a thread-cache.  This is
@@ -1572,7 +1594,7 @@ ALWAYS_INLINE TCMalloc_ThreadCache* TCMalloc_ThreadCache::GetCache() {
 // already cleaned up the cache for this thread.
 inline TCMalloc_ThreadCache* TCMalloc_ThreadCache::GetCacheIfPresent() {
   if (!tsd_inited) return NULL;
-  return static_cast<TCMalloc_ThreadCache*>(pthread_getspecific(heap_key));
+  return getThreadHeap();
 }
 
 void TCMalloc_ThreadCache::PickNextSample() {
@@ -1637,7 +1659,7 @@ void TCMalloc_ThreadCache::InitTSD() {
   }
 }
 
-void* TCMalloc_ThreadCache::CreateCacheIfNecessary() {
+TCMalloc_ThreadCache* TCMalloc_ThreadCache::CreateCacheIfNecessary() {
   // Initialize per-thread data if necessary
   TCMalloc_ThreadCache* heap = NULL;
   {
@@ -1680,7 +1702,7 @@ void* TCMalloc_ThreadCache::CreateCacheIfNecessary() {
   // linked list of heaps.
   if (!heap->setspecific_ && tsd_inited) {
     heap->setspecific_ = true;
-    pthread_setspecific(heap_key, heap);
+    setThreadHeap(heap);
   }
   return heap;
 }