Value profiling should just use two buckets
author fpizlo@apple.com <fpizlo@apple.com@268f45cc-cd09-0410-ab3c-d52691b4dbfc>
Sun, 6 Nov 2011 11:54:59 +0000 (11:54 +0000)
committer fpizlo@apple.com <fpizlo@apple.com@268f45cc-cd09-0410-ab3c-d52691b4dbfc>
Sun, 6 Nov 2011 11:54:59 +0000 (11:54 +0000)
https://bugs.webkit.org/show_bug.cgi?id=71619

Reviewed by Gavin Barraclough.

Added one more configuration option (Heuristics::minimumOptimizationDelay),
improved debugging in JIT optimization support, changed the number of buckets
in the value profile from 9 to 2, and wrote a more optimal value profiling path
in the old JIT to take advantage of this. It's still possible to play around with
larger numbers of buckets, and we should probably keep this for a little while
until we convince ourselves that using just two buckets is the right call.

* bytecode/CodeBlock.cpp:
(JSC::CodeBlock::shouldOptimizeNow):
* bytecode/ValueProfile.h:
* jit/JITInlineMethods.h:
(JSC::JIT::emitValueProfilingSite):
* jit/JITStubs.cpp:
(JSC::DEFINE_STUB_FUNCTION):
* runtime/Heuristics.cpp:
(JSC::Heuristics::initializeHeuristics):
* runtime/Heuristics.h:

git-svn-id: https://svn.webkit.org/repository/webkit/trunk@99375 268f45cc-cd09-0410-ab3c-d52691b4dbfc

Source/JavaScriptCore/ChangeLog
Source/JavaScriptCore/bytecode/CodeBlock.cpp
Source/JavaScriptCore/bytecode/ValueProfile.h
Source/JavaScriptCore/jit/JITInlineMethods.h
Source/JavaScriptCore/jit/JITStubs.cpp
Source/JavaScriptCore/runtime/Heuristics.cpp
Source/JavaScriptCore/runtime/Heuristics.h

index 0b72c853a2925b82f0c8034c530d1c01976a19eb..633fd3eb1fa0e3a1d083d4a609b1bd5f6f36fe29 100644 (file)
@@ -1,3 +1,28 @@
+2011-11-05  Filip Pizlo  <fpizlo@apple.com>
+
+        Value profiling should just use two buckets
+        https://bugs.webkit.org/show_bug.cgi?id=71619
+
+        Reviewed by Gavin Barraclough.
+        
+        Added one more configuration option (Heuristics::minimumOptimizationDelay),
+        improved debugging in JIT optimization support, changed the number of buckets
+        in the value profile from 9 to 2, and wrote a more optimal value profiling path
+        in the old JIT to take advantage of this. It's still possible to play around with
+        larger numbers of buckets, and we should probably keep this for a little while
+        until we convince ourselves that using just two buckets is the right call.
+
+        * bytecode/CodeBlock.cpp:
+        (JSC::CodeBlock::shouldOptimizeNow):
+        * bytecode/ValueProfile.h:
+        * jit/JITInlineMethods.h:
+        (JSC::JIT::emitValueProfilingSite):
+        * jit/JITStubs.cpp:
+        (JSC::DEFINE_STUB_FUNCTION):
+        * runtime/Heuristics.cpp:
+        (JSC::Heuristics::initializeHeuristics):
+        * runtime/Heuristics.h:
+
 2011-11-03  Filip Pizlo  <fpizlo@apple.com>
 
         JSC should be able to sample itself in a more flexible way than just sampling flags
index 48d732be1183cd7496c57b7191560137d1acddf8..d560eace2ee0f3daaeb75e183862db7dd1c16fe4 100644 (file)
@@ -1956,9 +1956,11 @@ bool CodeBlock::shouldOptimizeNow()
 #endif
 
     if ((!numberOfNonArgumentValueProfiles || (double)numberOfLiveNonArgumentValueProfiles / numberOfNonArgumentValueProfiles >= Heuristics::desiredProfileLivenessRate)
-        && (!numberOfValueProfiles() || (double)numberOfSamplesInProfiles / ValueProfile::numberOfBuckets / numberOfValueProfiles() >= Heuristics::desiredProfileFullnessRate))
+        && (!numberOfValueProfiles() || (double)numberOfSamplesInProfiles / ValueProfile::numberOfBuckets / numberOfValueProfiles() >= Heuristics::desiredProfileFullnessRate)
+        && static_cast<unsigned>(m_optimizationDelayCounter) + 1 >= Heuristics::minimumOptimizationDelay)
         return true;
     
+    ASSERT(m_optimizationDelayCounter < std::numeric_limits<uint8_t>::max());
     m_optimizationDelayCounter++;
     optimizeAfterWarmUp();
     return false;
index ec4ea71057b10eff5710d1845ce623d908604335..b7e920e369c49b6006f683516a4a7a4663b34fcf 100644 (file)
@@ -38,7 +38,7 @@ namespace JSC {
 
 #if ENABLE(VALUE_PROFILER)
 struct ValueProfile {
-    static const unsigned logNumberOfBuckets = 3; // 8 buckets
+    static const unsigned logNumberOfBuckets = 0; // 1 bucket
     static const unsigned numberOfBuckets = 1 << logNumberOfBuckets;
     static const unsigned numberOfSpecFailBuckets = 1;
     static const unsigned bucketIndexMask = numberOfBuckets - 1;
index cdb9859adbc908b36f943c2c16039e3f11f31254..375eab1421e22ecda033329ce64626b677c8f589 100644 (file)
@@ -462,6 +462,9 @@ inline void JIT::emitValueProfilingSite(ValueProfilingSiteKind siteKind)
         return;
     
     const RegisterID value = regT0;
+#if USE(JSVALUE32_64)
+    const RegisterID valueTag = regT1;
+#endif
     const RegisterID scratch = regT3;
     
     ValueProfile* valueProfile;
@@ -474,6 +477,19 @@ inline void JIT::emitValueProfilingSite(ValueProfilingSiteKind siteKind)
     
     ASSERT(valueProfile);
     
+    if (ValueProfile::numberOfBuckets == 1) {
+        // We're in a simple configuration: only one bucket, so we can just do a direct
+        // store.
+#if USE(JSVALUE64)
+        storePtr(value, valueProfile->m_buckets);
+#else
+        EncodedValueDescriptor* descriptor = bitwise_cast<EncodedValueDescriptor*>(valueProfile->m_buckets);
+        store32(value, &descriptor->asBits.payload);
+        store32(valueTag, &descriptor->asBits.tag);
+#endif
+        return;
+    }
+    
     if (m_randomGenerator.getUint32() & 1)
         add32(Imm32(1), bucketCounterRegister);
     else
@@ -483,7 +499,6 @@ inline void JIT::emitValueProfilingSite(ValueProfilingSiteKind siteKind)
 #if USE(JSVALUE64)
     storePtr(value, BaseIndex(scratch, bucketCounterRegister, TimesEight));
 #elif USE(JSVALUE32_64)
-    const RegisterID valueTag = regT1;
     store32(value, BaseIndex(scratch, bucketCounterRegister, TimesEight, OBJECT_OFFSETOF(JSValue, u.asBits.payload)));
     store32(valueTag, BaseIndex(scratch, bucketCounterRegister, TimesEight, OBJECT_OFFSETOF(JSValue, u.asBits.tag)));
 #endif
index beb051c81ca4fb23f79c39681e3b3bd9f56bd3ce..696da5fba7783ab740176aa997e85db306a0aa77 100644 (file)
@@ -1958,7 +1958,7 @@ DEFINE_STUB_FUNCTION(void, optimize_from_loop)
     
     if (void* address = DFG::prepareOSREntry(callFrame, optimizedCodeBlock, bytecodeIndex)) {
 #if ENABLE(JIT_VERBOSE_OSR)
-        printf("Optimizing %p from loop succeeded, performing OSR.\n", codeBlock);
+        printf("Optimizing %p from loop succeeded, performing OSR after a delay of %u.\n", codeBlock, codeBlock->optimizationDelayCounter());
 #endif
 
         codeBlock->optimizeSoon();
@@ -1968,7 +1968,7 @@ DEFINE_STUB_FUNCTION(void, optimize_from_loop)
     }
     
 #if ENABLE(JIT_VERBOSE_OSR)
-    printf("Optimizing %p from loop succeeded, OSR failed.\n", codeBlock);
+    printf("Optimizing %p from loop succeeded, OSR failed, after a delay of %u.\n", codeBlock, codeBlock->optimizationDelayCounter());
 #endif
 
     // Count the OSR failure as a speculation failure. If this happens a lot, then
@@ -2058,7 +2058,7 @@ DEFINE_STUB_FUNCTION(void, optimize_from_ret)
     ASSERT(codeBlock->replacement()->getJITType() == JITCode::DFGJIT);
 
 #if ENABLE(JIT_VERBOSE_OSR)
-    printf("Optimizing %p from return succeeded.\n", codeBlock);
+    printf("Optimizing %p from return succeeded after a delay of %u.\n", codeBlock, codeBlock->optimizationDelayCounter());
 #endif
     
     codeBlock->optimizeSoon();
index 4469ac1ec6354cc0bfe754a32aad0222636f00d8..b5fdd8a47367070033c54301af3485a5efed27c8 100644 (file)
@@ -74,6 +74,7 @@ unsigned largeFailCountThresholdBaseForLoop;
 unsigned reoptimizationRetryCounterMax;
 unsigned reoptimizationRetryCounterStep;
 
+unsigned minimumOptimizationDelay;
 unsigned maximumOptimizationDelay;
 double desiredProfileLivenessRate;
 double desiredProfileFullnessRate;
@@ -157,6 +158,7 @@ void initializeHeuristics()
 
     SET(reoptimizationRetryCounterStep, 1);
 
+    SET(minimumOptimizationDelay,   1);
     SET(maximumOptimizationDelay,   5);
     SET(desiredProfileLivenessRate, 0.75);
     SET(desiredProfileFullnessRate, 0.35);
index fed0e1f09230fdf4bea41b6bd2ef82c4dec7f143..a14859fa8dca12acc1dd3005bca79ba2838b70e1 100644 (file)
@@ -60,6 +60,7 @@ extern unsigned largeFailCountThresholdBaseForLoop;
 extern unsigned reoptimizationRetryCounterMax;
 extern unsigned reoptimizationRetryCounterStep;
 
+extern unsigned minimumOptimizationDelay;
 extern unsigned maximumOptimizationDelay;
 extern double desiredProfileLivenessRate;
 extern double desiredProfileFullnessRate;