B3 should reduce obvious forms of Shl(SShr)
author fpizlo@apple.com <fpizlo@apple.com@268f45cc-cd09-0410-ab3c-d52691b4dbfc>
Fri, 22 Jan 2016 20:03:01 +0000 (20:03 +0000)
committer fpizlo@apple.com <fpizlo@apple.com@268f45cc-cd09-0410-ab3c-d52691b4dbfc>
Fri, 22 Jan 2016 20:03:01 +0000 (20:03 +0000)
https://bugs.webkit.org/show_bug.cgi?id=153362

Reviewed by Mark Lam and Saam Barati.

This is a 40% speed-up in AsmBench-0.9/dry.c.js.

* b3/B3ReduceStrength.cpp:
* b3/testb3.cpp:
(JSC::B3::testStore16Load16Z):
(JSC::B3::testSShrShl32):
(JSC::B3::testSShrShl64):
(JSC::B3::zero):
(JSC::B3::run):

git-svn-id: https://svn.webkit.org/repository/webkit/trunk@195466 268f45cc-cd09-0410-ab3c-d52691b4dbfc

Source/JavaScriptCore/ChangeLog
Source/JavaScriptCore/b3/B3ReduceStrength.cpp
Source/JavaScriptCore/b3/testb3.cpp

index 60fd3c1..622592d 100644 (file)
@@ -1,3 +1,20 @@
+2016-01-22  Filip Pizlo  <fpizlo@apple.com>
+
+        B3 should reduce obvious forms of Shl(SShr)
+        https://bugs.webkit.org/show_bug.cgi?id=153362
+
+        Reviewed by Mark Lam and Saam Barati.
+
+        This is a 40% speed-up in AsmBench-0.9/dry.c.js.
+
+        * b3/B3ReduceStrength.cpp:
+        * b3/testb3.cpp:
+        (JSC::B3::testStore16Load16Z):
+        (JSC::B3::testSShrShl32):
+        (JSC::B3::testSShrShl64):
+        (JSC::B3::zero):
+        (JSC::B3::run):
+
 2016-01-22  Alex Christensen  <achristensen@webkit.org>
 
         Fix internal Windows build
index abb3a85..add8e29 100644 (file)
@@ -743,6 +743,55 @@ private:
                 break;
             }
 
+            if (m_value->child(1)->hasInt32()
+                && m_value->child(0)->opcode() == Shl
+                && m_value->child(0)->child(1)->hasInt32()
+                && m_value->child(1)->asInt32() == m_value->child(0)->child(1)->asInt32()) {
+                switch (m_value->child(1)->asInt32()) {
+                case 16:
+                    if (m_value->type() == Int32) {
+                        // Turn this: SShr(Shl(value, 16), 16)
+                        // Into this: SExt16(value)
+                        replaceWithNewValue(
+                            m_proc.add<Value>(
+                                SExt16, m_value->origin(), m_value->child(0)->child(0)));
+                    }
+                    break;
+
+                case 24:
+                    if (m_value->type() == Int32) {
+                        // Turn this: SShr(Shl(value, 24), 24)
+                        // Into this: SExt8(value)
+                        replaceWithNewValue(
+                            m_proc.add<Value>(
+                                SExt8, m_value->origin(), m_value->child(0)->child(0)));
+                    }
+                    break;
+
+                case 32:
+                    if (m_value->type() == Int64) {
+                        // Turn this: SShr(Shl(value, 32), 32)
+                        // Into this: SExt32(Trunc(value))
+                        replaceWithNewValue(
+                            m_proc.add<Value>(
+                                SExt32, m_value->origin(),
+                                m_insertionSet.insert<Value>(
+                                    m_index, Trunc, m_value->origin(),
+                                    m_value->child(0)->child(0))));
+                    }
+                    break;
+
+                // FIXME: Add cases for 48 and 56. Those would translate to SExt32(SExt16) and
+                // SExt32(SExt8) respectively, which we don't currently lower efficiently.
+
+                default:
+                    break;
+                }
+
+                if (m_value->opcode() != SShr)
+                    break;
+            }
+
             if (handleShiftByZero())
                 break;
 
index c3e0076..47d2109 100644 (file)
@@ -9501,6 +9501,48 @@ void testStore16Load16Z(int32_t value)
     CHECK(compileAndRun<int32_t>(proc, value) == static_cast<uint16_t>(value));
 }
 
+void testSShrShl32(int32_t value, int32_t sshrAmount, int32_t shlAmount)
+{
+    Procedure proc;
+    BasicBlock* root = proc.addBlock();
+
+    root->appendNew<ControlValue>(
+        proc, Return, Origin(),
+        root->appendNew<Value>(
+            proc, SShr, Origin(),
+            root->appendNew<Value>(
+                proc, Shl, Origin(),
+                root->appendNew<Value>(
+                    proc, Trunc, Origin(),
+                    root->appendNew<ArgumentRegValue>(proc, Origin(), GPRInfo::argumentGPR0)),
+                root->appendNew<Const32Value>(proc, Origin(), shlAmount)),
+            root->appendNew<Const32Value>(proc, Origin(), sshrAmount)));
+
+    CHECK(
+        compileAndRun<int32_t>(proc, value)
+        == ((value << (shlAmount & 31)) >> (sshrAmount & 31)));
+}
+
+void testSShrShl64(int64_t value, int32_t sshrAmount, int32_t shlAmount)
+{
+    Procedure proc;
+    BasicBlock* root = proc.addBlock();
+
+    root->appendNew<ControlValue>(
+        proc, Return, Origin(),
+        root->appendNew<Value>(
+            proc, SShr, Origin(),
+            root->appendNew<Value>(
+                proc, Shl, Origin(),
+                root->appendNew<ArgumentRegValue>(proc, Origin(), GPRInfo::argumentGPR0),
+                root->appendNew<Const32Value>(proc, Origin(), shlAmount)),
+            root->appendNew<Const32Value>(proc, Origin(), sshrAmount)));
+
+    CHECK(
+        compileAndRun<int64_t>(proc, value)
+        == ((value << (shlAmount & 63)) >> (sshrAmount & 63)));
+}
+
 // Make sure the compiler does not try to optimize anything out.
 NEVER_INLINE double zero()
 {
@@ -10785,6 +10827,95 @@ void run(const char* filter)
     RUN(testStore16Load16Z(12345678));
     RUN(testStore16Load16Z(-123));
 
+    RUN(testSShrShl32(42, 24, 24));
+    RUN(testSShrShl32(-42, 24, 24));
+    RUN(testSShrShl32(4200, 24, 24));
+    RUN(testSShrShl32(-4200, 24, 24));
+    RUN(testSShrShl32(4200000, 24, 24));
+    RUN(testSShrShl32(-4200000, 24, 24));
+
+    RUN(testSShrShl32(42, 16, 16));
+    RUN(testSShrShl32(-42, 16, 16));
+    RUN(testSShrShl32(4200, 16, 16));
+    RUN(testSShrShl32(-4200, 16, 16));
+    RUN(testSShrShl32(4200000, 16, 16));
+    RUN(testSShrShl32(-4200000, 16, 16));
+
+    RUN(testSShrShl32(42, 8, 8));
+    RUN(testSShrShl32(-42, 8, 8));
+    RUN(testSShrShl32(4200, 8, 8));
+    RUN(testSShrShl32(-4200, 8, 8));
+    RUN(testSShrShl32(4200000, 8, 8));
+    RUN(testSShrShl32(-4200000, 8, 8));
+    RUN(testSShrShl32(420000000, 8, 8));
+    RUN(testSShrShl32(-420000000, 8, 8));
+
+    RUN(testSShrShl64(42, 56, 56));
+    RUN(testSShrShl64(-42, 56, 56));
+    RUN(testSShrShl64(4200, 56, 56));
+    RUN(testSShrShl64(-4200, 56, 56));
+    RUN(testSShrShl64(4200000, 56, 56));
+    RUN(testSShrShl64(-4200000, 56, 56));
+    RUN(testSShrShl64(420000000, 56, 56));
+    RUN(testSShrShl64(-420000000, 56, 56));
+    RUN(testSShrShl64(42000000000, 56, 56));
+    RUN(testSShrShl64(-42000000000, 56, 56));
+
+    RUN(testSShrShl64(42, 48, 48));
+    RUN(testSShrShl64(-42, 48, 48));
+    RUN(testSShrShl64(4200, 48, 48));
+    RUN(testSShrShl64(-4200, 48, 48));
+    RUN(testSShrShl64(4200000, 48, 48));
+    RUN(testSShrShl64(-4200000, 48, 48));
+    RUN(testSShrShl64(420000000, 48, 48));
+    RUN(testSShrShl64(-420000000, 48, 48));
+    RUN(testSShrShl64(42000000000, 48, 48));
+    RUN(testSShrShl64(-42000000000, 48, 48));
+
+    RUN(testSShrShl64(42, 32, 32));
+    RUN(testSShrShl64(-42, 32, 32));
+    RUN(testSShrShl64(4200, 32, 32));
+    RUN(testSShrShl64(-4200, 32, 32));
+    RUN(testSShrShl64(4200000, 32, 32));
+    RUN(testSShrShl64(-4200000, 32, 32));
+    RUN(testSShrShl64(420000000, 32, 32));
+    RUN(testSShrShl64(-420000000, 32, 32));
+    RUN(testSShrShl64(42000000000, 32, 32));
+    RUN(testSShrShl64(-42000000000, 32, 32));
+
+    RUN(testSShrShl64(42, 24, 24));
+    RUN(testSShrShl64(-42, 24, 24));
+    RUN(testSShrShl64(4200, 24, 24));
+    RUN(testSShrShl64(-4200, 24, 24));
+    RUN(testSShrShl64(4200000, 24, 24));
+    RUN(testSShrShl64(-4200000, 24, 24));
+    RUN(testSShrShl64(420000000, 24, 24));
+    RUN(testSShrShl64(-420000000, 24, 24));
+    RUN(testSShrShl64(42000000000, 24, 24));
+    RUN(testSShrShl64(-42000000000, 24, 24));
+
+    RUN(testSShrShl64(42, 16, 16));
+    RUN(testSShrShl64(-42, 16, 16));
+    RUN(testSShrShl64(4200, 16, 16));
+    RUN(testSShrShl64(-4200, 16, 16));
+    RUN(testSShrShl64(4200000, 16, 16));
+    RUN(testSShrShl64(-4200000, 16, 16));
+    RUN(testSShrShl64(420000000, 16, 16));
+    RUN(testSShrShl64(-420000000, 16, 16));
+    RUN(testSShrShl64(42000000000, 16, 16));
+    RUN(testSShrShl64(-42000000000, 16, 16));
+
+    RUN(testSShrShl64(42, 8, 8));
+    RUN(testSShrShl64(-42, 8, 8));
+    RUN(testSShrShl64(4200, 8, 8));
+    RUN(testSShrShl64(-4200, 8, 8));
+    RUN(testSShrShl64(4200000, 8, 8));
+    RUN(testSShrShl64(-4200000, 8, 8));
+    RUN(testSShrShl64(420000000, 8, 8));
+    RUN(testSShrShl64(-420000000, 8, 8));
+    RUN(testSShrShl64(42000000000, 8, 8));
+    RUN(testSShrShl64(-42000000000, 8, 8));
+
     if (tasks.isEmpty())
         usage();