Add support for Wasm ctz and popcnt
author keith_miller@apple.com <keith_miller@apple.com@268f45cc-cd09-0410-ab3c-d52691b4dbfc>
Sun, 4 Dec 2016 22:47:02 +0000 (22:47 +0000)
committer keith_miller@apple.com <keith_miller@apple.com@268f45cc-cd09-0410-ab3c-d52691b4dbfc>
Sun, 4 Dec 2016 22:47:02 +0000 (22:47 +0000)
https://bugs.webkit.org/show_bug.cgi?id=165369

Reviewed by Saam Barati.

JSTests:

* wasm/function-tests/ctz.js: Added.
* wasm/function-tests/popcnt.js: Added.

Source/JavaScriptCore:

* assembler/MacroAssemblerARM64.h:
(JSC::MacroAssemblerARM64::countTrailingZeros32):
(JSC::MacroAssemblerARM64::countTrailingZeros64):
* assembler/MacroAssemblerX86Common.cpp:
* assembler/MacroAssemblerX86Common.h:
(JSC::MacroAssemblerX86Common::countTrailingZeros32):
(JSC::MacroAssemblerX86Common::supportsBMI1):
(JSC::MacroAssemblerX86Common::ctzAfterBsf):
* assembler/MacroAssemblerX86_64.h:
(JSC::MacroAssemblerX86_64::countTrailingZeros64):
* assembler/X86Assembler.h:
(JSC::X86Assembler::tzcnt_rr):
(JSC::X86Assembler::tzcntq_rr):
(JSC::X86Assembler::bsf_rr):
(JSC::X86Assembler::bsfq_rr):
* wasm/WasmB3IRGenerator.cpp:
(JSC::Wasm::B3IRGenerator::addOp<OpType::I32Ctz>):
(JSC::Wasm::B3IRGenerator::addOp<OpType::I64Ctz>):
(JSC::Wasm::B3IRGenerator::addOp<OpType::I32Popcnt>):
(JSC::Wasm::B3IRGenerator::addOp<OpType::I64Popcnt>):
* wasm/WasmFunctionParser.h:
(JSC::Wasm::FunctionParser<Context>::parseExpression):

git-svn-id: https://svn.webkit.org/repository/webkit/trunk@209313 268f45cc-cd09-0410-ab3c-d52691b4dbfc

JSTests/ChangeLog
JSTests/wasm/function-tests/ctz.js [new file with mode: 0644]
JSTests/wasm/function-tests/popcnt.js [new file with mode: 0644]
Source/JavaScriptCore/ChangeLog
Source/JavaScriptCore/assembler/MacroAssemblerARM64.h
Source/JavaScriptCore/assembler/MacroAssemblerX86Common.cpp
Source/JavaScriptCore/assembler/MacroAssemblerX86Common.h
Source/JavaScriptCore/assembler/MacroAssemblerX86_64.h
Source/JavaScriptCore/assembler/X86Assembler.h
Source/JavaScriptCore/wasm/WasmB3IRGenerator.cpp
Source/JavaScriptCore/wasm/WasmFunctionParser.h

index 40e7a8c..cb94707 100644 (file)
@@ -1,3 +1,13 @@
+2016-12-04  Keith Miller  <keith_miller@apple.com>
+
+        Add support for Wasm ctz and popcnt
+        https://bugs.webkit.org/show_bug.cgi?id=165369
+
+        Reviewed by Saam Barati.
+
+        * wasm/function-tests/ctz.js: Added.
+        * wasm/function-tests/popcnt.js: Added.
+
 2016-12-03  JF Bastien  <jfbastien@apple.com>
 
         WebAssembly: update binary format to 0xD version
diff --git a/JSTests/wasm/function-tests/ctz.js b/JSTests/wasm/function-tests/ctz.js
new file mode 100644 (file)
index 0000000..496b340
--- /dev/null
@@ -0,0 +1,35 @@
+import Builder from '../Builder.js'
+
+const b = new Builder();
+b.Type().End()
+    .Function().End()
+    .Code()
+    .Function({ params: ["i32"], ret: "i32" }, []) // Function 0: applies i32.ctz to its single argument.
+    .GetLocal(0)
+    .I32Ctz()
+    .End()
+
+    .Function({ params: ["i64"], ret: "i64" }, []) // Function 1: applies i64.ctz to its single argument.
+    .GetLocal(0)
+    .I64Ctz()
+    .End()
+
+const bin = b.WebAssembly()
+bin.trim();
+testWasmModuleFunctions(bin.get(), 2, // One case list per function; each case appears to read as [expected result, [arguments]].
+                        [[{type: "i32", value: "0" }, [{ type: "i32", value: "-1" }]],
+                         [{type: "i32", value: "32" }, [{ type: "i32", value: "0" }]], // Zero input: the expected count is the full 32-bit width.
+                         [{type: "i32", value: "15" }, [{ type: "i32", value: "0x00008000" }]],
+                         [{type: "i32", value: "16" }, [{ type: "i32", value: "0x00010000" }]],
+                         [{type: "i32", value: "31" }, [{ type: "i32", value: "0x80000000" }]], // Only the top bit set.
+                         [{type: "i32", value: "0" }, [{ type: "i32", value: "0x7fffffff" }]],
+                        ],
+
+                        [[{type: "i64", value: "0" }, [{ type: "i64", value: "-1" }]],
+                         [{type: "i64", value: "64" }, [{ type: "i64", value: "0" }]], // Zero input: the expected count is the full 64-bit width.
+                         [{type: "i64", value: "15" }, [{ type: "i64", value: "0x00008000" }]],
+                         [{type: "i64", value: "16" }, [{ type: "i64", value: "0x00010000" }]],
+                         [{type: "i64", value: "63" }, [{ type: "i64", value: "0x8000000000000000" }]], // Only the top bit set.
+                         [{type: "i64", value: "0" }, [{ type: "i64", value: "0x7fffffffffffffff" }]],
+                         ]
+                       );
diff --git a/JSTests/wasm/function-tests/popcnt.js b/JSTests/wasm/function-tests/popcnt.js
new file mode 100644 (file)
index 0000000..7972bee
--- /dev/null
@@ -0,0 +1,40 @@
+import Builder from '../Builder.js'
+
+const b = new Builder();
+b.Type().End()
+    .Function().End()
+    .Code()
+    .Function({ params: ["i32"], ret: "i32" }, []) // Function 0: applies i32.popcnt to its single argument.
+    .GetLocal(0)
+    .I32Popcnt()
+    .End()
+
+    .Function({ params: ["i64"], ret: "i64" }, []) // Function 1: applies i64.popcnt to its single argument.
+    .GetLocal(0)
+    .I64Popcnt()
+    .End()
+
+const bin = b.WebAssembly()
+bin.trim();
+testWasmModuleFunctions(bin.get(), 2, // One case list per function; each case appears to read as [expected result, [arguments]].
+                        [[{type: "i32", value: "32" }, [{ type: "i32", value: "-1" }]], // All 32 bits set.
+                         [{type: "i32", value: "0" }, [{ type: "i32", value: "0" }]],
+                         [{type: "i32", value: "1" }, [{ type: "i32", value: "0x00008000" }]],
+                         [{type: "i32", value: "2" }, [{ type: "i32", value: "0x80008000" }]],
+                         [{type: "i32", value: "31" }, [{ type: "i32", value: "0x7fffffff" }]],
+                         [{type: "i32", value: "16" }, [{ type: "i32", value: "0xaaaaaaaa" }]], // Alternating bit patterns: half the bits set.
+                         [{type: "i32", value: "16" }, [{ type: "i32", value: "0x55555555" }]],
+                         [{type: "i32", value: "24" }, [{ type: "i32", value: "0xdeadbeef" }]],
+                        ],
+
+
+                        [[{type: "i64", value: "64" }, [{ type: "i64", value: "-1" }]], // All 64 bits set.
+                         [{type: "i64", value: "0" }, [{ type: "i64", value: "0" }]],
+                         [{type: "i64", value: "1" }, [{ type: "i64", value: "0x00008000" }]],
+                         [{type: "i64", value: "4" }, [{ type: "i64", value: "0x8000800080008000" }]],
+                         [{type: "i64", value: "63" }, [{ type: "i64", value: "0x7fffffffffffffff" }]],
+                         [{type: "i64", value: "32" }, [{ type: "i64", value: "0xaaaaaaaa55555555" }]], // Set bits in both 32-bit halves must be counted.
+                         [{type: "i64", value: "32" }, [{ type: "i64", value: "0x99999999aaaaaaaa" }]],
+                         [{type: "i64", value: "48" }, [{ type: "i64", value: "0xdeadbeefdeadbeef" }]],
+                        ]
+                       );
index 1fa1da6..1e8e542 100644 (file)
@@ -1,3 +1,33 @@
+2016-12-04  Keith Miller  <keith_miller@apple.com>
+
+        Add support for Wasm ctz and popcnt
+        https://bugs.webkit.org/show_bug.cgi?id=165369
+
+        Reviewed by Saam Barati.
+
+        * assembler/MacroAssemblerARM64.h:
+        (JSC::MacroAssemblerARM64::countTrailingZeros32):
+        (JSC::MacroAssemblerARM64::countTrailingZeros64):
+        * assembler/MacroAssemblerX86Common.cpp:
+        * assembler/MacroAssemblerX86Common.h:
+        (JSC::MacroAssemblerX86Common::countTrailingZeros32):
+        (JSC::MacroAssemblerX86Common::supportsBMI1):
+        (JSC::MacroAssemblerX86Common::ctzAfterBsf):
+        * assembler/MacroAssemblerX86_64.h:
+        (JSC::MacroAssemblerX86_64::countTrailingZeros64):
+        * assembler/X86Assembler.h:
+        (JSC::X86Assembler::tzcnt_rr):
+        (JSC::X86Assembler::tzcntq_rr):
+        (JSC::X86Assembler::bsf_rr):
+        (JSC::X86Assembler::bsfq_rr):
+        * wasm/WasmB3IRGenerator.cpp:
+        (JSC::Wasm::B3IRGenerator::addOp<OpType::I32Ctz>):
+        (JSC::Wasm::B3IRGenerator::addOp<OpType::I64Ctz>):
+        (JSC::Wasm::B3IRGenerator::addOp<OpType::I32Popcnt>):
+        (JSC::Wasm::B3IRGenerator::addOp<OpType::I64Popcnt>):
+        * wasm/WasmFunctionParser.h:
+        (JSC::Wasm::FunctionParser<Context>::parseExpression):
+
 2016-12-04  Saam Barati  <sbarati@apple.com>
 
         We should have a Wasm callee
index 0027a84..6334fbf 100644 (file)
@@ -427,6 +427,20 @@ public:
         m_assembler.clz<64>(dest, src);
     }
 
+    void countTrailingZeros32(RegisterID src, RegisterID dest)
+    {
+        // ARM64 has no count-trailing-zeros instruction, only count-leading-zeros: reverse the bits of src into dest (rbit), then count dest's leading zeros (clz).
+        m_assembler.rbit<32>(dest, src);
+        m_assembler.clz<32>(dest, dest);
+    }
+
+    void countTrailingZeros64(RegisterID src, RegisterID dest)
+    {
+        // ARM64 has no count-trailing-zeros instruction, only count-leading-zeros: reverse the bits of src into dest (rbit), then count dest's leading zeros (clz).
+        m_assembler.rbit<64>(dest, src);
+        m_assembler.clz<64>(dest, dest);
+    }
+
     void lshift32(RegisterID src, RegisterID shiftAmount, RegisterID dest)
     {
         m_assembler.lsl<32>(dest, src, shiftAmount);
index 8751225..528c60f 100644 (file)
@@ -555,6 +555,7 @@ MacroAssemblerX86Common::SSE2CheckState MacroAssemblerX86Common::s_sse2CheckStat
 MacroAssemblerX86Common::CPUIDCheckState MacroAssemblerX86Common::s_sse4_1CheckState = CPUIDCheckState::NotChecked;
 MacroAssemblerX86Common::CPUIDCheckState MacroAssemblerX86Common::s_avxCheckState = CPUIDCheckState::NotChecked;
 MacroAssemblerX86Common::CPUIDCheckState MacroAssemblerX86Common::s_lzcntCheckState = CPUIDCheckState::NotChecked;
+MacroAssemblerX86Common::CPUIDCheckState MacroAssemblerX86Common::s_bmi1CheckState = CPUIDCheckState::NotChecked;
 
 } // namespace JSC
 
index 431876b..bedb06b 100644 (file)
@@ -313,6 +313,16 @@ public:
         clz32AfterBsr(dst);
     }
 
+    void countTrailingZeros32(RegisterID src, RegisterID dst)
+    {
+        if (supportsBMI1()) { // BMI1 path: a single tzcnt is emitted, with no fix-up afterwards.
+            m_assembler.tzcnt_rr(src, dst);
+            return;
+        }
+        m_assembler.bsf_rr(src, dst); // Fallback path: bsf followed by a fix-up for the zero-input case.
+        ctzAfterBsf<32>(dst); // Overwrites dst with 32 unless the NonZero condition holds after the bsf.
+    }
+
     void lshift32(RegisterID shift_amount, RegisterID dest)
     {
         if (shift_amount == X86Registers::ecx)
@@ -2845,6 +2855,39 @@ protected:
         return s_lzcntCheckState == CPUIDCheckState::Set;
     }
 
+    static bool supportsBMI1()
+    {
+        if (s_bmi1CheckState == CPUIDCheckState::NotChecked) {
+            int flags = 0;
+#if COMPILER(MSVC)
+            int cpuInfo[4];
+            __cpuid(cpuInfo, 0x80000001);
+            flags = cpuInfo[2];
+#elif COMPILER(GCC_OR_CLANG)
+            asm (
+                 "movl $0x7, %%eax;"
+                 "movl $0x0, %%ecx;"
+                 "cpuid;"
+                 "movl %%ebx, %0;"
+                 : "=g" (flags)
+                 :
+                 : "%eax", "%ebx", "%ecx", "%edx"
+                 );
+#endif // COMPILER(GCC_OR_CLANG)
+            static int BMI1FeatureBit = 1 << 3;
+            s_bmi1CheckState = (flags & BMI1FeatureBit) ? CPUIDCheckState::Set : CPUIDCheckState::Clear;
+        }
+        return s_bmi1CheckState == CPUIDCheckState::Set;
+    }
+
+    template<int sizeOfRegister>
+    void ctzAfterBsf(RegisterID dst)
+    {
+        Jump srcIsNonZero = m_assembler.jCC(x86Condition(NonZero)); // Callers emit this right after a bsf, so the branch tests the flags that bsf left.
+        move(TrustedImm32(sizeOfRegister), dst); // Fall-through (source was zero): report the full register width.
+        srcIsNonZero.link(this); // A non-zero source lands here with the bsf result still in dst.
+    }
+
 private:
     // Only MacroAssemblerX86 should be using the following method; SSE2 is always available on
     // x86_64, and clients & subclasses of MacroAssembler should be using 'supportsFloatingPoint()'.
@@ -3015,6 +3058,7 @@ private:
     };
     JS_EXPORT_PRIVATE static CPUIDCheckState s_sse4_1CheckState;
     JS_EXPORT_PRIVATE static CPUIDCheckState s_avxCheckState;
+    static CPUIDCheckState s_bmi1CheckState;
     static CPUIDCheckState s_lzcntCheckState;
 };
 
index 67308de..5076aeb 100644 (file)
@@ -391,6 +391,16 @@ public:
         clz64AfterBsr(dst);
     }
 
+    void countTrailingZeros64(RegisterID src, RegisterID dst)
+    {
+        if (supportsBMI1()) { // BMI1 path: a single 64-bit tzcnt is emitted, with no fix-up afterwards.
+            m_assembler.tzcntq_rr(src, dst);
+            return;
+        }
+        m_assembler.bsfq_rr(src, dst); // Fallback path: 64-bit bsf followed by a fix-up for the zero-input case.
+        ctzAfterBsf<64>(dst); // Overwrites dst with 64 unless the NonZero condition holds after the bsf.
+    }
+
     void lshift64(TrustedImm32 imm, RegisterID dest)
     {
         m_assembler.shlq_i8r(imm.m_value, dest);
index 8323360..d2ddff8 100644 (file)
@@ -288,6 +288,8 @@ private:
         OP2_3BYTE_ESCAPE_AE = 0xAE,
         OP2_IMUL_GvEv       = 0xAF,
         OP2_MOVZX_GvEb      = 0xB6,
+        OP2_BSF             = 0xBC,
+        OP2_TZCNT           = 0xBC,
         OP2_BSR             = 0xBD,
         OP2_LZCNT           = 0xBD,
         OP2_MOVSX_GvEb      = 0xBE,
@@ -1018,6 +1020,32 @@ public:
     }
 #endif
 
+    void tzcnt_rr(RegisterID src, RegisterID dst)
+    {
+        m_formatter.prefix(PRE_SSE_F3); // OP2_TZCNT has the same value (0xBC) as OP2_BSF; this prefix is the only difference from bsf_rr.
+        m_formatter.twoByteOp(OP2_TZCNT, dst, src);
+    }
+
+#if CPU(X86_64)
+    void tzcntq_rr(RegisterID src, RegisterID dst)
+    {
+        m_formatter.prefix(PRE_SSE_F3); // Emitted before the 64-bit two-byte op; this prefix is the only difference from bsfq_rr.
+        m_formatter.twoByteOp64(OP2_TZCNT, dst, src);
+    }
+#endif
+
+    void bsf_rr(RegisterID src, RegisterID dst)
+    {
+        m_formatter.twoByteOp(OP2_BSF, dst, src); // No prefix: the bare two-byte opcode, with dst passed ahead of src to the formatter.
+    }
+
+#if CPU(X86_64)
+    void bsfq_rr(RegisterID src, RegisterID dst)
+    {
+        m_formatter.twoByteOp64(OP2_BSF, dst, src); // No prefix: the bare 64-bit two-byte opcode, with dst passed ahead of src to the formatter.
+    }
+#endif
+
 private:
     template<GroupOpcodeID op>
     void shiftInstruction32(int imm, RegisterID dst)
index de89c8c..c4100f9 100644 (file)
@@ -29,6 +29,7 @@
 #if ENABLE(WEBASSEMBLY)
 
 #include "B3BasicBlockInlines.h"
+#include "B3CCallValue.h"
 #include "B3ConstPtrValue.h"
 #include "B3FixSSA.h"
 #include "B3StackmapGenerationParams.h"
@@ -765,6 +766,54 @@ std::unique_ptr<FunctionCompilation> parseAndCompile(VM& vm, const uint8_t* func
 // Custom wasm ops. These are the ones too messy to do in wasm.json.
 
 template<>
+bool B3IRGenerator::addOp<OpType::I32Ctz>(ExpressionType arg, ExpressionType& result)
+{
+    PatchpointValue* patchpoint = m_currentBlock->appendNew<PatchpointValue>(m_proc, Int32, Origin()); // Int32-typed patchpoint whose value becomes the ctz result.
+    patchpoint->append(arg, ValueRep::SomeRegister); // The operand is requested in a register.
+    patchpoint->setGenerator([=] (CCallHelpers& jit, const StackmapGenerationParams& params) {
+        jit.countTrailingZeros32(params[1].gpr(), params[0].gpr()); // params[1] is passed as src and params[0] as dst; presumably params[0] is the patchpoint's own result - confirm against B3.
+    });
+    patchpoint->effects = Effects::none(); // Declared free of side effects.
+    result = patchpoint;
+    return true;
+}
+
+template<>
+bool B3IRGenerator::addOp<OpType::I64Ctz>(ExpressionType arg, ExpressionType& result)
+{
+    PatchpointValue* patchpoint = m_currentBlock->appendNew<PatchpointValue>(m_proc, Int64, Origin()); // Int64-typed patchpoint whose value becomes the ctz result.
+    patchpoint->append(arg, ValueRep::SomeRegister); // The operand is requested in a register.
+    patchpoint->setGenerator([=] (CCallHelpers& jit, const StackmapGenerationParams& params) {
+        jit.countTrailingZeros64(params[1].gpr(), params[0].gpr()); // params[1] is passed as src and params[0] as dst; presumably params[0] is the patchpoint's own result - confirm against B3.
+    });
+    patchpoint->effects = Effects::none(); // Declared free of side effects.
+    result = patchpoint;
+    return true;
+}
+
+template<>
+bool B3IRGenerator::addOp<OpType::I32Popcnt>(ExpressionType arg, ExpressionType& result)
+{
+    // FIXME: This should use the popcnt instruction if SSE4 is available but we don't have code to detect SSE4 yet.
+    // see: https://bugs.webkit.org/show_bug.cgi?id=165363
+    uint32_t (*popcount)(int32_t) = [] (int32_t value) -> uint32_t { return __builtin_popcount(value); }; // Captureless lambda converted to a plain function pointer so it can be called from generated code.
+    Value* funcAddress = m_currentBlock->appendNew<ConstPtrValue>(m_proc, Origin(), bitwise_cast<void*>(popcount)); // The helper's address as a pointer constant.
+    result = m_currentBlock->appendNew<CCallValue>(m_proc, Int32, Origin(), Effects::none(), funcAddress, arg); // Int32-typed C call to the helper with arg, declared effect-free.
+    return true;
+}
+
+template<>
+bool B3IRGenerator::addOp<OpType::I64Popcnt>(ExpressionType arg, ExpressionType& result)
+{
+    // FIXME: This should use the popcnt instruction if SSE4 is available but we don't have code to detect SSE4 yet.
+    // see: https://bugs.webkit.org/show_bug.cgi?id=165363
+    uint64_t (*popcount)(int64_t) = [] (int64_t value) -> uint64_t { return __builtin_popcountll(value); }; // Captureless lambda converted to a plain function pointer so it can be called from generated code.
+    Value* funcAddress = m_currentBlock->appendNew<ConstPtrValue>(m_proc, Origin(), bitwise_cast<void*>(popcount)); // The helper's address as a pointer constant.
+    result = m_currentBlock->appendNew<CCallValue>(m_proc, Int64, Origin(), Effects::none(), funcAddress, arg); // Int64-typed C call to the helper with arg, declared effect-free.
+    return true;
+}
+
+template<>
 bool B3IRGenerator::addOp<F64ConvertUI64>(ExpressionType arg, ExpressionType& result)
 {
     PatchpointValue* patchpoint = m_currentBlock->appendNew<PatchpointValue>(m_proc, Double, Origin());
index 3d67426..16d1311 100644 (file)
@@ -217,6 +217,10 @@ bool FunctionParser<Context>::parseExpression(OpType op)
     case OpType::F64Nearest: return unaryCase<OpType::F64Nearest>();
     case OpType::F32Trunc: return unaryCase<OpType::F32Trunc>();
     case OpType::F64Trunc: return unaryCase<OpType::F64Trunc>();
+    case OpType::I32Ctz: return unaryCase<OpType::I32Ctz>();
+    case OpType::I64Ctz: return unaryCase<OpType::I64Ctz>();
+    case OpType::I32Popcnt: return unaryCase<OpType::I32Popcnt>();
+    case OpType::I64Popcnt: return unaryCase<OpType::I64Popcnt>();
 #define CREATE_CASE(name, id, b3op, inc) case OpType::name: return unaryCase<OpType::name>();
     FOR_EACH_WASM_SIMPLE_UNARY_OP(CREATE_CASE)
 #undef CREATE_CASE