[X86] Emit BT instruction for shift + mask in B3
author justin_michaud@apple.com <justin_michaud@apple.com@268f45cc-cd09-0410-ab3c-d52691b4dbfc>
Sat, 27 Jul 2019 07:08:01 +0000 (07:08 +0000)
committer justin_michaud@apple.com <justin_michaud@apple.com@268f45cc-cd09-0410-ab3c-d52691b4dbfc>
Sat, 27 Jul 2019 07:08:01 +0000 (07:08 +0000)
https://bugs.webkit.org/show_bug.cgi?id=199891

Reviewed by Keith Miller.

JSTests:

* microbenchmarks/bit-test-constant.js: Added.
(let.glob.0.doTest):
* microbenchmarks/bit-test-load.js: Added.
(let.glob.0.let.arr.new.Int32Array.8.doTest):
(i):
* microbenchmarks/bit-test-nonconstant.js: Added.
(let.glob.0.doTest):

Source/JavaScriptCore:

- Add a new BranchTestBit Air opcode, matching the Intel BT instruction
- Select this instruction for the following patterns:
  if (a & (1<<b))
  if ((a>>b)&1)
  if ((~a>>b)&1)
  if (~a & (1<<b))
- 15% perf progression on the nonconstant microbenchmark, neutral otherwise.
- Note: we cannot fuse loads when we have bitBase=Load, bitOffset=Tmp, since the X86 instruction has
  different behaviour in this mode. It will read past the current dword/qword instead of wrapping around.

* assembler/MacroAssemblerX86Common.h:
(JSC::MacroAssemblerX86Common::branchTestBit32):
* assembler/MacroAssemblerX86_64.h:
(JSC::MacroAssemblerX86_64::branchTestBit64):
* assembler/X86Assembler.h:
(JSC::X86Assembler::bt_ir):
(JSC::X86Assembler::bt_im):
(JSC::X86Assembler::btw_ir):
(JSC::X86Assembler::btw_im):
* assembler/testmasm.cpp:
(JSC::int64Operands):
(JSC::testBranchTestBit32RegReg):
(JSC::testBranchTestBit32RegImm):
(JSC::testBranchTestBit32AddrImm):
(JSC::testBranchTestBit64RegReg):
(JSC::testBranchTestBit64RegImm):
(JSC::testBranchTestBit64AddrImm):
(JSC::run):
* b3/B3LowerToAir.cpp:
* b3/air/AirOpcode.opcodes:
* b3/testb3.cpp:
(JSC::B3::testBranchBitTest32TmpImm):
(JSC::B3::testBranchBitTest32AddrImm):
(JSC::B3::testBranchBitTest32TmpTmp):
(JSC::B3::testBranchBitTest64TmpTmp):
(JSC::B3::testBranchBitTest64AddrTmp):
(JSC::B3::testBranchBitTestNegation):
(JSC::B3::testBranchBitTestNegation2):
(JSC::B3::run):

git-svn-id: https://svn.webkit.org/repository/webkit/trunk@247889 268f45cc-cd09-0410-ab3c-d52691b4dbfc

12 files changed:
JSTests/ChangeLog
JSTests/microbenchmarks/bit-test-constant.js [new file with mode: 0644]
JSTests/microbenchmarks/bit-test-load.js [new file with mode: 0644]
JSTests/microbenchmarks/bit-test-nonconstant.js [new file with mode: 0644]
Source/JavaScriptCore/ChangeLog
Source/JavaScriptCore/assembler/MacroAssemblerX86Common.h
Source/JavaScriptCore/assembler/MacroAssemblerX86_64.h
Source/JavaScriptCore/assembler/X86Assembler.h
Source/JavaScriptCore/assembler/testmasm.cpp
Source/JavaScriptCore/b3/B3LowerToAir.cpp
Source/JavaScriptCore/b3/air/AirOpcode.opcodes
Source/JavaScriptCore/b3/testb3.cpp

index c518b77..e54b637 100644 (file)
@@ -1,3 +1,18 @@
+2019-07-27  Justin Michaud  <justin_michaud@apple.com>
+
+        [X86] Emit BT instruction for shift + mask in B3
+        https://bugs.webkit.org/show_bug.cgi?id=199891
+
+        Reviewed by Keith Miller.
+
+        * microbenchmarks/bit-test-constant.js: Added.
+        (let.glob.0.doTest):
+        * microbenchmarks/bit-test-load.js: Added.
+        (let.glob.0.let.arr.new.Int32Array.8.doTest):
+        (i):
+        * microbenchmarks/bit-test-nonconstant.js: Added.
+        (let.glob.0.doTest):
+
 2019-07-26  Yusuke Suzuki  <ysuzuki@apple.com>
 
         [JSC] Potential GC fix for JSPropertyNameEnumerator
diff --git a/JSTests/microbenchmarks/bit-test-constant.js b/JSTests/microbenchmarks/bit-test-constant.js
new file mode 100644 (file)
index 0000000..8769e81
--- /dev/null
@@ -0,0 +1,19 @@
+let glob = 0
+
+function doTest(number) {
+    if ((number>>16)&1)
+        glob += 1
+    if ((number>>15)&1)
+        glob -= 1
+    if ((number>>18)&1)
+        glob += 1
+    if ((number>>19)&1)
+        glob += 1
+}
+noInline(doTest);
+
+for (let i=0; i<(1<<30); ++i)
+    doTest(i)
+
+if (glob != 1073741824)
+    throw "Error: bad result: " + glob;
diff --git a/JSTests/microbenchmarks/bit-test-load.js b/JSTests/microbenchmarks/bit-test-load.js
new file mode 100644 (file)
index 0000000..417bc1f
--- /dev/null
@@ -0,0 +1,22 @@
+let glob = 0
+let arr = new Int32Array(8)
+
+function doTest() {
+    if ((arr[0]>>16)&1)
+        glob += 1
+    if ((arr[1]>>15)&1)
+        glob -= 1
+    if ((arr[2]>>18)&1)
+        glob += 1
+    if ((arr[3]>>19)&1)
+        glob += 1
+}
+noInline(doTest);
+
+for (let i=0; i<(1<<30); ++i) {
+    arr[0] = arr[1] = arr[2] = arr[3] = i
+    doTest()
+}
+
+if (glob != 1073741824)
+    throw "Error: bad result: " + glob;
diff --git a/JSTests/microbenchmarks/bit-test-nonconstant.js b/JSTests/microbenchmarks/bit-test-nonconstant.js
new file mode 100644 (file)
index 0000000..912e2eb
--- /dev/null
@@ -0,0 +1,18 @@
+let glob = 0
+
+function doTest(number, bit) {
+    glob -= 1
+    if ((number>>bit)&1)
+        glob += 1
+    if (((~number)>>(bit+1))&1)
+        glob += 1
+    if (number & (1<<(bit-1)))
+        glob += 1
+}
+noInline(doTest);
+
+for (let i=0; i<(1<<30); ++i)
+    doTest(i, 15)
+
+if (glob != 536870912)
+    throw "Error: bad result: " + glob;
index 23b5d8a..b4da140 100644 (file)
@@ -1,3 +1,48 @@
+2019-07-27  Justin Michaud  <justin_michaud@apple.com>
+
+        [X86] Emit BT instruction for shift + mask in B3
+        https://bugs.webkit.org/show_bug.cgi?id=199891
+
+        Reviewed by Keith Miller.
+
+        - Add a new BranchTestBit Air opcode, matching the Intel BT instruction
+        - Select this instruction for the following patterns:
+          if (a & (1<<b))
+          if ((a>>b)&1)
+          if ((~a>>b)&1)
+          if (~a & (1<<b))
+        - 15% perf progression on the nonconstant microbenchmark, neutral otherwise.
+        - Note: we cannot fuse loads when we have bitBase=Load, bitOffset=Tmp, since the X86 instruction has 
+          different behaviour in this mode. It will read past the current dword/qword instead of wrapping around.
+
+        * assembler/MacroAssemblerX86Common.h:
+        (JSC::MacroAssemblerX86Common::branchTestBit32):
+        * assembler/MacroAssemblerX86_64.h:
+        (JSC::MacroAssemblerX86_64::branchTestBit64):
+        * assembler/X86Assembler.h:
+        (JSC::X86Assembler::bt_ir):
+        (JSC::X86Assembler::bt_im):
+        (JSC::X86Assembler::btw_ir):
+        (JSC::X86Assembler::btw_im):
+        * assembler/testmasm.cpp:
+        (JSC::int64Operands):
+        (JSC::testBranchTestBit32RegReg):
+        (JSC::testBranchTestBit32RegImm):
+        (JSC::testBranchTestBit32AddrImm):
+        (JSC::testBranchTestBit64RegReg):
+        (JSC::testBranchTestBit64RegImm):
+        (JSC::testBranchTestBit64AddrImm):
+        (JSC::run):
+        * b3/B3LowerToAir.cpp:
+        * b3/air/AirOpcode.opcodes:
+        * b3/testb3.cpp:
+        (JSC::B3::testBranchBitTest32TmpImm):
+        (JSC::B3::testBranchBitTest32AddrImm):
+        (JSC::B3::testBranchBitTest32TmpTmp):
+        (JSC::B3::testBranchBitTest64TmpTmp):
+        (JSC::B3::testBranchBitTest64AddrTmp):
+        (JSC::B3::run):
+
 2019-07-26  Yusuke Suzuki  <ysuzuki@apple.com>
 
         [JSC] Potential GC fix for JSPropertyNameEnumerator
index ff09729..df25a58 100644 (file)
@@ -2644,6 +2644,36 @@ public:
         return Jump(m_assembler.jCC(x86Condition(cond)));
     }
 
+    Jump branchTestBit32(ResultCondition cond, RegisterID reg, TrustedImm32 bit)
+    {
+        m_assembler.bt_ir(static_cast<unsigned>(bit.m_value) % 32, reg);
+        if (cond == NonZero)
+            return Jump(m_assembler.jb());
+        if (cond == Zero)
+            return Jump(m_assembler.jae());
+        RELEASE_ASSERT_NOT_REACHED();
+    }
+
+    Jump branchTestBit32(ResultCondition cond, Address testValue, TrustedImm32 bit)
+    {
+        m_assembler.bt_im(static_cast<unsigned>(bit.m_value) % 32, testValue.offset, testValue.base);
+        if (cond == NonZero)
+            return Jump(m_assembler.jb());
+        if (cond == Zero)
+            return Jump(m_assembler.jae());
+        RELEASE_ASSERT_NOT_REACHED();
+    }
+
+    Jump branchTestBit32(ResultCondition cond, RegisterID reg, RegisterID bit)
+    {
+        m_assembler.bt_ir(bit, reg);
+        if (cond == NonZero)
+            return Jump(m_assembler.jb());
+        if (cond == Zero)
+            return Jump(m_assembler.jae());
+        RELEASE_ASSERT_NOT_REACHED();
+    }
+
     void test32(RegisterID reg, TrustedImm32 mask = TrustedImm32(-1))
     {
         if (mask.m_value == -1)
index 0f6a714..452d28d 100644 (file)
@@ -1103,6 +1103,36 @@ public:
         return branchTest64(cond, reg, scratchRegister());
     }
 
+    Jump branchTestBit64(ResultCondition cond, RegisterID testValue, TrustedImm32 bit)
+    {
+        m_assembler.btw_ir(static_cast<unsigned>(bit.m_value) % 64, testValue);
+        if (cond == NonZero)
+            return Jump(m_assembler.jb());
+        if (cond == Zero)
+            return Jump(m_assembler.jae());
+        RELEASE_ASSERT_NOT_REACHED();
+    }
+
+    Jump branchTestBit64(ResultCondition cond, Address testValue, TrustedImm32 bit)
+    {
+        m_assembler.btw_im(static_cast<unsigned>(bit.m_value) % 64, testValue.offset, testValue.base);
+        if (cond == NonZero)
+            return Jump(m_assembler.jb());
+        if (cond == Zero)
+            return Jump(m_assembler.jae());
+        RELEASE_ASSERT_NOT_REACHED();
+    }
+
+    Jump branchTestBit64(ResultCondition cond, RegisterID reg, RegisterID bit)
+    {
+        m_assembler.btw_ir(bit, reg);
+        if (cond == NonZero)
+            return Jump(m_assembler.jb());
+        if (cond == Zero)
+            return Jump(m_assembler.jae());
+        RELEASE_ASSERT_NOT_REACHED();
+    }
+
     void test64(ResultCondition cond, RegisterID reg, TrustedImm32 mask, RegisterID dest)
     {
         if (mask.m_value == -1)
index 5b0c85b..8e9d842 100644 (file)
@@ -294,6 +294,8 @@ private:
         OP2_CMPXCHG         = 0xB1,
         OP2_MOVZX_GvEb      = 0xB6,
         OP2_POPCNT          = 0xB8,
+        OP2_GROUP_BT_EvIb   = 0xBA,
+        OP2_BT_EvEv         = 0xA3,
         OP2_BSF             = 0xBC,
         OP2_TZCNT           = 0xBC,
         OP2_BSR             = 0xBD,
@@ -382,6 +384,8 @@ private:
 
         ESCAPE_D9_FSTP_singleReal = 3,
         ESCAPE_DD_FSTP_doubleReal = 3,
+
+        GROUP_BT_OP_BT = 4,
     } GroupOpcodeID;
     
     class X86InstructionFormatter;
@@ -2149,6 +2153,56 @@ public:
         m_formatter.immediate8(imm);
     }
 
+    void bt_ir(int bitOffset, RegisterID testValue)
+    {
+        ASSERT(-128 <= bitOffset && bitOffset < 128);
+        m_formatter.twoByteOp(OP2_GROUP_BT_EvIb, GROUP_BT_OP_BT, testValue);
+        m_formatter.immediate8(bitOffset);
+    }
+
+    void bt_im(int bitOffset, int offset, RegisterID base)
+    {
+        ASSERT(-128 <= bitOffset && bitOffset < 128);
+        m_formatter.twoByteOp(OP2_GROUP_BT_EvIb, GROUP_BT_OP_BT, base, offset);
+        m_formatter.immediate8(bitOffset);
+    }
+
+    void bt_ir(RegisterID bitOffset, RegisterID testValue)
+    {
+        m_formatter.twoByteOp(OP2_BT_EvEv, bitOffset, testValue);
+    }
+
+    void bt_im(RegisterID bitOffset, int offset, RegisterID base)
+    {
+        m_formatter.twoByteOp(OP2_BT_EvEv, bitOffset, base, offset);
+    }
+
+#if CPU(X86_64)
+    void btw_ir(int bitOffset, RegisterID testValue)
+    {
+        ASSERT(-128 <= bitOffset && bitOffset < 128);
+        m_formatter.twoByteOp64(OP2_GROUP_BT_EvIb, GROUP_BT_OP_BT, testValue);
+        m_formatter.immediate8(bitOffset);
+    }
+
+    void btw_im(int bitOffset, int offset, RegisterID base)
+    {
+        ASSERT(-128 <= bitOffset && bitOffset < 128);
+        m_formatter.twoByteOp64(OP2_GROUP_BT_EvIb, GROUP_BT_OP_BT, base, offset);
+        m_formatter.immediate8(bitOffset);
+    }
+
+    void btw_ir(RegisterID bitOffset, RegisterID testValue)
+    {
+        m_formatter.twoByteOp64(OP2_BT_EvEv, bitOffset, testValue);
+    }
+
+    void btw_im(RegisterID bitOffset, int offset, RegisterID base)
+    {
+        m_formatter.twoByteOp64(OP2_BT_EvEv, bitOffset, base, offset);
+    }
+#endif
+
     void setCC_r(Condition cond, RegisterID dst)
     {
         m_formatter.twoByteOp8(setccOpcode(cond), (GroupOpcodeID)0, dst);
index c906b37..94ce32c 100644 (file)
@@ -293,6 +293,161 @@ static Vector<int32_t> int32Operands()
     };
 }
 
+#if CPU(X86_64)
+static Vector<int64_t> int64Operands()
+{
+    return Vector<int64_t> {
+        0,
+        1,
+        -1,
+        2,
+        -2,
+        42,
+        -42,
+        64,
+        std::numeric_limits<int32_t>::max(),
+        std::numeric_limits<int32_t>::min(),
+        std::numeric_limits<int64_t>::max(),
+        std::numeric_limits<int64_t>::min(),
+    };
+}
+#endif
+
+#if CPU(X86_64)
+void testBranchTestBit32RegReg()
+{
+    for (uint32_t value : int32Operands()) {
+        auto test = compile([=] (CCallHelpers& jit) {
+            jit.emitFunctionPrologue();
+
+            auto branch = jit.branchTestBit32(MacroAssembler::NonZero, GPRInfo::argumentGPR0, GPRInfo::argumentGPR1);
+            jit.move(CCallHelpers::TrustedImm32(0), GPRInfo::returnValueGPR);
+            auto done = jit.jump();
+            branch.link(&jit);
+            jit.move(CCallHelpers::TrustedImm32(1), GPRInfo::returnValueGPR);
+            done.link(&jit);
+
+            jit.emitFunctionEpilogue();
+            jit.ret();
+        });
+
+        for (uint32_t value2 : int32Operands())
+            CHECK_EQ(invoke<int>(test, value, value2), (value>>(value2%32))&1);
+    }
+}
+
+void testBranchTestBit32RegImm()
+{
+    for (uint32_t value : int32Operands()) {
+        auto test = compile([=] (CCallHelpers& jit) {
+            jit.emitFunctionPrologue();
+
+            auto branch = jit.branchTestBit32(MacroAssembler::NonZero, GPRInfo::argumentGPR0, CCallHelpers::TrustedImm32(value));
+            jit.move(CCallHelpers::TrustedImm32(0), GPRInfo::returnValueGPR);
+            auto done = jit.jump();
+            branch.link(&jit);
+            jit.move(CCallHelpers::TrustedImm32(1), GPRInfo::returnValueGPR);
+            done.link(&jit);
+
+            jit.emitFunctionEpilogue();
+            jit.ret();
+        });
+
+        for (uint32_t value2 : int32Operands())
+            CHECK_EQ(invoke<int>(test, value2), (value2>>(value%32))&1);
+    }
+}
+
+void testBranchTestBit32AddrImm()
+{
+    for (uint32_t value : int32Operands()) {
+        auto test = compile([=] (CCallHelpers& jit) {
+            jit.emitFunctionPrologue();
+
+            auto branch = jit.branchTestBit32(MacroAssembler::NonZero, MacroAssembler::Address(GPRInfo::argumentGPR0, 0), CCallHelpers::TrustedImm32(value));
+            jit.move(CCallHelpers::TrustedImm32(0), GPRInfo::returnValueGPR);
+            auto done = jit.jump();
+            branch.link(&jit);
+            jit.move(CCallHelpers::TrustedImm32(1), GPRInfo::returnValueGPR);
+            done.link(&jit);
+
+            jit.emitFunctionEpilogue();
+            jit.ret();
+        });
+
+        for (uint32_t value2 : int32Operands())
+            CHECK_EQ(invoke<int>(test, &value2), (value2>>(value%32))&1);
+    }
+}
+
+void testBranchTestBit64RegReg()
+{
+    for (uint64_t value : int64Operands()) {
+        auto test = compile([=] (CCallHelpers& jit) {
+            jit.emitFunctionPrologue();
+
+            auto branch = jit.branchTestBit64(MacroAssembler::NonZero, GPRInfo::argumentGPR0, GPRInfo::argumentGPR1);
+            jit.move(CCallHelpers::TrustedImm64(0), GPRInfo::returnValueGPR);
+            auto done = jit.jump();
+            branch.link(&jit);
+            jit.move(CCallHelpers::TrustedImm64(1), GPRInfo::returnValueGPR);
+            done.link(&jit);
+
+            jit.emitFunctionEpilogue();
+            jit.ret();
+        });
+
+        for (uint64_t value2 : int64Operands())
+            CHECK_EQ(invoke<long int>(test, value, value2), (value>>(value2%64))&1);
+    }
+}
+
+void testBranchTestBit64RegImm()
+{
+    for (uint64_t value : int64Operands()) {
+        auto test = compile([=] (CCallHelpers& jit) {
+            jit.emitFunctionPrologue();
+
+            auto branch = jit.branchTestBit64(MacroAssembler::NonZero, GPRInfo::argumentGPR0, CCallHelpers::TrustedImm32(value));
+            jit.move(CCallHelpers::TrustedImm64(0), GPRInfo::returnValueGPR);
+            auto done = jit.jump();
+            branch.link(&jit);
+            jit.move(CCallHelpers::TrustedImm64(1), GPRInfo::returnValueGPR);
+            done.link(&jit);
+
+            jit.emitFunctionEpilogue();
+            jit.ret();
+        });
+
+        for (uint64_t value2 : int64Operands())
+            CHECK_EQ(invoke<long int>(test, value2), (value2>>(value%64))&1);
+    }
+}
+
+void testBranchTestBit64AddrImm()
+{
+    for (uint64_t value : int64Operands()) {
+        auto test = compile([=] (CCallHelpers& jit) {
+            jit.emitFunctionPrologue();
+
+            auto branch = jit.branchTestBit64(MacroAssembler::NonZero, MacroAssembler::Address(GPRInfo::argumentGPR0, 0), CCallHelpers::TrustedImm32(value));
+            jit.move(CCallHelpers::TrustedImm64(0), GPRInfo::returnValueGPR);
+            auto done = jit.jump();
+            branch.link(&jit);
+            jit.move(CCallHelpers::TrustedImm64(1), GPRInfo::returnValueGPR);
+            done.link(&jit);
+
+            jit.emitFunctionEpilogue();
+            jit.ret();
+        });
+
+        for (uint64_t value2 : int64Operands())
+            CHECK_EQ(invoke<long int>(test, &value2), (value2>>(value%64))&1);
+    }
+}
+
+#endif
+
 void testCompareDouble(MacroAssembler::DoubleCondition condition)
 {
     double arg1 = 0;
@@ -1137,6 +1292,15 @@ void run(const char* filter)
     RUN(testCompareDouble(MacroAssembler::DoubleLessThanOrEqualOrUnordered));
     RUN(testMul32WithImmediates());
 
+#if CPU(X86_64)
+    RUN(testBranchTestBit32RegReg());
+    RUN(testBranchTestBit32RegImm());
+    RUN(testBranchTestBit32AddrImm());
+    RUN(testBranchTestBit64RegReg());
+    RUN(testBranchTestBit64RegImm());
+    RUN(testBranchTestBit64AddrImm());
+#endif
+
 #if CPU(ARM64)
     RUN(testMul32SignExtend());
 #endif
index 58c0240..1df12f6 100644 (file)
@@ -3325,7 +3325,84 @@ private:
         case Branch: {
             if (canBeInternal(m_value->child(0))) {
                 Value* branchChild = m_value->child(0);
+
                 switch (branchChild->opcode()) {
+                case BitAnd: {
+                    Value* andValue = branchChild->child(0);
+                    Value* andMask = branchChild->child(1);
+                    Air::Opcode opcode = opcodeForType(BranchTestBit32, BranchTestBit64, andValue->type());
+
+                    Value* testValue = nullptr;
+                    Value* bitOffset = nullptr;
+                    Value* internalNode = nullptr;
+                    Value* negationNode = nullptr;
+                    bool inverted = false;
+
+                    // if (~(val >> x)&1)
+                    if (andMask->isInt(1)
+                        && andValue->opcode() == BitXor && (andValue->child(1)->isInt32(-1) || andValue->child(1)->isInt64(-1l))
+                        && (andValue->child(0)->opcode() == SShr || andValue->child(0)->opcode() == ZShr)) {
+
+                        negationNode = andValue;
+                        testValue = andValue->child(0)->child(0);
+                        bitOffset = andValue->child(0)->child(1);
+                        internalNode = andValue->child(0);
+                        inverted = !inverted;
+                    }
+
+                    // Turn if ((val >> x)&1) -> Bt val x
+                    if (andMask->isInt(1) && (andValue->opcode() == SShr || andValue->opcode() == ZShr)) {
+                        testValue = andValue->child(0);
+                        bitOffset = andValue->child(1);
+                        internalNode = andValue;
+                    }
+
+                    // Turn if (val & (1<<x)) -> Bt val x
+                    if ((andMask->opcode() == Shl) && andMask->child(0)->isInt(1)) {
+                        testValue = andValue;
+                        bitOffset = andMask->child(1);
+                        internalNode = andMask;
+                    }
+
+                    // if (~val & (1<<x)) or if ((~val >> x)&1)
+                    if (!negationNode && testValue && testValue->opcode() == BitXor && (testValue->child(1)->isInt32(-1) || testValue->child(1)->isInt64(-1l))) {
+                        negationNode = testValue;
+                        testValue = testValue->child(0);
+                        inverted = !inverted;
+                    }
+
+                    if (testValue && bitOffset) {
+                        for (auto& basePromise : Vector<ArgPromise>::from(loadPromise(testValue), tmpPromise(testValue))) {
+                            bool hasLoad = basePromise.kind() != Arg::Tmp;
+                            bool canMakeInternal = (hasLoad ? canBeInternal(testValue) : !m_locked.contains(testValue))
+                                && (!negationNode || canBeInternal(negationNode))
+                                && (!internalNode || canBeInternal(internalNode));
+
+                            if (basePromise && canMakeInternal) {
+                                if (bitOffset->hasInt() && isValidForm(opcode, Arg::ResCond, basePromise.kind(), Arg::Imm)) {
+                                    commitInternal(branchChild);
+                                    commitInternal(internalNode);
+                                    if (hasLoad)
+                                        commitInternal(testValue);
+                                    commitInternal(negationNode);
+                                    append(basePromise.inst(opcode, m_value, Arg::resCond(MacroAssembler::NonZero).inverted(inverted), basePromise.consume(*this), Arg::imm(bitOffset->asInt())));
+                                    return;
+                                }
+
+                                if (!m_locked.contains(bitOffset) && isValidForm(opcode, Arg::ResCond, basePromise.kind(), Arg::Tmp)) {
+                                    commitInternal(branchChild);
+                                    commitInternal(internalNode);
+                                    if (hasLoad)
+                                        commitInternal(testValue);
+                                    commitInternal(negationNode);
+                                    append(basePromise.inst(opcode, m_value, Arg::resCond(MacroAssembler::NonZero).inverted(inverted), basePromise.consume(*this), tmp(bitOffset)));
+                                    return;
+                                }
+                            }
+                        }
+                    }
+                    break;
+                }
                 case AtomicWeakCAS:
                     commitInternal(branchChild);
                     appendCAS(branchChild, false);
index cb7d637..901c564 100644 (file)
@@ -1161,6 +1161,16 @@ BranchTest32 U:G:32, U:G:32, U:G:32 /branch
     x86: ResCond, Addr, Tmp
     x86: ResCond, Index, BitImm
 
+x86_64: BranchTestBit64 U:G:32, U:G:64, U:G:8 /branch
+    ResCond, Tmp, Imm
+    ResCond, Addr, Imm
+    ResCond, Tmp, Tmp
+
+x86: BranchTestBit32 U:G:32, U:G:32, U:G:8 /branch
+    ResCond, Tmp, Imm
+    ResCond, Addr, Imm
+    ResCond, Tmp, Tmp
+
 BranchDouble U:G:32, U:F:64, U:F:64 /branch
     DoubleCond, Tmp, Tmp
 
index e841bb8..4411e03 100644 (file)
@@ -8754,6 +8754,261 @@ void testComplex(unsigned numVars, unsigned numConstructs)
     dataLog(toCString("    That took ", (after - before).milliseconds(), " ms.\n"));
 }
 
+void testBranchBitTest32TmpImm(uint32_t value, uint32_t imm)
+{
+    Procedure proc;
+    BasicBlock* root = proc.addBlock();
+    BasicBlock* thenCase = proc.addBlock();
+    BasicBlock* elseCase = proc.addBlock();
+
+    Value* testValue = root->appendNew<Value>(
+        proc, Trunc, Origin(),
+        root->appendNew<ArgumentRegValue>(proc, Origin(), GPRInfo::argumentGPR0));
+    Value* bitOffset = root->appendNew<Const32Value>(proc, Origin(), imm);
+
+    Value* one = root->appendNew<Const32Value>(proc, Origin(), 1);
+    Value* bitTest = root->appendNew<Value>(
+        proc, BitAnd, Origin(),
+        root->appendNew<Value>(proc, SShr, Origin(), testValue, bitOffset),
+        one);
+
+    root->appendNewControlValue(
+        proc, Branch, Origin(),
+        bitTest,
+        FrequentedBlock(thenCase), FrequentedBlock(elseCase));
+
+    thenCase->appendNewControlValue(
+        proc, Return, Origin(),
+        thenCase->appendNew<Const32Value>(proc, Origin(), 1));
+
+    elseCase->appendNewControlValue(
+        proc, Return, Origin(),
+        elseCase->appendNew<Const32Value>(proc, Origin(), 0));
+
+    auto code = compileProc(proc);
+    CHECK_EQ(invoke<uint32_t>(*code, value), (value>>(imm%32))&1);
+}
+
+void testBranchBitTest32AddrImm(uint32_t value, uint32_t imm)
+{
+    Procedure proc;
+    BasicBlock* root = proc.addBlock();
+    BasicBlock* thenCase = proc.addBlock();
+    BasicBlock* elseCase = proc.addBlock();
+
+    Value* testValue = root->appendNew<MemoryValue>(
+        proc, Load, Int32, Origin(),
+        root->appendNew<ArgumentRegValue>(proc, Origin(), GPRInfo::argumentGPR0));
+    Value* bitOffset = root->appendNew<Const32Value>(proc, Origin(), imm);
+
+    Value* one = root->appendNew<Const32Value>(proc, Origin(), 1);
+    Value* bitTest = root->appendNew<Value>(
+        proc, BitAnd, Origin(),
+        root->appendNew<Value>(proc, SShr, Origin(), testValue, bitOffset),
+        one);
+
+    root->appendNewControlValue(
+        proc, Branch, Origin(),
+        bitTest,
+        FrequentedBlock(thenCase), FrequentedBlock(elseCase));
+
+    thenCase->appendNewControlValue(
+        proc, Return, Origin(),
+        thenCase->appendNew<Const32Value>(proc, Origin(), 1));
+
+    elseCase->appendNewControlValue(
+        proc, Return, Origin(),
+        elseCase->appendNew<Const32Value>(proc, Origin(), 0));
+
+    auto code = compileProc(proc);
+    CHECK_EQ(invoke<uint32_t>(*code, &value), (value>>(imm%32))&1);
+}
+
+void testBranchBitTest32TmpTmp(uint32_t value, uint32_t value2)
+{
+    Procedure proc;
+    BasicBlock* root = proc.addBlock();
+    BasicBlock* thenCase = proc.addBlock();
+    BasicBlock* elseCase = proc.addBlock();
+
+    Value* testValue = root->appendNew<Value>(
+        proc, Trunc, Origin(),
+        root->appendNew<ArgumentRegValue>(proc, Origin(), GPRInfo::argumentGPR0));
+    Value* bitOffset = root->appendNew<Value>(
+        proc, Trunc, Origin(),
+        root->appendNew<ArgumentRegValue>(proc, Origin(), GPRInfo::argumentGPR1));
+
+    Value* one = root->appendNew<Const32Value>(proc, Origin(), 1);
+    Value* bitTest = root->appendNew<Value>(
+        proc, BitAnd, Origin(),
+        root->appendNew<Value>(proc, SShr, Origin(), testValue, bitOffset),
+        one);
+
+    root->appendNewControlValue(
+        proc, Branch, Origin(),
+        bitTest,
+        FrequentedBlock(thenCase), FrequentedBlock(elseCase));
+
+    thenCase->appendNewControlValue(
+        proc, Return, Origin(),
+        thenCase->appendNew<Const32Value>(proc, Origin(), 1));
+
+    elseCase->appendNewControlValue(
+        proc, Return, Origin(),
+        elseCase->appendNew<Const32Value>(proc, Origin(), 0));
+
+    auto code = compileProc(proc);
+    CHECK_EQ(invoke<uint32_t>(*code, value, value2), (value>>(value2%32))&1);
+}
+
+void testBranchBitTest64TmpTmp(uint64_t value, uint64_t value2)
+{
+    Procedure proc;
+    BasicBlock* root = proc.addBlock();
+    BasicBlock* thenCase = proc.addBlock();
+    BasicBlock* elseCase = proc.addBlock();
+
+    Value* testValue = root->appendNew<Value>(proc, BitXor, Origin(),
+        root->appendNew<ArgumentRegValue>(proc, Origin(), GPRInfo::argumentGPR0),
+        root->appendNew<Const64Value>(proc, Origin(), -1l));
+    Value* bitOffset = root->appendNew<Value>(
+        proc, Trunc, Origin(),
+        root->appendNew<ArgumentRegValue>(proc, Origin(), GPRInfo::argumentGPR1));
+
+    Value* one = root->appendNew<Const64Value>(proc, Origin(), 1);
+    Value* bitTest = root->appendNew<Value>(
+        proc, BitAnd, Origin(),
+        testValue,
+        root->appendNew<Value>(proc, Shl, Origin(), one, bitOffset));
+
+    root->appendNewControlValue(
+        proc, Branch, Origin(),
+        bitTest,
+        FrequentedBlock(thenCase), FrequentedBlock(elseCase));
+
+    thenCase->appendNewControlValue(
+        proc, Return, Origin(),
+        thenCase->appendNew<Const64Value>(proc, Origin(), 0));
+
+    elseCase->appendNewControlValue(
+        proc, Return, Origin(),
+        elseCase->appendNew<Const64Value>(proc, Origin(), 1));
+
+    auto code = compileProc(proc);
+    CHECK_EQ(invoke<uint64_t>(*code, value, value2), (value>>(value2%64))&1);
+}
+
+void testBranchBitTest64AddrTmp(uint64_t value, uint64_t value2)
+{
+    Procedure proc;
+    BasicBlock* root = proc.addBlock();
+    BasicBlock* thenCase = proc.addBlock();
+    BasicBlock* elseCase = proc.addBlock();
+
+    Value* testValue = root->appendNew<MemoryValue>(
+        proc, Load, Int64, Origin(),
+        root->appendNew<ArgumentRegValue>(proc, Origin(), GPRInfo::argumentGPR0));
+    Value* bitOffset = root->appendNew<Value>(
+        proc, Trunc, Origin(),
+        root->appendNew<ArgumentRegValue>(proc, Origin(), GPRInfo::argumentGPR1));
+
+    Value* one = root->appendNew<Const64Value>(proc, Origin(), 1);
+    Value* bitTest = root->appendNew<Value>(
+        proc, BitAnd, Origin(),
+        testValue,
+        root->appendNew<Value>(proc, Shl, Origin(), one, bitOffset));
+
+    root->appendNewControlValue(
+        proc, Branch, Origin(),
+        bitTest,
+        FrequentedBlock(thenCase), FrequentedBlock(elseCase));
+
+    thenCase->appendNewControlValue(
+        proc, Return, Origin(),
+        thenCase->appendNew<Const64Value>(proc, Origin(), 1));
+
+    elseCase->appendNewControlValue(
+        proc, Return, Origin(),
+        elseCase->appendNew<Const64Value>(proc, Origin(), 0));
+
+    auto code = compileProc(proc);
+    CHECK_EQ(invoke<uint64_t>(*code, &value, value2), (value>>(value2%64))&1);
+}
+
+void testBranchBitTestNegation(uint64_t value, uint64_t value2)
+{
+    Procedure proc;
+    BasicBlock* root = proc.addBlock();
+    BasicBlock* thenCase = proc.addBlock();
+    BasicBlock* elseCase = proc.addBlock();
+
+    Value* testValue = root->appendNew<ArgumentRegValue>(proc, Origin(), GPRInfo::argumentGPR0);
+    Value* bitOffset = root->appendNew<Value>(
+        proc, Trunc, Origin(),
+        root->appendNew<ArgumentRegValue>(proc, Origin(), GPRInfo::argumentGPR1));
+    Value* shift = root->appendNew<Value>(proc, SShr, Origin(), testValue, bitOffset);
+
+    Value* one = root->appendNew<Const64Value>(proc, Origin(), 1);
+    Value* bitTest = root->appendNew<Value>(
+        proc, BitAnd, Origin(),
+        root->appendNew<Value>(proc, BitXor, Origin(), shift, root->appendNew<Const64Value>(proc, Origin(), -1l)),
+        one);
+
+    root->appendNewControlValue(
+        proc, Branch, Origin(),
+        bitTest,
+        FrequentedBlock(thenCase), FrequentedBlock(elseCase));
+
+    thenCase->appendNewControlValue(
+        proc, Return, Origin(),
+        thenCase->appendNew<Const64Value>(proc, Origin(), 0));
+
+    elseCase->appendNewControlValue(
+        proc, Return, Origin(),
+        elseCase->appendNew<Const64Value>(proc, Origin(), 1));
+
+    auto code = compileProc(proc);
+    CHECK_EQ(invoke<uint64_t>(*code, value, value2), (value>>(value2%64))&1);
+}
+
+void testBranchBitTestNegation2(uint64_t value, uint64_t value2)
+{
+    Procedure proc;
+    BasicBlock* root = proc.addBlock();
+    BasicBlock* thenCase = proc.addBlock();
+    BasicBlock* elseCase = proc.addBlock();
+
+    Value* testValue = root->appendNew<Value>(proc, BitXor, Origin(),
+        root->appendNew<ArgumentRegValue>(proc, Origin(), GPRInfo::argumentGPR0),
+        root->appendNew<Const64Value>(proc, Origin(), -1l));
+    Value* bitOffset = root->appendNew<Value>(
+        proc, Trunc, Origin(),
+        root->appendNew<ArgumentRegValue>(proc, Origin(), GPRInfo::argumentGPR1));
+    Value* shift = root->appendNew<Value>(proc, SShr, Origin(), testValue, bitOffset);
+
+    Value* one = root->appendNew<Const64Value>(proc, Origin(), 1);
+    Value* bitTest = root->appendNew<Value>(
+        proc, BitAnd, Origin(),
+        shift,
+        one);
+
+    root->appendNewControlValue(
+        proc, Branch, Origin(),
+        bitTest,
+        FrequentedBlock(thenCase), FrequentedBlock(elseCase));
+
+    thenCase->appendNewControlValue(
+        proc, Return, Origin(),
+        thenCase->appendNew<Const64Value>(proc, Origin(), 0));
+
+    elseCase->appendNewControlValue(
+        proc, Return, Origin(),
+        elseCase->appendNew<Const64Value>(proc, Origin(), 1));
+
+    auto code = compileProc(proc);
+    CHECK_EQ(invoke<uint64_t>(*code, value, value2), (value>>(value2%64))&1);
+}
+
 void testSimplePatchpoint()
 {
     Procedure proc;
@@ -18118,6 +18373,14 @@ void run(const char* filter)
     RUN(testComplex(4, 256));
     RUN(testComplex(4, 384));
 
+    RUN_BINARY(testBranchBitTest32TmpImm, int32Operands(), int32Operands());
+    RUN_BINARY(testBranchBitTest32AddrImm, int32Operands(), int32Operands());
+    RUN_BINARY(testBranchBitTest32TmpTmp, int32Operands(), int32Operands());
+    RUN_BINARY(testBranchBitTest64TmpTmp, int64Operands(), int64Operands());
+    RUN_BINARY(testBranchBitTest64AddrTmp, int64Operands(), int64Operands());
+    RUN_BINARY(testBranchBitTestNegation, int64Operands(), int64Operands());
+    RUN_BINARY(testBranchBitTestNegation2, int64Operands(), int64Operands());
+
     RUN(testSimplePatchpoint());
     RUN(testSimplePatchpointWithoutOuputClobbersGPArgs());
     RUN(testSimplePatchpointWithOuputClobbersGPArgs());