We should have SSE4 detection in the X86 MacroAssembler.
author utatane.tea@gmail.com <utatane.tea@gmail.com@268f45cc-cd09-0410-ab3c-d52691b4dbfc>
Mon, 26 Mar 2018 21:07:21 +0000 (21:07 +0000)
committer utatane.tea@gmail.com <utatane.tea@gmail.com@268f45cc-cd09-0410-ab3c-d52691b4dbfc>
Mon, 26 Mar 2018 21:07:21 +0000 (21:07 +0000)
https://bugs.webkit.org/show_bug.cgi?id=165363

Reviewed by JF Bastien.

Source/JavaScriptCore:

This patch adds popcnt support to WASM in x86_64 environment.
To use it, we refactor our CPUID feature detection in MacroAssemblerX86Common.
Our spec-tests already cover popcnt.

* assembler/MacroAssemblerARM64.h:
(JSC::MacroAssemblerARM64::supportsCountPopulation):
* assembler/MacroAssemblerX86Common.cpp:
(JSC::MacroAssemblerX86Common::getCPUID):
(JSC::MacroAssemblerX86Common::getCPUIDEx):
(JSC::MacroAssemblerX86Common::collectCPUFeatures):
* assembler/MacroAssemblerX86Common.h:
(JSC::MacroAssemblerX86Common::countPopulation32):
(JSC::MacroAssemblerX86Common::supportsFloatingPointRounding):
(JSC::MacroAssemblerX86Common::supportsCountPopulation):
(JSC::MacroAssemblerX86Common::supportsAVX):
(JSC::MacroAssemblerX86Common::supportsLZCNT):
(JSC::MacroAssemblerX86Common::supportsBMI1):
(JSC::MacroAssemblerX86Common::isSSE2Present):
(JSC::MacroAssemblerX86Common::updateEax1EcxFlags): Deleted.
* assembler/MacroAssemblerX86_64.h:
(JSC::MacroAssemblerX86_64::countPopulation64):
* assembler/X86Assembler.h:
(JSC::X86Assembler::popcnt_rr):
(JSC::X86Assembler::popcnt_mr):
(JSC::X86Assembler::popcntq_rr):
(JSC::X86Assembler::popcntq_mr):
* wasm/WasmB3IRGenerator.cpp:
(JSC::Wasm::B3IRGenerator::addOp<OpType::I32Popcnt>):
(JSC::Wasm::B3IRGenerator::addOp<OpType::I64Popcnt>):

Source/WTF:

GCC 5 supports clobbering PIC registers in inline ASM [1,2].

[1]: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=47602
[2]: https://gcc.gnu.org/viewcvs/gcc?view=revision&revision=216154

* wtf/Atomics.h:
(WTF::x86_cpuid):

git-svn-id: https://svn.webkit.org/repository/webkit/trunk@229988 268f45cc-cd09-0410-ab3c-d52691b4dbfc

Source/JavaScriptCore/ChangeLog
Source/JavaScriptCore/assembler/MacroAssemblerARM64.h
Source/JavaScriptCore/assembler/MacroAssemblerX86Common.cpp
Source/JavaScriptCore/assembler/MacroAssemblerX86Common.h
Source/JavaScriptCore/assembler/MacroAssemblerX86_64.h
Source/JavaScriptCore/assembler/X86Assembler.h
Source/JavaScriptCore/wasm/WasmB3IRGenerator.cpp
Source/WTF/ChangeLog
Source/WTF/wtf/Atomics.h

index 5d9ff26..293fb09 100644 (file)
@@ -1,3 +1,40 @@
+2018-03-26  Yusuke Suzuki  <utatane.tea@gmail.com>
+
+        We should have SSE4 detection in the X86 MacroAssembler.
+        https://bugs.webkit.org/show_bug.cgi?id=165363
+
+        Reviewed by JF Bastien.
+
+        This patch adds popcnt support to WASM in x86_64 environment.
+        To use it, we refactor our CPUID feature detection in MacroAssemblerX86Common.
+        Our spec-tests already cover popcnt.
+
+        * assembler/MacroAssemblerARM64.h:
+        (JSC::MacroAssemblerARM64::supportsCountPopulation):
+        * assembler/MacroAssemblerX86Common.cpp:
+        (JSC::MacroAssemblerX86Common::getCPUID):
+        (JSC::MacroAssemblerX86Common::getCPUIDEx):
+        (JSC::MacroAssemblerX86Common::collectCPUFeatures):
+        * assembler/MacroAssemblerX86Common.h:
+        (JSC::MacroAssemblerX86Common::countPopulation32):
+        (JSC::MacroAssemblerX86Common::supportsFloatingPointRounding):
+        (JSC::MacroAssemblerX86Common::supportsCountPopulation):
+        (JSC::MacroAssemblerX86Common::supportsAVX):
+        (JSC::MacroAssemblerX86Common::supportsLZCNT):
+        (JSC::MacroAssemblerX86Common::supportsBMI1):
+        (JSC::MacroAssemblerX86Common::isSSE2Present):
+        (JSC::MacroAssemblerX86Common::updateEax1EcxFlags): Deleted.
+        * assembler/MacroAssemblerX86_64.h:
+        (JSC::MacroAssemblerX86_64::countPopulation64):
+        * assembler/X86Assembler.h:
+        (JSC::X86Assembler::popcnt_rr):
+        (JSC::X86Assembler::popcnt_mr):
+        (JSC::X86Assembler::popcntq_rr):
+        (JSC::X86Assembler::popcntq_mr):
+        * wasm/WasmB3IRGenerator.cpp:
+        (JSC::Wasm::B3IRGenerator::addOp<OpType::I32Popcnt>):
+        (JSC::Wasm::B3IRGenerator::addOp<OpType::I64Popcnt>):
+
 2018-03-26  Filip Pizlo  <fpizlo@apple.com>
 
         DFG should know that CreateThis can be effectful
index 7f4497f..319d9ea 100644 (file)
@@ -1575,6 +1575,7 @@ public:
     static bool supportsFloatingPointSqrt() { return true; }
     static bool supportsFloatingPointAbs() { return true; }
     static bool supportsFloatingPointRounding() { return true; }
+    static bool supportsCountPopulation() { return false; }
 
     enum BranchTruncateType { BranchIfTruncateFailed, BranchIfTruncateSuccessful };
 
index c90b4a7..43b8625 100644 (file)
 #include "ProbeContext.h"
 #include <wtf/InlineASM.h>
 
+#if COMPILER(MSVC)
+#include <intrin.h>
+#endif
+
 namespace JSC {
 
 #if ENABLE(MASM_PROBE)
@@ -757,14 +761,56 @@ void MacroAssembler::probe(Probe::Function function, void* arg)
 }
 #endif // ENABLE(MASM_PROBE)
 
-#if CPU(X86) && !OS(MAC_OS_X)
-MacroAssemblerX86Common::SSE2CheckState MacroAssemblerX86Common::s_sse2CheckState = NotCheckedSSE2;
+// Convenience wrapper: queries CPUID for the given leaf (`level`) with the sub-leaf count fixed to 0.
+// Returns whatever getCPUIDEx(level, 0) returns.
+MacroAssemblerX86Common::CPUID MacroAssemblerX86Common::getCPUID(unsigned level)
+{
+    return getCPUIDEx(level, 0);
+}
+
+// Executes the CPUID instruction for leaf `level` and sub-leaf `count`.
+// The returned array is laid out as [0]=EAX, [1]=EBX, [2]=ECX, [3]=EDX (see the output
+// constraints in the inline asm below). The result is zero-initialized before the query.
+MacroAssemblerX86Common::CPUID MacroAssemblerX86Common::getCPUIDEx(unsigned level, unsigned count)
+{
+    CPUID result { };
+#if COMPILER(MSVC)
+    // MSVC path: the intrinsic writes its four ints straight into the array's storage.
+    __cpuidex(bitwise_cast<int*>(result.data()), level, count);
+#else
+    // The "0" and "2" input constraints tie `level` to the EAX operand and `count` to the ECX operand.
+    __asm__ (
+        "cpuid\n"
+        : "=a"(result[0]), "=b"(result[1]), "=c"(result[2]), "=d"(result[3])
+        : "0"(level), "2"(count)
+    );
 #endif
+    return result;
+}
+
+// Probes CPUID and caches every feature bit this class cares about in the matching
+// s_*CheckState static. The body runs at most once (guarded by std::call_once), so callers
+// may invoke it whenever they observe a NotChecked state.
+//
+// CPUID results are indexed [0]=EAX, [1]=EBX, [2]=ECX, [3]=EDX:
+//   leaf 0x1:              SSE2 in EDX; SSE4.1, SSE4.2, POPCNT and AVX in ECX.
+//   leaf 0x7, sub-leaf 0:  BMI1 in EBX.
+//   leaf 0x80000001:       LZCNT in ECX.
+void MacroAssemblerX86Common::collectCPUFeatures()
+{
+    static std::once_flag onceKey;
+    std::call_once(onceKey, [] {
+        {
+            CPUID cpuid = getCPUID(0x1);
+            s_sse2CheckState = (cpuid[3] & (1 << 26)) ? CPUIDCheckState::Set : CPUIDCheckState::Clear;
+            s_sse4_1CheckState = (cpuid[2] & (1 << 19)) ? CPUIDCheckState::Set : CPUIDCheckState::Clear;
+            s_sse4_2CheckState = (cpuid[2] & (1 << 20)) ? CPUIDCheckState::Set : CPUIDCheckState::Clear;
+            s_popcntCheckState = (cpuid[2] & (1 << 23)) ? CPUIDCheckState::Set : CPUIDCheckState::Clear;
+            s_avxCheckState = (cpuid[2] & (1 << 28)) ? CPUIDCheckState::Set : CPUIDCheckState::Clear;
+        }
+        {
+            CPUID cpuid = getCPUID(0x7);
+            // BMI1 is bit 3 of EBX (index 1) for leaf 7, matching the register the previous
+            // inline-asm implementation read. Reading ECX here would test an unrelated bit.
+            s_bmi1CheckState = (cpuid[1] & (1 << 3)) ? CPUIDCheckState::Set : CPUIDCheckState::Clear;
+        }
+        {
+            CPUID cpuid = getCPUID(0x80000001);
+            s_lzcntCheckState = (cpuid[2] & (1 << 5)) ? CPUIDCheckState::Set : CPUIDCheckState::Clear;
+        }
+    });
+}
 
+MacroAssemblerX86Common::CPUIDCheckState MacroAssemblerX86Common::s_sse2CheckState = CPUIDCheckState::NotChecked;
 MacroAssemblerX86Common::CPUIDCheckState MacroAssemblerX86Common::s_sse4_1CheckState = CPUIDCheckState::NotChecked;
+MacroAssemblerX86Common::CPUIDCheckState MacroAssemblerX86Common::s_sse4_2CheckState = CPUIDCheckState::NotChecked;
 MacroAssemblerX86Common::CPUIDCheckState MacroAssemblerX86Common::s_avxCheckState = CPUIDCheckState::NotChecked;
 MacroAssemblerX86Common::CPUIDCheckState MacroAssemblerX86Common::s_lzcntCheckState = CPUIDCheckState::NotChecked;
 MacroAssemblerX86Common::CPUIDCheckState MacroAssemblerX86Common::s_bmi1CheckState = CPUIDCheckState::NotChecked;
+MacroAssemblerX86Common::CPUIDCheckState MacroAssemblerX86Common::s_popcntCheckState = CPUIDCheckState::NotChecked;
 
 } // namespace JSC
 
index 0ebe849..7171fc4 100644 (file)
 
 #include "X86Assembler.h"
 #include "AbstractMacroAssembler.h"
+#include <array>
 #include <wtf/Optional.h>
 
-#if COMPILER(MSVC)
-#include <intrin.h>
-#endif
-
 namespace JSC {
 
 using Assembler = TARGET_ASSEMBLER;
@@ -389,6 +386,18 @@ public:
         ctzAfterBsf<32>(dst);
     }
 
+    // Emits a 32-bit popcnt whose source is the memory operand src.base + src.offset and whose
+    // destination is dst. Asserts that supportsCountPopulation() is true; callers are expected
+    // to check that before emitting.
+    void countPopulation32(Address src, RegisterID dst)
+    {
+        ASSERT(supportsCountPopulation());
+        m_assembler.popcnt_mr(src.offset, src.base, dst);
+    }
+
+    // Emits a 32-bit register-to-register popcnt from src into dst.
+    // Asserts that supportsCountPopulation() is true.
+    void countPopulation32(RegisterID src, RegisterID dst)
+    {
+        ASSERT(supportsCountPopulation());
+        m_assembler.popcnt_rr(src, dst);
+    }
+
     // Only used for testing purposes.
     void illegalInstruction()
     {
@@ -3882,48 +3891,21 @@ public:
     static bool supportsFloatingPointRounding()
     {
         if (s_sse4_1CheckState == CPUIDCheckState::NotChecked)
-            updateEax1EcxFlags();
+            collectCPUFeatures();
         return s_sse4_1CheckState == CPUIDCheckState::Set;
     }
 
-    static bool supportsAVX()
+    static bool supportsCountPopulation()
     {
-        // AVX still causes mysterious regressions and those regressions can be massive.
-        return false;
+        if (s_popcntCheckState == CPUIDCheckState::NotChecked)
+            collectCPUFeatures();
+        return s_popcntCheckState == CPUIDCheckState::Set;
     }
 
-    static void updateEax1EcxFlags()
+    static bool supportsAVX()
     {
-        int flags = 0;
-#if COMPILER(MSVC)
-        int cpuInfo[4];
-        __cpuid(cpuInfo, 0x1);
-        flags = cpuInfo[2];
-#elif COMPILER(GCC_OR_CLANG)
-#if CPU(X86_64)
-        asm (
-            "movl $0x1, %%eax;"
-            "cpuid;"
-            "movl %%ecx, %0;"
-            : "=g" (flags)
-            :
-            : "%eax", "%ebx", "%ecx", "%edx"
-            );
-#else
-        asm (
-            "movl $0x1, %%eax;"
-            "pushl %%ebx;"
-            "cpuid;"
-            "popl %%ebx;"
-            "movl %%ecx, %0;"
-            : "=g" (flags)
-            :
-            : "%eax", "%ecx", "%edx"
-            );
-#endif
-#endif // COMPILER(GCC_OR_CLANG)
-        s_sse4_1CheckState = (flags & (1 << 19)) ? CPUIDCheckState::Set : CPUIDCheckState::Clear;
-        s_avxCheckState = (flags & (1 << 28)) ? CPUIDCheckState::Set : CPUIDCheckState::Clear;
+        // AVX still causes mysterious regressions and those regressions can be massive.
+        return false;
     }
 
     void lfence()
@@ -4007,62 +3989,15 @@ protected:
 
     static bool supportsLZCNT()
     {
-        if (s_lzcntCheckState == CPUIDCheckState::NotChecked) {
-            int flags = 0;
-#if COMPILER(MSVC)
-            int cpuInfo[4];
-            __cpuid(cpuInfo, 0x80000001);
-            flags = cpuInfo[2];
-#elif COMPILER(GCC_OR_CLANG)
-#if CPU(X86_64)
-            asm (
-                "movl $0x80000001, %%eax;"
-                "cpuid;"
-                "movl %%ecx, %0;"
-                : "=g" (flags)
-                :
-                : "%eax", "%ebx", "%ecx", "%edx"
-                );
-#else
-            asm (
-                "movl $0x80000001, %%eax;"
-                "pushl %%ebx;"
-                "cpuid;"
-                "popl %%ebx;"
-                "movl %%ecx, %0;"
-                : "=g" (flags)
-                :
-                : "%eax", "%ecx", "%edx"
-                );
-#endif
-#endif // COMPILER(GCC_OR_CLANG)
-            s_lzcntCheckState = (flags & 0x20) ? CPUIDCheckState::Set : CPUIDCheckState::Clear;
-        }
+        if (s_lzcntCheckState == CPUIDCheckState::NotChecked)
+            collectCPUFeatures();
         return s_lzcntCheckState == CPUIDCheckState::Set;
     }
 
     static bool supportsBMI1()
     {
-        if (s_bmi1CheckState == CPUIDCheckState::NotChecked) {
-            int flags = 0;
-#if COMPILER(MSVC)
-            int cpuInfo[4];
-            __cpuid(cpuInfo, 0x80000001);
-            flags = cpuInfo[2];
-#elif COMPILER(GCC_OR_CLANG)
-            asm (
-                 "movl $0x7, %%eax;"
-                 "movl $0x0, %%ecx;"
-                 "cpuid;"
-                 "movl %%ebx, %0;"
-                 : "=g" (flags)
-                 :
-                 : "%eax", "%ebx", "%ecx", "%edx"
-                 );
-#endif // COMPILER(GCC_OR_CLANG)
-            static int BMI1FeatureBit = 1 << 3;
-            s_bmi1CheckState = (flags & BMI1FeatureBit) ? CPUIDCheckState::Set : CPUIDCheckState::Clear;
-        }
+        if (s_bmi1CheckState == CPUIDCheckState::NotChecked)
+            collectCPUFeatures();
         return s_bmi1CheckState == CPUIDCheckState::Set;
     }
 
@@ -4215,47 +4150,12 @@ private:
     }
 
 #else // OS(MAC_OS_X)
-
-    enum SSE2CheckState {
-        NotCheckedSSE2,
-        HasSSE2,
-        NoSSE2
-    };
-
     static bool isSSE2Present()
     {
-        if (s_sse2CheckState == NotCheckedSSE2) {
-            // Default the flags value to zero; if the compiler is
-            // not MSVC or GCC we will read this as SSE2 not present.
-            int flags = 0;
-#if COMPILER(MSVC)
-            _asm {
-                mov eax, 1 // cpuid function 1 gives us the standard feature set
-                cpuid;
-                mov flags, edx;
-            }
-#elif COMPILER(GCC_OR_CLANG)
-            asm (
-                 "movl $0x1, %%eax;"
-                 "pushl %%ebx;"
-                 "cpuid;"
-                 "popl %%ebx;"
-                 "movl %%edx, %0;"
-                 : "=g" (flags)
-                 :
-                 : "%eax", "%ecx", "%edx"
-                 );
-#endif
-            static const int SSE2FeatureBit = 1 << 26;
-            s_sse2CheckState = (flags & SSE2FeatureBit) ? HasSSE2 : NoSSE2;
-        }
-        // Only check once.
-        ASSERT(s_sse2CheckState != NotCheckedSSE2);
-
-        return s_sse2CheckState == HasSSE2;
+        if (s_sse2CheckState == CPUIDCheckState::NotChecked)
+            collectCPUFeatures();
+        return s_sse2CheckState == CPUIDCheckState::Set;
     }
-    
-    JS_EXPORTDATA static SSE2CheckState s_sse2CheckState;
 
 #endif // OS(MAC_OS_X)
 #elif !defined(NDEBUG) // CPU(X86)
@@ -4269,15 +4169,23 @@ private:
 
 #endif
 
+    using CPUID = std::array<unsigned, 4>;
+    static CPUID getCPUID(unsigned level);
+    static CPUID getCPUIDEx(unsigned level, unsigned count);
+    JS_EXPORT_PRIVATE static void collectCPUFeatures();
+
     enum class CPUIDCheckState {
         NotChecked,
         Clear,
         Set
     };
+    JS_EXPORT_PRIVATE static CPUIDCheckState s_sse2CheckState;
     JS_EXPORT_PRIVATE static CPUIDCheckState s_sse4_1CheckState;
+    JS_EXPORT_PRIVATE static CPUIDCheckState s_sse4_2CheckState;
     JS_EXPORT_PRIVATE static CPUIDCheckState s_avxCheckState;
-    static CPUIDCheckState s_bmi1CheckState;
-    static CPUIDCheckState s_lzcntCheckState;
+    JS_EXPORT_PRIVATE static CPUIDCheckState s_lzcntCheckState;
+    JS_EXPORT_PRIVATE static CPUIDCheckState s_bmi1CheckState;
+    JS_EXPORT_PRIVATE static CPUIDCheckState s_popcntCheckState;
 };
 
 } // namespace JSC
index d40c731..4f6b7f2 100644 (file)
@@ -472,6 +472,18 @@ public:
         ctzAfterBsf<64>(dst);
     }
 
+    // Emits a 64-bit register-to-register popcnt (popcntq) from src into dst.
+    // Asserts that supportsCountPopulation() is true.
+    void countPopulation64(RegisterID src, RegisterID dst)
+    {
+        ASSERT(supportsCountPopulation());
+        m_assembler.popcntq_rr(src, dst);
+    }
+
+    // Emits a 64-bit popcnt (popcntq) whose source is the memory operand src.base + src.offset
+    // and whose destination is dst. Asserts that supportsCountPopulation() is true.
+    void countPopulation64(Address src, RegisterID dst)
+    {
+        ASSERT(supportsCountPopulation());
+        m_assembler.popcntq_mr(src.offset, src.base, dst);
+    }
+
     void lshift64(TrustedImm32 imm, RegisterID dest)
     {
         m_assembler.shlq_i8r(imm.m_value, dest);
index e20c6a9..4e60266 100644 (file)
@@ -326,6 +326,7 @@ private:
         OP2_CMPXCHGb        = 0xB0,
         OP2_CMPXCHG         = 0xB1,
         OP2_MOVZX_GvEb      = 0xB6,
+        OP2_POPCNT          = 0xB8,
         OP2_BSF             = 0xBC,
         OP2_TZCNT           = 0xBC,
         OP2_BSR             = 0xBD,
@@ -1651,6 +1652,32 @@ public:
     }
 #endif
 
+    // Encodes popcnt with a register source: the PRE_SSE_F3 prefix followed by the two-byte
+    // OP2_POPCNT opcode, passing dst and then src to the formatter.
+    void popcnt_rr(RegisterID src, RegisterID dst)
+    {
+        m_formatter.prefix(PRE_SSE_F3);
+        m_formatter.twoByteOp(OP2_POPCNT, dst, src);
+    }
+
+    // Encodes popcnt with a memory source (base register plus offset): the PRE_SSE_F3 prefix
+    // followed by the two-byte OP2_POPCNT opcode, with dst as the register operand.
+    void popcnt_mr(int offset, RegisterID base, RegisterID dst)
+    {
+        m_formatter.prefix(PRE_SSE_F3);
+        m_formatter.twoByteOp(OP2_POPCNT, dst, base, offset);
+    }
+
+#if CPU(X86_64)
+    // 64-bit variant of popcnt_rr: same PRE_SSE_F3 prefix and OP2_POPCNT opcode, emitted through
+    // twoByteOp64 instead of twoByteOp. Only compiled when CPU(X86_64) is set.
+    void popcntq_rr(RegisterID src, RegisterID dst)
+    {
+        m_formatter.prefix(PRE_SSE_F3);
+        m_formatter.twoByteOp64(OP2_POPCNT, dst, src);
+    }
+
+    // 64-bit variant of popcnt_mr: same PRE_SSE_F3 prefix and OP2_POPCNT opcode with a
+    // base-plus-offset memory source, emitted through twoByteOp64. Only compiled when
+    // CPU(X86_64) is set.
+    void popcntq_mr(int offset, RegisterID base, RegisterID dst)
+    {
+        m_formatter.prefix(PRE_SSE_F3);
+        m_formatter.twoByteOp64(OP2_POPCNT, dst, base, offset);
+    }
+#endif
+
 private:
     template<GroupOpcodeID op>
     void shiftInstruction32(int imm, RegisterID dst)
index 5c9f47a..b2252fa 100644 (file)
@@ -1567,8 +1567,19 @@ auto B3IRGenerator::addOp<OpType::I64Ctz>(ExpressionType arg, ExpressionType& re
 template<>
 auto B3IRGenerator::addOp<OpType::I32Popcnt>(ExpressionType arg, ExpressionType& result) -> PartialResult
 {
-    // FIXME: This should use the popcnt instruction if SSE4 is available but we don't have code to detect SSE4 yet.
-    // see: https://bugs.webkit.org/show_bug.cgi?id=165363
+#if CPU(X86_64)
+    if (MacroAssembler::supportsCountPopulation()) {
+        PatchpointValue* patchpoint = m_currentBlock->appendNew<PatchpointValue>(m_proc, Int32, origin());
+        patchpoint->append(arg, ValueRep::SomeRegister);
+        patchpoint->setGenerator([=] (CCallHelpers& jit, const StackmapGenerationParams& params) {
+            jit.countPopulation32(params[1].gpr(), params[0].gpr());
+        });
+        patchpoint->effects = Effects::none();
+        result = patchpoint;
+        return { };
+    }
+#endif
+
     uint32_t (*popcount)(int32_t) = [] (int32_t value) -> uint32_t { return __builtin_popcount(value); };
     Value* funcAddress = m_currentBlock->appendNew<ConstPtrValue>(m_proc, origin(), bitwise_cast<void*>(popcount));
     result = m_currentBlock->appendNew<CCallValue>(m_proc, Int32, origin(), Effects::none(), funcAddress, arg);
@@ -1578,8 +1589,19 @@ auto B3IRGenerator::addOp<OpType::I32Popcnt>(ExpressionType arg, ExpressionType&
 template<>
 auto B3IRGenerator::addOp<OpType::I64Popcnt>(ExpressionType arg, ExpressionType& result) -> PartialResult
 {
-    // FIXME: This should use the popcnt instruction if SSE4 is available but we don't have code to detect SSE4 yet.
-    // see: https://bugs.webkit.org/show_bug.cgi?id=165363
+#if CPU(X86_64)
+    if (MacroAssembler::supportsCountPopulation()) {
+        PatchpointValue* patchpoint = m_currentBlock->appendNew<PatchpointValue>(m_proc, Int64, origin());
+        patchpoint->append(arg, ValueRep::SomeRegister);
+        patchpoint->setGenerator([=] (CCallHelpers& jit, const StackmapGenerationParams& params) {
+            jit.countPopulation64(params[1].gpr(), params[0].gpr());
+        });
+        patchpoint->effects = Effects::none();
+        result = patchpoint;
+        return { };
+    }
+#endif
+
     uint64_t (*popcount)(int64_t) = [] (int64_t value) -> uint64_t { return __builtin_popcountll(value); };
     Value* funcAddress = m_currentBlock->appendNew<ConstPtrValue>(m_proc, origin(), bitwise_cast<void*>(popcount));
     result = m_currentBlock->appendNew<CCallValue>(m_proc, Int64, origin(), Effects::none(), funcAddress, arg);
index f4295f0..9f440e3 100644 (file)
@@ -1,3 +1,18 @@
+2018-03-26  Yusuke Suzuki  <utatane.tea@gmail.com>
+
+        We should have SSE4 detection in the X86 MacroAssembler.
+        https://bugs.webkit.org/show_bug.cgi?id=165363
+
+        Reviewed by JF Bastien.
+
+        GCC 5 supports clobbering PIC registers in inline ASM [1,2].
+
+        [1]: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=47602
+        [2]: https://gcc.gnu.org/viewcvs/gcc?view=revision&revision=216154
+
+        * wtf/Atomics.h:
+        (WTF::x86_cpuid):
+
 2018-03-26  Antoine Quint  <graouts@apple.com>
 
         [ASan] Allow Ref<> to be swapped
index 03d3808..f23ec58 100644 (file)
@@ -305,18 +305,6 @@ inline void x86_cpuid()
 #if OS(WINDOWS)
     int info[4];
     __cpuid(info, 0);
-#elif CPU(X86)
-    // GCC 4.9 on x86 in PIC mode can't use %ebx, so we have to save and restore it manually.
-    // But since we don't care about what cpuid returns (we use it as a serializing instruction),
-    // we can simply throw away what cpuid put in %ebx.
-    intptr_t a = 0, c, d;
-    asm volatile(
-        "pushl %%ebx\n\t"
-        "cpuid\n\t"
-        "popl %%ebx\n\t"
-        : "+a"(a), "=c"(c), "=d"(d)
-        :
-        : "memory");
 #else
     intptr_t a = 0, b, c, d;
     asm volatile(