Initial work to reduce cost of JSNumberCell allocation
author oliver@apple.com <oliver@apple.com@268f45cc-cd09-0410-ab3c-d52691b4dbfc>
Thu, 30 Oct 2008 04:33:21 +0000 (04:33 +0000)
committer oliver@apple.com <oliver@apple.com@268f45cc-cd09-0410-ab3c-d52691b4dbfc>
Thu, 30 Oct 2008 04:33:21 +0000 (04:33 +0000)
Reviewed by Geoffrey Garen

This does the initial work needed to bring more of number
allocation into CTI code directly, rather than just falling
back onto the slow paths if we can't guarantee that a number
cell can be reused.

Initial implementation only used by op_negate to make sure
it all works.  In a negate heavy (though not dominated) test
it results in a 10% win in the non-reusable cell case.

git-svn-id: https://svn.webkit.org/repository/webkit/trunk@37991 268f45cc-cd09-0410-ab3c-d52691b4dbfc

13 files changed:
JavaScriptCore/ChangeLog
JavaScriptCore/VM/CTI.cpp
JavaScriptCore/VM/CTI.h
JavaScriptCore/VM/CodeBlock.cpp
JavaScriptCore/VM/CodeGenerator.cpp
JavaScriptCore/VM/CodeGenerator.h
JavaScriptCore/VM/Instruction.h
JavaScriptCore/VM/Machine.cpp
JavaScriptCore/VM/Machine.h
JavaScriptCore/kjs/ResultType.h
JavaScriptCore/kjs/nodes.cpp
JavaScriptCore/masm/X86Assembler.h
JavaScriptCore/runtime/JSNumberCell.h

index c044ac4..255796f 100644 (file)
@@ -1,3 +1,51 @@
+2008-10-29  Oliver Hunt  <oliver@apple.com>
+
+        Reviewed by Geoff Garen.
+
+        Initial work to reduce cost of JSNumberCell allocation
+
+        This does the initial work needed to bring more of number
+        allocation into CTI code directly, rather than just falling
+        back onto the slow paths if we can't guarantee that a number
+        cell can be reused.
+
+        Initial implementation only used by op_negate to make sure
+        it all works.  In a negate heavy (though not dominated) test
+        it results in a 10% win in the non-reusable cell case.
+
+        * VM/CTI.cpp:
+        (JSC::):
+        (JSC::CTI::emitAllocateNumber):
+        (JSC::CTI::emitNakedFastCall):
+        (JSC::CTI::emitArithIntToImmWithJump):
+        (JSC::CTI::privateCompileMainPass):
+        (JSC::CTI::privateCompileSlowCases):
+        * VM/CTI.h:
+        * VM/CodeBlock.cpp:
+        (JSC::CodeBlock::dump):
+        * VM/CodeGenerator.cpp:
+        (JSC::CodeGenerator::emitUnaryOp):
+        * VM/CodeGenerator.h:
+        (JSC::CodeGenerator::emitToJSNumber):
+        (JSC::CodeGenerator::emitTypeOf):
+        (JSC::CodeGenerator::emitGetPropertyNames):
+        * VM/Machine.cpp:
+        (JSC::Machine::privateExecute):
+        * VM/Machine.h:
+        * kjs/ResultType.h:
+        (JSC::ResultType::isReusableNumber):
+        (JSC::ResultType::toInt):
+        * kjs/nodes.cpp:
+        (JSC::UnaryOpNode::emitCode):
+        (JSC::BinaryOpNode::emitCode):
+        (JSC::EqualNode::emitCode):
+        * masm/X86Assembler.h:
+        (JSC::X86Assembler::):
+        (JSC::X86Assembler::negl_r):
+        (JSC::X86Assembler::xorpd_mr):
+        * runtime/JSNumberCell.h:
+        (JSC::JSNumberCell::JSNumberCell):
+
 2008-10-29  Steve Falkenburg  <sfalken@apple.com>
 
         <rdar://problem/6326563> Crash on launch
index eac089e..5408bf0 100644 (file)
@@ -311,6 +311,20 @@ void CTI::printOpcodeOperandTypes(unsigned src1, unsigned src2)
 
 #endif
 
+extern "C" {
+    static JSValue* FASTCALL allocateNumber(JSGlobalData* globalData) {
+        JSValue* result = new (globalData) JSNumberCell(globalData);
+        ASSERT(result);
+        return result;
+    }
+}
+
+ALWAYS_INLINE void CTI::emitAllocateNumber(JSGlobalData* globalData, unsigned opcodeIndex)
+{
+    m_jit.movl_i32r(reinterpret_cast<intptr_t>(globalData), X86::ecx);
+    emitNakedFastCall(opcodeIndex, (void*)allocateNumber);
+}
+
 ALWAYS_INLINE X86Assembler::JmpSrc CTI::emitNakedCall(unsigned opcodeIndex, X86::RegisterID r)
 {
     X86Assembler::JmpSrc call = m_jit.emitCall(r);
@@ -326,6 +340,13 @@ ALWAYS_INLINE  X86Assembler::JmpSrc CTI::emitNakedCall(unsigned opcodeIndex, voi
     return call;
 }
 
+ALWAYS_INLINE  X86Assembler::JmpSrc CTI::emitNakedFastCall(unsigned opcodeIndex, void* function)
+{
+    X86Assembler::JmpSrc call = m_jit.emitCall();
+    m_calls.append(CallRecord(call, reinterpret_cast<CTIHelper_v>(function), opcodeIndex));
+    return call;
+}
+
 ALWAYS_INLINE X86Assembler::JmpSrc CTI::emitCTICall(Instruction* vPC, unsigned opcodeIndex, CTIHelper_j helper)
 {
 #if ENABLE(OPCODE_SAMPLING)
@@ -516,6 +537,14 @@ ALWAYS_INLINE void CTI::emitFastArithIntToImmNoCheck(X86Assembler::RegisterID re
     emitFastArithReTagImmediate(reg);
 }
 
+ALWAYS_INLINE X86Assembler::JmpSrc CTI::emitArithIntToImmWithJump(X86Assembler::RegisterID reg)
+{
+    m_jit.addl_rr(reg, reg);
+    X86Assembler::JmpSrc jmp = m_jit.emitUnlinkedJo();
+    emitFastArithReTagImmediate(reg);
+    return jmp;
+}
+
 ALWAYS_INLINE void CTI::emitTagAsBoolImmediate(X86Assembler::RegisterID reg)
 {
     m_jit.shl_i8r(JSImmediate::ExtendedPayloadShift, reg);
@@ -1480,10 +1509,51 @@ void CTI::privateCompileMainPass()
             break;
         }
         case op_negate: {
-            emitGetPutArg(instruction[i + 2].u.operand, 0, X86::ecx);
-            emitCTICall(instruction + i, i, Machine::cti_op_negate);
+            emitGetArg(instruction[i + 2].u.operand, X86::eax);
+            m_jit.testl_i32r(JSImmediate::TagBitTypeInteger, X86::eax);
+            X86Assembler::JmpSrc notImmediate = m_jit.emitUnlinkedJe();
+
+            m_jit.cmpl_i32r(JSImmediate::TagBitTypeInteger, X86::eax);
+            X86Assembler::JmpSrc zeroImmediate = m_jit.emitUnlinkedJe();
+            emitFastArithImmToInt(X86::eax);
+            m_jit.negl_r(X86::eax); // This can't overflow as we only have a 31bit int at this point
+            X86Assembler::JmpSrc overflow = emitArithIntToImmWithJump(X86::eax);
             emitPutResult(instruction[i + 1].u.operand);
-            i += 3;
+            X86Assembler::JmpSrc immediateNegateSuccess = m_jit.emitUnlinkedJmp();
+
+            if (!isSSE2Present()) {
+                m_jit.link(zeroImmediate, m_jit.label());
+                m_jit.link(overflow, m_jit.label());
+                m_jit.link(notImmediate, m_jit.label());
+                emitGetPutArg(instruction[i + 2].u.operand, 0, X86::ecx);
+                emitCTICall(instruction + i, i, Machine::cti_op_negate);
+                emitPutResult(instruction[i + 1].u.operand);
+            } else {
+                // Slow case immediates
+                m_slowCases.append(SlowCaseEntry(zeroImmediate, i));
+                m_slowCases.append(SlowCaseEntry(overflow, i));
+                m_jit.link(notImmediate, m_jit.label());
+                ResultType resultType(instruction[i + 3].u.resultType);
+                if (!resultType.definitelyIsNumber()) {
+                    emitJumpSlowCaseIfNotJSCell(X86::eax, i);
+                    StructureID* numberStructureID = m_callFrame->globalData().numberStructureID.get();
+                    m_jit.cmpl_i32m(reinterpret_cast<unsigned>(numberStructureID), OBJECT_OFFSET(JSCell, m_structureID), X86::eax);
+                    m_slowCases.append(SlowCaseEntry(m_jit.emitUnlinkedJne(), i));
+                }
+                m_jit.movsd_mr(OBJECT_OFFSET(JSNumberCell, m_value), X86::eax, X86::xmm0);
+                // We need 3 copies of the sign bit mask so we can assure alignment and pad for the 128bit load
+                static double doubleSignBit[] = { -0.0, -0.0, -0.0 };
+                m_jit.xorpd_mr((void*)((((uintptr_t)doubleSignBit)+15)&~15), X86::xmm0);
+                X86Assembler::JmpSrc wasCell;
+                if (!resultType.isReusableNumber())
+                    emitAllocateNumber(&m_callFrame->globalData(), i);
+
+                putDoubleResultToJSNumberCellOrJSImmediate(X86::xmm0, X86::eax, instruction[i + 1].u.operand, &wasCell,
+                                                           X86::xmm1, X86::ecx, X86::edx);
+                m_jit.link(wasCell, m_jit.label());
+            }
+            m_jit.link(immediateNegateSuccess, m_jit.label());
+            i += 4;
             break;
         }
         case op_resolve_skip: {
@@ -2317,6 +2387,14 @@ void CTI::privateCompileSlowCases()
             i += 5;
             break;
         }
+        case op_negate: {
+            m_jit.link(iter->from, m_jit.label());
+            emitGetPutArg(instruction[i + 2].u.operand, 0, X86::ecx);
+            emitCTICall(instruction + i, i, Machine::cti_op_negate);
+            emitPutResult(instruction[i + 1].u.operand);
+            i += 4;
+            break;
+        }
         case op_rshift: {
             m_jit.link(iter->from, m_jit.label());
             m_jit.link((++iter)->from, m_jit.label());
index 66f34a0..04bb6ca 100644 (file)
 
 #define CTI_RETURN_ADDRESS_SLOT (ARGS[-1])
 
+#if COMPILER(MSVC)
+#define FASTCALL __fastcall
+#elif COMPILER(GCC)
+#define FASTCALL  __attribute__ ((fastcall))
+#else
+#error Need to support fastcall calling convention in this compiler
+#endif
+
 namespace JSC {
 
     class CodeBlock;
@@ -405,11 +413,15 @@ namespace JSC {
         void emitFastArithImmToInt(X86Assembler::RegisterID);
         void emitFastArithIntToImmOrSlowCase(X86Assembler::RegisterID, unsigned opcodeIndex);
         void emitFastArithIntToImmNoCheck(X86Assembler::RegisterID);
+        X86Assembler::JmpSrc emitArithIntToImmWithJump(X86Assembler::RegisterID reg);
 
         void emitTagAsBoolImmediate(X86Assembler::RegisterID reg);
 
+        void emitAllocateNumber(JSGlobalData*, unsigned);
+
         X86Assembler::JmpSrc emitNakedCall(unsigned opcodeIndex, X86::RegisterID);
         X86Assembler::JmpSrc emitNakedCall(unsigned opcodeIndex, void(*function)());
+        X86Assembler::JmpSrc emitNakedFastCall(unsigned opcodeIndex, void*);
         X86Assembler::JmpSrc emitCTICall(Instruction*, unsigned opcodeIndex, CTIHelper_j);
         X86Assembler::JmpSrc emitCTICall(Instruction*, unsigned opcodeIndex, CTIHelper_o);
         X86Assembler::JmpSrc emitCTICall(Instruction*, unsigned opcodeIndex, CTIHelper_p);
index 071875d..9476fc4 100644 (file)
@@ -468,6 +468,7 @@ void CodeBlock::dump(ExecState* exec, const Vector<Instruction>::const_iterator&
         }
         case op_negate: {
             printUnaryOp(location, it, "negate");
+            ++it;
             break;
         }
         case op_add: {
index b4a31e9..c78daa2 100644 (file)
@@ -709,11 +709,13 @@ RegisterID* CodeGenerator::emitMove(RegisterID* dst, RegisterID* src)
     return dst;
 }
 
-RegisterID* CodeGenerator::emitUnaryOp(OpcodeID opcode, RegisterID* dst, RegisterID* src)
+RegisterID* CodeGenerator::emitUnaryOp(OpcodeID opcode, RegisterID* dst, RegisterID* src, ResultType type)
 {
     emitOpcode(opcode);
     instructions().append(dst->index());
     instructions().append(src->index());
+    if (opcode == op_negate)
+        instructions().append(type.toInt());
     return dst;
 }
 
index 0fb4d8f..c55a772 100644 (file)
@@ -233,7 +233,7 @@ namespace JSC {
         RegisterID* emitUnexpectedLoad(RegisterID* dst, bool);
         RegisterID* emitUnexpectedLoad(RegisterID* dst, double);
 
-        RegisterID* emitUnaryOp(OpcodeID, RegisterID* dst, RegisterID* src);
+        RegisterID* emitUnaryOp(OpcodeID, RegisterID* dst, RegisterID* src, ResultType);
         RegisterID* emitBinaryOp(OpcodeID, RegisterID* dst, RegisterID* src1, RegisterID* src2, OperandTypes);
         RegisterID* emitEqualityOp(OpcodeID, RegisterID* dst, RegisterID* src1, RegisterID* src2);
         RegisterID* emitUnaryNoDstOp(OpcodeID, RegisterID* src);
@@ -247,14 +247,14 @@ namespace JSC {
 
         RegisterID* emitMove(RegisterID* dst, RegisterID* src);
 
-        RegisterID* emitToJSNumber(RegisterID* dst, RegisterID* src) { return emitUnaryOp(op_to_jsnumber, dst, src); }
+        RegisterID* emitToJSNumber(RegisterID* dst, RegisterID* src) { return emitUnaryOp(op_to_jsnumber, dst, src, ResultType::unknown()); }
         RegisterID* emitPreInc(RegisterID* srcDst);
         RegisterID* emitPreDec(RegisterID* srcDst);
         RegisterID* emitPostInc(RegisterID* dst, RegisterID* srcDst);
         RegisterID* emitPostDec(RegisterID* dst, RegisterID* srcDst);
 
         RegisterID* emitInstanceOf(RegisterID* dst, RegisterID* value, RegisterID* base, RegisterID* basePrototype);
-        RegisterID* emitTypeOf(RegisterID* dst, RegisterID* src) { return emitUnaryOp(op_typeof, dst, src); }
+        RegisterID* emitTypeOf(RegisterID* dst, RegisterID* src) { return emitUnaryOp(op_typeof, dst, src, ResultType::unknown()); }
         RegisterID* emitIn(RegisterID* dst, RegisterID* property, RegisterID* base) { return emitBinaryOp(op_in, dst, property, base, OperandTypes()); }
 
         RegisterID* emitResolve(RegisterID* dst, const Identifier& property);
@@ -292,7 +292,7 @@ namespace JSC {
         PassRefPtr<LabelID> emitJumpSubroutine(RegisterID* retAddrDst, LabelID*);
         void emitSubroutineReturn(RegisterID* retAddrSrc);
 
-        RegisterID* emitGetPropertyNames(RegisterID* dst, RegisterID* base) { return emitUnaryOp(op_get_pnames, dst, base); }
+        RegisterID* emitGetPropertyNames(RegisterID* dst, RegisterID* base) { return emitUnaryOp(op_get_pnames, dst, base, ResultType::unknown()); }
         RegisterID* emitNextPropertyName(RegisterID* dst, RegisterID* iter, LabelID* target);
 
         RegisterID* emitCatch(RegisterID*, LabelID* start, LabelID* end);
index f22d7bd..6e32c06 100644 (file)
@@ -30,6 +30,7 @@
 #define Instruction_h
 
 #include "Opcode.h"
+#include "ResultType.h"
 #include <wtf/VectorTraits.h>
 
 namespace JSC {
@@ -58,6 +59,7 @@ namespace JSC {
             StructureID* structureID;
             StructureIDChain* structureIDChain;
             JSCell* jsCell;
+            ResultType::Type resultType;
         } u;
     };
 
index 359c4c7..1cff44e 100644 (file)
@@ -1844,6 +1844,7 @@ JSValue* Machine::privateExecute(ExecutionFlag flag, RegisterFile* registerFile,
         */
         int dst = (++vPC)->u.operand;
         JSValue* src = callFrame[(++vPC)->u.operand].jsValue(callFrame);
+        ++vPC;
         double v;
         if (fastIsNumber(src, v))
             callFrame[dst] = jsNumber(callFrame, -v);
index c33ad06..1f794dc 100644 (file)
@@ -259,6 +259,8 @@ namespace JSC {
         static JSObject* SFX_CALL cti_op_new_error(CTI_ARGS);
         static void SFX_CALL cti_op_debug(CTI_ARGS);
 
+        static JSValue* SFX_CALL cti_allocate_number(CTI_ARGS);
+
         static JSValue* SFX_CALL cti_vm_throw(CTI_ARGS);
         static void* SFX_CALL cti_vm_compile(CTI_ARGS);
         static void* SFX_CALL cti_vm_lazyLinkCall(CTI_ARGS);
index 435ceea..f838ce0 100644 (file)
@@ -53,6 +53,11 @@ namespace JSC {
             return (m_type & TypeReusable);
         }
         
+        bool isReusableNumber()
+        {
+            return isReusable() && definitelyIsNumber();
+        }
+
         bool definitelyIsNumber()
         {
             return ((m_type & ~TypeReusable) == TypeMaybeNumber);
@@ -68,6 +73,11 @@ namespace JSC {
             return !isNotNumber();
         }
         
+        int toInt()
+        {
+            return static_cast<int>(m_type);
+        }
+
         static ResultType nullType()
         {
             return ResultType(TypeMaybeNull);
index a5757aa..e592215 100644 (file)
@@ -715,7 +715,7 @@ RegisterID* PrefixErrorNode::emitCode(CodeGenerator& generator, RegisterID*)
 RegisterID* UnaryOpNode::emitCode(CodeGenerator& generator, RegisterID* dst)
 {
     RegisterID* src = generator.emitNode(m_expr.get());
-    return generator.emitUnaryOp(opcode(), generator.finalDestination(dst), src);
+    return generator.emitUnaryOp(opcode(), generator.finalDestination(dst), src, m_expr->resultDescriptor());
 }
 
 // ------------------------------ Binary Operation Nodes -----------------------------------
@@ -726,7 +726,7 @@ RegisterID* BinaryOpNode::emitCode(CodeGenerator& generator, RegisterID* dst)
     if (opcode == op_neq) {
         if (m_expr1->isNull() || m_expr2->isNull()) {
             RefPtr<RegisterID> src = generator.emitNode(dst, m_expr1->isNull() ? m_expr2.get() : m_expr1.get());
-            return generator.emitUnaryOp(op_neq_null, generator.finalDestination(dst, src.get()), src.get());
+            return generator.emitUnaryOp(op_neq_null, generator.finalDestination(dst, src.get()), src.get(), ResultType::unknown());
         }
     }
 
@@ -739,7 +739,7 @@ RegisterID* EqualNode::emitCode(CodeGenerator& generator, RegisterID* dst)
 {
     if (m_expr1->isNull() || m_expr2->isNull()) {
         RefPtr<RegisterID> src = generator.emitNode(dst, m_expr1->isNull() ? m_expr2.get() : m_expr1.get());
-        return generator.emitUnaryOp(op_eq_null, generator.finalDestination(dst, src.get()), src.get());
+        return generator.emitUnaryOp(op_eq_null, generator.finalDestination(dst, src.get()), src.get(), ResultType::unknown());
     }
 
     RefPtr<RegisterID> src1 = generator.emitNodeForLeftHandSide(m_expr1.get(), m_rightHasAssignments, m_expr2->isPure(generator));
index 6a45929..547c94e 100644 (file)
@@ -228,6 +228,7 @@ public:
         OP2_CVTSI2SD_VsdEd  = 0x2A,
         OP2_CVTTSD2SI_GdWsd = 0x2C,
         OP2_UCOMISD_VsdWsd  = 0x2E,
+        OP2_XORPD_VsdWsd    = 0x57,
         OP2_ADDSD_VsdWsd    = 0x58,
         OP2_MULSD_VsdWsd    = 0x59,
         OP2_SUBSD_VsdWsd    = 0x5C,
@@ -263,6 +264,7 @@ public:
         GROUP2_OP_SAR = 7,
 
         GROUP3_OP_TEST = 0,
+        GROUP3_OP_NEG  = 3,
         GROUP3_OP_IDIV = 7,
 
         GROUP5_OP_CALLN = 2,
@@ -605,6 +607,12 @@ public:
         emitModRm_opr(GROUP3_OP_IDIV, dst);
     }
 
+    void negl_r(RegisterID dst)
+    {
+        m_buffer->putByte(OP_GROUP3_Ev);
+        emitModRm_opr(GROUP3_OP_NEG, dst);
+    }
+
     void cdq()
     {
         m_buffer->putByte(OP_CDQ);
@@ -741,6 +749,14 @@ public:
         emitModRm_rm((RegisterID)dst, base, offset);
     }
 
+    void xorpd_mr(void* addr, XMMRegisterID dst)
+    {
+        m_buffer->putByte(PRE_SSE_66);
+        m_buffer->putByte(OP_2BYTE_ESCAPE);
+        m_buffer->putByte(OP2_XORPD_VsdWsd);
+        emitModRm_rm((RegisterID)dst, addr);
+    }
+
     void movsd_rm(XMMRegisterID src, int offset, RegisterID base)
     {
         m_buffer->putByte(PRE_SSE_F2);
index 8e23d62..e2f6990 100644 (file)
@@ -84,6 +84,11 @@ namespace JSC {
 
         static PassRefPtr<StructureID> createStructureID(JSValue* proto) { return StructureID::create(proto, TypeInfo(NumberType, NeedsThisConversion)); }
 
+        JSNumberCell(JSGlobalData* globalData)
+        : JSCell(globalData->numberStructureID.get())
+        {
+        }
+
     private:
         JSNumberCell(JSGlobalData* globalData, double value)
             : JSCell(globalData->numberStructureID.get())