2008-09-14 Maciej Stachowiak <mjs@apple.com>
author    barraclough@apple.com <barraclough@apple.com@268f45cc-cd09-0410-ab3c-d52691b4dbfc>  Mon, 15 Sep 2008 02:18:13 +0000 (02:18 +0000)
committer barraclough@apple.com <barraclough@apple.com@268f45cc-cd09-0410-ab3c-d52691b4dbfc>  Mon, 15 Sep 2008 02:18:13 +0000 (02:18 +0000)
        Reviewed by Cameron Zwarich.

        - split the "prototype" lookup for hasInstance into opcode stream so it can be cached

        ~5% speedup on v8 earley-boyer test

        * API/JSCallbackObject.h: Add a parameter for the pre-looked-up prototype.
        * API/JSCallbackObjectFunctions.h:
        (JSC::::hasInstance): Ditto.
        * API/JSValueRef.cpp:
        (JSValueIsInstanceOfConstructor): Look up and pass in prototype.
        * JavaScriptCore.exp:
        * VM/CTI.cpp:
        (JSC::CTI::privateCompileMainPass): Pass along prototype.
        * VM/CodeBlock.cpp:
        (JSC::CodeBlock::dump): Print third arg.
        * VM/CodeGenerator.cpp:
        (JSC::CodeGenerator::emitInstanceOf): Implement this, now that there
        is a third argument.
        * VM/CodeGenerator.h:
        * VM/Machine.cpp:
        (JSC::Machine::privateExecute): Pass along the prototype.
        (JSC::Machine::cti_op_instanceof): Ditto.
        * kjs/JSObject.cpp:
        (JSC::JSObject::hasInstance): Expect to get a pre-looked-up prototype.
        * kjs/JSObject.h:
        * kjs/nodes.cpp:
        (JSC::InstanceOfNode::emitCode): Emit a get_by_id of the prototype
        property and pass that register to instanceof.
        * kjs/nodes.h:

git-svn-id: https://svn.webkit.org/repository/webkit/trunk@36418 268f45cc-cd09-0410-ab3c-d52691b4dbfc

JavaScriptCore/ChangeLog
JavaScriptCore/VM/CTI.cpp
JavaScriptCore/VM/CTI.h
JavaScriptCore/VM/CodeBlock.cpp
JavaScriptCore/VM/CodeBlock.h
JavaScriptCore/VM/Machine.cpp
JavaScriptCore/VM/Machine.h
JavaScriptCore/masm/X86Assembler.h

index db09614..2e4207b 100644
 
         Reviewed by Sam Weinig.
 
+        Accelerated property accesses.
+
+        Inline more of the array access code into the JIT code for get/put_by_val.
+        Accelerate get/put_by_id by speculatively inlining a disabled direct access
+        into the hot path of the code, and repatching this with the correct StructureID
+        and property map offset once these are known.  In the case of accesses to the
+        prototype and reading the array-length a trampoline is generated, and the
+        branch to the slow-case is relinked to jump to this.
+
+        By repatching, we mean rewriting the x86 instruction stream.  Instructions are
+        only modified in a simple fashion - altering immediate operands, memory access
+        displacements, and branch offsets.
+
+        For regular get_by_id/put_by_id accesses to an object, a StructureID in an
+        instruction's immediate operand is updated, and a memory access operation's
+        displacement is updated to access the correct field on the object.  In the case
+        of more complex accesses (array length and get_by_id_prototype) the offset on
+        the branch to the slow-case is updated, to now jump to a trampoline.
+
+        +2.8% sunspider, +13% v8-tests
+
+        * VM/CTI.cpp:
+        (JSC::CTI::emitCall):
+        (JSC::CTI::emitJumpSlowCaseIfNotJSCell):
+        (JSC::CTI::CTI):
+        (JSC::CTI::privateCompileMainPass):
+        (JSC::CTI::privateCompileSlowCases):
+        (JSC::CTI::privateCompile):
+        (JSC::CTI::privateCompileGetByIdSelf):
+        (JSC::CTI::privateCompileGetByIdProto):
+        (JSC::CTI::privateCompileGetByIdChain):
+        (JSC::CTI::privateCompilePutByIdReplace):
+        (JSC::CTI::privateCompilePutByIdTransition):
+        (JSC::CTI::privateCompileArrayLengthTrampoline):
+        (JSC::CTI::privateCompileStringLengthTrampoline):
+        (JSC::CTI::patchGetByIdSelf):
+        (JSC::CTI::patchPutByIdReplace):
+        (JSC::CTI::privateCompilePatchGetArrayLength):
+        (JSC::CTI::privateCompilePatchGetStringLength):
+        * VM/CTI.h:
+        (JSC::CTI::compileGetByIdSelf):
+        (JSC::CTI::compileGetByIdProto):
+        (JSC::CTI::compileGetByIdChain):
+        (JSC::CTI::compilePutByIdReplace):
+        (JSC::CTI::compilePutByIdTransition):
+        (JSC::CTI::compileArrayLengthTrampoline):
+        (JSC::CTI::compileStringLengthTrampoline):
+        (JSC::CTI::compilePatchGetArrayLength):
+        (JSC::CTI::compilePatchGetStringLength):
+        * VM/CodeBlock.cpp:
+        (JSC::CodeBlock::dump):
+        (JSC::CodeBlock::~CodeBlock):
+        * VM/CodeBlock.h:
+        (JSC::StructureStubInfo::StructureStubInfo):
+        (JSC::CodeBlock::getStubInfo):
+        * VM/Machine.cpp:
+        (JSC::Machine::tryCTICachePutByID):
+        (JSC::Machine::tryCTICacheGetByID):
+        (JSC::Machine::cti_op_put_by_val_array):
+        * VM/Machine.h:
+        * masm/X86Assembler.h:
+        (JSC::X86Assembler::):
+        (JSC::X86Assembler::cmpl_i8m):
+        (JSC::X86Assembler::emitUnlinkedJa):
+        (JSC::X86Assembler::getRelocatedAddress):
+        (JSC::X86Assembler::getDifferenceBetweenLabels):
+        (JSC::X86Assembler::emitModRm_opmsib):
+
+2008-09-14  Gavin Barraclough  <barraclough@apple.com>
+
+        Reviewed by Sam Weinig.
+
         Remove unnecessary virtual function call from cti_op_call_JSFunction -
         ~5% on richards, ~2.5% on v8-tests, ~0.5% on sunspider.
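
For illustration, the inline-cache scheme described in the "Accelerated property accesses" entry above amounts to the following logic, baked directly into the emitted machine code. This is a sketch, not code from the patch; the tag-mask value and the struct layouts are assumptions based on the diff below.

    #include <cstddef>
    #include <cstdint>

    struct StructureID;
    struct JSCell { StructureID* m_structureID; };
    struct JSObject : JSCell { void** m_propertyStorage; };

    // Sketch of the emitted get_by_id fast path: cell-tag test, StructureID
    // compare against a patchable imm32, then a load at a patchable disp32.
    // 'cachedID' and 'cachedOffset' stand for the deliberately-invalid
    // placeholder constants that are later rewritten in the instruction stream.
    static void* getByIdFastPath(void* baseValue, StructureID* cachedID, std::size_t cachedOffset,
                                 void* (*slowCase)(void*))
    {
        const std::uintptr_t tagMask = 0x3; // JSImmediate::TagMask (value assumed)
        if (reinterpret_cast<std::uintptr_t>(baseValue) & tagMask)
            return slowCase(baseValue);               // emitJumpSlowCaseIfNotJSCell
        JSObject* base = static_cast<JSObject*>(baseValue);
        if (base->m_structureID != cachedID)          // cmpl_i32m, repatchImmediate target
            return slowCase(baseValue);
        return base->m_propertyStorage[cachedOffset]; // movl_mr, repatchDisplacement target
    }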
 
index ac58c77..2e5b4d3 100644
@@ -242,64 +242,85 @@ void CTI::printOpcodeOperandTypes(unsigned src1, unsigned src2)
 
 #endif
 
-ALWAYS_INLINE void CTI::emitCall(unsigned opcodeIndex, CTIHelper_j helper)
+ALWAYS_INLINE X86Assembler::JmpSrc CTI::emitCall(unsigned opcodeIndex, CTIHelper_j helper)
 {
 #if ENABLE(SAMPLING_TOOL)
     m_jit.movl_i32m(1, &inCalledCode);
 #endif
-    m_calls.append(CallRecord(m_jit.emitCall(), helper, opcodeIndex));
+    X86Assembler::JmpSrc call = m_jit.emitCall();
+    m_calls.append(CallRecord(call, helper, opcodeIndex));
     emitDebugExceptionCheck();
 #if ENABLE(SAMPLING_TOOL)
     m_jit.movl_i32m(0, &inCalledCode);
 #endif
+
+    return call;
 }
 
-ALWAYS_INLINE void CTI::emitCall(unsigned opcodeIndex, CTIHelper_p helper)
+ALWAYS_INLINE X86Assembler::JmpSrc CTI::emitCall(unsigned opcodeIndex, CTIHelper_p helper)
 {
 #if ENABLE(SAMPLING_TOOL)
     m_jit.movl_i32m(1, &inCalledCode);
 #endif
-    m_calls.append(CallRecord(m_jit.emitCall(), helper, opcodeIndex));
+    X86Assembler::JmpSrc call = m_jit.emitCall();
+    m_calls.append(CallRecord(call, helper, opcodeIndex));
     emitDebugExceptionCheck();
 #if ENABLE(SAMPLING_TOOL)
     m_jit.movl_i32m(0, &inCalledCode);
 #endif
+
+    return call;
 }
 
-ALWAYS_INLINE void CTI::emitCall(unsigned opcodeIndex, CTIHelper_b helper)
+ALWAYS_INLINE X86Assembler::JmpSrc CTI::emitCall(unsigned opcodeIndex, CTIHelper_b helper)
 {
 #if ENABLE(SAMPLING_TOOL)
     m_jit.movl_i32m(1, &inCalledCode);
 #endif
-    m_calls.append(CallRecord(m_jit.emitCall(), helper, opcodeIndex));
+    X86Assembler::JmpSrc call = m_jit.emitCall();
+    m_calls.append(CallRecord(call, helper, opcodeIndex));
     emitDebugExceptionCheck();
 #if ENABLE(SAMPLING_TOOL)
     m_jit.movl_i32m(0, &inCalledCode);
 #endif
+
+    return call;
 }
 
-ALWAYS_INLINE void CTI::emitCall(unsigned opcodeIndex, CTIHelper_v helper)
+ALWAYS_INLINE X86Assembler::JmpSrc CTI::emitCall(unsigned opcodeIndex, CTIHelper_v helper)
 {
 #if ENABLE(SAMPLING_TOOL)
     m_jit.movl_i32m(1, &inCalledCode);
 #endif
-    m_calls.append(CallRecord(m_jit.emitCall(), helper, opcodeIndex));
+    X86Assembler::JmpSrc call = m_jit.emitCall();
+    m_calls.append(CallRecord(call, helper, opcodeIndex));
     emitDebugExceptionCheck();
 #if ENABLE(SAMPLING_TOOL)
     m_jit.movl_i32m(0, &inCalledCode);
 #endif
+
+    return call;
 }
 
-ALWAYS_INLINE void CTI::emitCall(unsigned opcodeIndex, CTIHelper_s helper)
+ALWAYS_INLINE X86Assembler::JmpSrc CTI::emitCall(unsigned opcodeIndex, CTIHelper_s helper)
 {
 #if ENABLE(SAMPLING_TOOL)
     m_jit.movl_i32m(1, &inCalledCode);
 #endif
-    m_calls.append(CallRecord(m_jit.emitCall(), helper, opcodeIndex));
+    X86Assembler::JmpSrc call = m_jit.emitCall();
+    m_calls.append(CallRecord(call, helper, opcodeIndex));
     emitDebugExceptionCheck();
 #if ENABLE(SAMPLING_TOOL)
     m_jit.movl_i32m(0, &inCalledCode);
 #endif
+
+    return call;
+}
+
+ALWAYS_INLINE void CTI::emitJumpSlowCaseIfNotJSCell(X86Assembler::RegisterID reg, unsigned opcodeIndex)
+{
+    m_jit.testl_i32r(JSImmediate::TagMask, reg);
+    m_slowCases.append(SlowCaseEntry(m_jit.emitUnlinkedJne(), opcodeIndex));
 }
 
 ALWAYS_INLINE void CTI::emitJumpSlowCaseIfNotImm(X86Assembler::RegisterID reg, unsigned opcodeIndex)
@@ -361,6 +382,7 @@ CTI::CTI(Machine* machine, ExecState* exec, CodeBlock* codeBlock)
     , m_exec(exec)
     , m_codeBlock(codeBlock)
     , m_labels(codeBlock ? codeBlock->instructions.size() : 0)
+    , m_structureStubCompilationInfo(codeBlock ? codeBlock->structureIDInstructions.size() : 0)
 {
 }
 
@@ -469,6 +491,8 @@ void CTI::privateCompileMainPass()
     Instruction* instruction = m_codeBlock->instructions.begin();
     unsigned instructionCount = m_codeBlock->instructions.size();
 
+    unsigned structureIDInstructionIndex = 0;
+
     for (unsigned i = 0; i < instructionCount; ) {
         m_labels[i] = m_jit.label();
 
@@ -607,23 +631,58 @@ void CTI::privateCompileMainPass()
             break;
         }
         case op_put_by_id: {
-            Identifier* ident = &(m_codeBlock->identifiers[instruction[i + 2].u.operand]);
-            emitPutArgConstant(reinterpret_cast<unsigned>(ident), 4);
+            // In order to be able to repatch both the StructureID and the object offset, we store one pointer,
+            // 'hotPathBegin', to just after the arguments have been loaded into registers, and we generate code
+            // such that the StructureID & offset are always at the same distance from it.
+
             emitGetArg(instruction[i + 1].u.operand, X86::eax);
             emitGetArg(instruction[i + 3].u.operand, X86::edx);
-            emitPutArg(X86::eax, 0); // leave the base in eax
-            emitPutArg(X86::edx, 8); // leave the base in edx
-            emitCall(i, Machine::cti_op_put_by_id);
+
+            ASSERT(m_codeBlock->structureIDInstructions[structureIDInstructionIndex].opcodeIndex == i);
+            X86Assembler::JmpDst hotPathBegin = m_jit.label();
+            m_structureStubCompilationInfo[structureIDInstructionIndex].hotPathBegin = hotPathBegin;
+            ++structureIDInstructionIndex;
+
+            // Jump to a slow case if either the base object is an immediate, or if the StructureID does not match.
+            emitJumpSlowCaseIfNotJSCell(X86::eax, i);
+            // It is important that the following instruction plants a 32-bit immediate, in order that it can be patched over.
+            m_jit.cmpl_i32m(repatchGetByIdDefaultStructureID, OBJECT_OFFSET(JSCell, m_structureID), X86::eax);
+            ASSERT(X86Assembler::getDifferenceBetweenLabels(hotPathBegin, m_jit.label()) == repatchOffsetPutByIdStructureID);
+            m_slowCases.append(SlowCaseEntry(m_jit.emitUnlinkedJne(), i));
+
+            // Plant a load from a bogus offset in the object's property map; we will patch this later, if it is to be used.
+            m_jit.movl_mr(OBJECT_OFFSET(JSObject, m_propertyStorage), X86::eax, X86::eax);
+            m_jit.movl_rm(X86::edx, repatchGetByIdDefaultOffset, X86::eax);
+            ASSERT(X86Assembler::getDifferenceBetweenLabels(hotPathBegin, m_jit.label()) == repatchOffsetPutByIdPropertyMapOffset);
+
             i += 8;
             break;
         }
         case op_get_by_id: {
-            Identifier* ident = &(m_codeBlock->identifiers[instruction[i + 3].u.operand]);
-            emitPutArgConstant(reinterpret_cast<unsigned>(ident), 4);
+            // As for put_by_id, get_by_id requires the offset of the StructureID and the offset of the access to be repatched.
+            // Additionally, for get_by_id we need to repatch the offset of the branch to the slow case (we repatch this to jump
+            // to the array-length / prototype access trampolines), and finally we also use the property-map access offset as a
+            // label to jump back to if one of these trampolines finds a match.
+
             emitGetArg(instruction[i + 2].u.operand, X86::eax);
-            emitPutArg(X86::eax, 0); // leave the base in eax
-            emitCall(i, Machine::cti_op_get_by_id);
-            emitPutResult(instruction[i + 1].u.operand);
+
+            ASSERT(m_codeBlock->structureIDInstructions[structureIDInstructionIndex].opcodeIndex == i);
+
+            X86Assembler::JmpDst hotPathBegin = m_jit.label();
+            m_structureStubCompilationInfo[structureIDInstructionIndex].hotPathBegin = hotPathBegin;
+            ++structureIDInstructionIndex;
+
+            emitJumpSlowCaseIfNotJSCell(X86::eax, i);
+            m_jit.cmpl_i32m(repatchGetByIdDefaultStructureID, OBJECT_OFFSET(JSCell, m_structureID), X86::eax);
+            ASSERT(X86Assembler::getDifferenceBetweenLabels(hotPathBegin, m_jit.label()) == repatchOffsetGetByIdStructureID);
+            m_slowCases.append(SlowCaseEntry(m_jit.emitUnlinkedJne(), i));
+            ASSERT(X86Assembler::getDifferenceBetweenLabels(hotPathBegin, m_jit.label()) == repatchOffsetGetByIdBranchToSlowCase);
+
+            m_jit.movl_mr(OBJECT_OFFSET(JSObject, m_propertyStorage), X86::eax, X86::eax);
+            m_jit.movl_mr(repatchGetByIdDefaultOffset, X86::eax, X86::ecx);
+            ASSERT(X86Assembler::getDifferenceBetweenLabels(hotPathBegin, m_jit.label()) == repatchOffsetGetByIdPropertyMapOffset);
+            emitPutResult(instruction[i + 1].u.operand, X86::ecx);
+
             i += 8;
             break;
         }
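
Because the asserts above pin the StructureID immediate and the property-map displacement at fixed byte distances from hotPathBegin, a later repatch needs only that one recorded pointer plus the architecture-specific constants. A sketch of the consuming side, mirroring patchGetByIdSelf further down in this patch (it assumes the repatch constants and X86Assembler helpers added elsewhere in the diff are visible):

    static void repatchGetByIdExample(void* hotPathBegin, uint32_t newStructureID, int32_t newOffsetInBytes)
    {
        intptr_t base = reinterpret_cast<intptr_t>(hotPathBegin);
        X86Assembler::repatchImmediate(base + repatchOffsetGetByIdStructureID, newStructureID);
        X86Assembler::repatchDisplacement(base + repatchOffsetGetByIdPropertyMapOffset, newOffsetInBytes);
    }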
@@ -777,11 +836,14 @@ void CTI::privateCompileMainPass()
             m_slowCases.append(SlowCaseEntry(m_jit.emitUnlinkedJne(), i));
             m_jit.cmpl_i32m(reinterpret_cast<unsigned>(m_machine->m_jsArrayVptr), X86::eax);
             m_slowCases.append(SlowCaseEntry(m_jit.emitUnlinkedJne(), i));
+
+            // This is an array; get the m_storage pointer into ecx, then check if the index is below the fast cutoff
+            m_jit.movl_mr(OBJECT_OFFSET(JSArray, m_storage), X86::eax, X86::ecx);
             m_jit.cmpl_rm(X86::edx, OBJECT_OFFSET(JSArray, m_fastAccessCutoff), X86::eax);
             m_slowCases.append(SlowCaseEntry(m_jit.emitUnlinkedJbe(), i));
 
-            m_jit.movl_mr(OBJECT_OFFSET(JSArray, m_storage), X86::eax, X86::eax);
-            m_jit.movl_mr(OBJECT_OFFSET(ArrayStorage, m_vector[0]), X86::eax, X86::edx, sizeof(JSValue*), X86::eax);
+            // Get the value from the vector
+            m_jit.movl_mr(OBJECT_OFFSET(ArrayStorage, m_vector[0]), X86::ecx, X86::edx, sizeof(JSValue*), X86::eax);
             emitPutResult(instruction[i + 1].u.operand);
             i += 4;
             break;
@@ -810,18 +872,30 @@ void CTI::privateCompileMainPass()
         case op_put_by_val: {
             emitGetArg(instruction[i + 1].u.operand, X86::eax);
             emitGetArg(instruction[i + 2].u.operand, X86::edx);
-            emitGetArg(instruction[i + 3].u.operand, X86::ecx);
             emitJumpSlowCaseIfNotImm(X86::edx, i);
             emitFastArithImmToInt(X86::edx);
             m_jit.testl_i32r(JSImmediate::TagMask, X86::eax);
             m_slowCases.append(SlowCaseEntry(m_jit.emitUnlinkedJne(), i));
             m_jit.cmpl_i32m(reinterpret_cast<unsigned>(m_machine->m_jsArrayVptr), X86::eax);
             m_slowCases.append(SlowCaseEntry(m_jit.emitUnlinkedJne(), i));
+
+            // This is an array; get the m_storage pointer into ecx, then check if the index is below the fast cutoff
+            m_jit.movl_mr(OBJECT_OFFSET(JSArray, m_storage), X86::eax, X86::ecx);
             m_jit.cmpl_rm(X86::edx, OBJECT_OFFSET(JSArray, m_fastAccessCutoff), X86::eax);
+            X86Assembler::JmpSrc inFastVector = m_jit.emitUnlinkedJa();
+            // No; oh well, check if the access is within the vector - if so, we may still be okay.
+            m_jit.cmpl_rm(X86::edx, OBJECT_OFFSET(ArrayStorage, m_vectorLength), X86::ecx);
             m_slowCases.append(SlowCaseEntry(m_jit.emitUnlinkedJbe(), i));
 
-            m_jit.movl_mr(OBJECT_OFFSET(JSArray, m_storage), X86::eax, X86::eax);
-            m_jit.movl_rm(X86::ecx, OBJECT_OFFSET(ArrayStorage, m_vector[0]), X86::eax, X86::edx, sizeof(JSValue*));
+            // This is a write to the slow part of the vector; first, we have to check if this would be the first write to this location.
+            // FIXME: we should be able to handle an initial write to the array; increment the number of items in the array, and potentially update the fast access cutoff.
+            m_jit.cmpl_i8m(0, OBJECT_OFFSET(ArrayStorage, m_vector[0]), X86::ecx, X86::edx, sizeof(JSValue*));
+            m_slowCases.append(SlowCaseEntry(m_jit.emitUnlinkedJe(), i));
+
+            // All good - put the value into the array.
+            m_jit.link(inFastVector, m_jit.label());
+            emitGetArg(instruction[i + 3].u.operand, X86::eax);
+            m_jit.movl_rm(X86::eax, OBJECT_OFFSET(ArrayStorage, m_vector[0]), X86::ecx, X86::edx, sizeof(JSValue*));
             i += 4;
             break;
         }
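
In C++ terms the put_by_val fast path emitted above behaves roughly like the sketch below (struct layouts assumed from the offsets used in the diff; returning false stands for a jump to a slow case):

    struct JSValue;
    struct ArrayStorage { unsigned m_vectorLength; JSValue* m_vector[1]; };
    struct JSArray { ArrayStorage* m_storage; unsigned m_fastAccessCutoff; };

    static bool putByValFastPath(JSArray* array, unsigned index, JSValue* value)
    {
        ArrayStorage* storage = array->m_storage;
        if (index >= array->m_fastAccessCutoff) {     // cmpl_rm + emitUnlinkedJa
            if (index >= storage->m_vectorLength)     // beyond the vector entirely
                return false;                         // -> cti_op_put_by_val_array
            if (!storage->m_vector[index])            // cmpl_i8m: first write to this slot
                return false;                         // -> cti_op_put_by_val_array
        }
        storage->m_vector[index] = value;             // movl_rm into the vector
        return true;
    }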
@@ -1339,6 +1413,8 @@ void CTI::privateCompileMainPass()
             ASSERT_NOT_REACHED();
         }
     }
+
+    ASSERT(structureIDInstructionIndex == m_codeBlock->structureIDInstructions.size());
 }
 
 
@@ -1363,9 +1439,11 @@ void CTI::privateCompileLinkPass()
     
 void CTI::privateCompileSlowCases()
 {
+    unsigned structureIDInstructionIndex = 0;
+
     Instruction* instruction = m_codeBlock->instructions.begin();
     for (Vector<SlowCaseEntry>::iterator iter = m_slowCases.begin(); iter != m_slowCases.end(); ++iter) {
-        int i = iter->to;
+        unsigned i = iter->to;
         m_jit.emitRestoreArgumentReference();
         switch (m_machine->getOpcodeID(instruction[i].u.opcode)) {
         case op_add: {
@@ -1402,16 +1480,33 @@ void CTI::privateCompileSlowCases()
             break;
         }
         case op_get_by_val: {
+            // The slow case that handles accesses to arrays (below) may jump back up to here. 
+            X86Assembler::JmpDst beginGetByValSlow = m_jit.label();
+
             X86Assembler::JmpSrc notImm = iter->from;
             m_jit.link((++iter)->from, m_jit.label());
             m_jit.link((++iter)->from, m_jit.label());
-            m_jit.link((++iter)->from, m_jit.label());
             emitFastArithIntToImmNoCheck(X86::edx);
             m_jit.link(notImm, m_jit.label());
             emitPutArg(X86::eax, 0);
             emitPutArg(X86::edx, 4);
             emitCall(i, Machine::cti_op_get_by_val);
             emitPutResult(instruction[i + 1].u.operand);
+            m_jit.link(m_jit.emitUnlinkedJmp(), m_labels[i + 4]);
+
+            // This is the slow case that handles accesses to arrays above the fast cut-off.
+            // First, check if this is an access within the vector.
+            m_jit.link((++iter)->from, m_jit.label());
+            m_jit.cmpl_rm(X86::edx, OBJECT_OFFSET(ArrayStorage, m_vectorLength), X86::ecx);
+            m_jit.link(m_jit.emitUnlinkedJbe(), beginGetByValSlow);
+
+            // okay, missed the fast region, but it is still in the vector.  Get the value.
+            m_jit.movl_mr(OBJECT_OFFSET(ArrayStorage, m_vector[0]), X86::ecx, X86::edx, sizeof(JSValue*), X86::ecx);
+            // Check whether the value loaded is zero; if so we need to return undefined.
+            m_jit.testl_rr(X86::ecx, X86::ecx);
+            m_jit.link(m_jit.emitUnlinkedJe(), beginGetByValSlow);
+            emitPutResult(instruction[i + 1].u.operand, X86::ecx);
+            
             i += 4;
             break;
         }
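
Taken together with its hot path, the inline get_by_val logic reads roughly as follows; a zero (hole) loaded from the vector re-enters the generic slow case, which is what produces undefined. A sketch, with the same assumed declarations as the put_by_val sketch above:

    struct JSValue;
    struct ArrayStorage { unsigned m_vectorLength; JSValue* m_vector[1]; };
    struct JSArray { ArrayStorage* m_storage; unsigned m_fastAccessCutoff; };

    // Returning 0 stands for falling back to cti_op_get_by_val.
    static JSValue* getByValInline(JSArray* array, unsigned index)
    {
        ArrayStorage* storage = array->m_storage;
        if (index < array->m_fastAccessCutoff)        // hot path: known non-hole
            return storage->m_vector[index];
        if (index >= storage->m_vectorLength)         // not within the vector
            return 0;                                 // jbe back to beginGetByValSlow
        JSValue* value = storage->m_vector[index];
        return value ? value : 0;                     // je back to beginGetByValSlow
    }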
@@ -1476,6 +1571,52 @@ void CTI::privateCompileSlowCases()
             i += 4;
             break;
         }
+        case op_put_by_id: {
+            m_jit.link(iter->from, m_jit.label());
+            m_jit.link((++iter)->from, m_jit.label());
+
+            Identifier* ident = &(m_codeBlock->identifiers[instruction[i + 2].u.operand]);
+            emitPutArgConstant(reinterpret_cast<unsigned>(ident), 4);
+            emitPutArg(X86::eax, 0);
+            emitPutArg(X86::edx, 8);
+            X86Assembler::JmpSrc call = emitCall(i, Machine::cti_op_put_by_id);
+
+            // Track the location of the call; this will be used to recover repatch information.
+            ASSERT(m_codeBlock->structureIDInstructions[structureIDInstructionIndex].opcodeIndex == i);
+            m_structureStubCompilationInfo[structureIDInstructionIndex].callReturnLocation = call;
+            ++structureIDInstructionIndex;
+
+            i += 8;
+            break;
+        }
+        case op_get_by_id: {
+            // As for the hot path of get_by_id, above, we ensure that we can use an architecture specific offset
+            // so that we only need to track one pointer into the slow case code - we track a pointer to the location
+            // of the call (which we can use to look up the repatch information), but should an array-length or
+            // prototype access trampoline fail we want to bail out back to here.  To do so we can subtract back
+            // the distance from the call to the head of the slow case.
+
+            m_jit.link(iter->from, m_jit.label());
+            m_jit.link((++iter)->from, m_jit.label());
+
+#ifndef NDEBUG
+            X86Assembler::JmpDst coldPathBegin = m_jit.label();
+#endif        
+            emitPutArg(X86::eax, 0);
+            Identifier* ident = &(m_codeBlock->identifiers[instruction[i + 3].u.operand]);
+            emitPutArgConstant(reinterpret_cast<unsigned>(ident), 4);
+            X86Assembler::JmpSrc call = emitCall(i, Machine::cti_op_get_by_id);
+            ASSERT(X86Assembler::getDifferenceBetweenLabels(coldPathBegin, call) == repatchOffsetGetByIdSlowCaseCall);
+            emitPutResult(instruction[i + 1].u.operand);
+
+            // Track the location of the call; this will be used to recover repatch information.
+            ASSERT(m_codeBlock->structureIDInstructions[structureIDInstructionIndex].opcodeIndex == i);
+            m_structureStubCompilationInfo[structureIDInstructionIndex].callReturnLocation = call;
+            ++structureIDInstructionIndex;
+
+            i += 8;
+            break;
+        }
         case op_loop_if_lesseq: {
             emitSlowScriptCheck(i);
 
@@ -1513,16 +1654,28 @@ void CTI::privateCompileSlowCases()
             break;
         }
         case op_put_by_val: {
+            // Normal slow cases - either the index is not an immediate int, or the base is not an array.
             X86Assembler::JmpSrc notImm = iter->from;
             m_jit.link((++iter)->from, m_jit.label());
             m_jit.link((++iter)->from, m_jit.label());
-            m_jit.link((++iter)->from, m_jit.label());
             emitFastArithIntToImmNoCheck(X86::edx);
             m_jit.link(notImm, m_jit.label());
+            emitGetArg(instruction[i + 3].u.operand, X86::ecx);
             emitPutArg(X86::eax, 0);
             emitPutArg(X86::edx, 4);
             emitPutArg(X86::ecx, 8);
             emitCall(i, Machine::cti_op_put_by_val);
+            m_jit.link(m_jit.emitUnlinkedJmp(), m_labels[i + 4]);
+
+            // Slow cases for immediate int accesses to arrays.
+            m_jit.link((++iter)->from, m_jit.label());
+            m_jit.link((++iter)->from, m_jit.label());
+            emitGetArg(instruction[i + 3].u.operand, X86::ecx);
+            emitPutArg(X86::eax, 0);
+            emitPutArg(X86::edx, 4);
+            emitPutArg(X86::ecx, 8);
+            emitCall(i, Machine::cti_op_put_by_val_array);
+
             i += 4;
             break;
         }
@@ -1700,6 +1853,8 @@ void CTI::privateCompileSlowCases()
 
         m_jit.link(m_jit.emitUnlinkedJmp(), m_labels[i]);
     }
+
+    ASSERT(structureIDInstructionIndex == m_codeBlock->structureIDInstructions.size());
 }
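
The slowCaseBegin recovery described in the op_get_by_id slow case above is plain pointer arithmetic: the call's return address keys the getStubInfo() lookup, and subtracting the asserted constant recovers the head of the slow case, so that a failed trampoline can link its failure jumps back to it. A sketch, assuming the constant is visible:

    static void* slowCaseBeginFor(void* callReturnLocation)
    {
        return reinterpret_cast<char*>(callReturnLocation) - repatchOffsetGetByIdSlowCaseCall;
    }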
 
 void CTI::privateCompile()
@@ -1761,10 +1916,17 @@ void CTI::privateCompile()
     for (Vector<JSRInfo>::iterator iter = m_jsrSites.begin(); iter != m_jsrSites.end(); ++iter)
         X86Assembler::linkAbsoluteAddress(code, iter->addrPosition, iter->target);
 
+    for (unsigned i = 0; i < m_codeBlock->structureIDInstructions.size(); ++i) {
+        StructureStubInfo& info = m_codeBlock->structureIDInstructions[i];
+        info.callReturnLocation = X86Assembler::getRelocatedAddress(code, m_structureStubCompilationInfo[i].callReturnLocation);
+        info.hotPathBegin = X86Assembler::getRelocatedAddress(code, m_structureStubCompilationInfo[i].hotPathBegin);
+    }
+
     m_codeBlock->ctiCode = code;
 }
 
-void* CTI::privateCompileGetByIdSelf(StructureID* structureID, size_t cachedOffset)
+void CTI::privateCompileGetByIdSelf(StructureID* structureID, size_t cachedOffset, void* returnAddress)
 {
     // Check eax is an object of the right StructureID.
     m_jit.testl_i32r(JSImmediate::TagMask, X86::eax);
@@ -1783,16 +1945,65 @@ void* CTI::privateCompileGetByIdSelf(StructureID* structureID, size_t cachedOffs
     X86Assembler::link(code, failureCases1, reinterpret_cast<void*>(Machine::cti_op_get_by_id_fail));
     X86Assembler::link(code, failureCases2, reinterpret_cast<void*>(Machine::cti_op_get_by_id_fail));
     
-    m_codeBlock->structureIDAccessStubs.append(code);
+    m_codeBlock->getStubInfo(returnAddress).stubRoutine = code;
     
-    return code;
+    ctiRepatchCallByReturnAddress(returnAddress, code);
 }
 
-void* CTI::privateCompileGetByIdProto(ExecState* exec, StructureID* structureID, StructureID* prototypeStructureID, size_t cachedOffset)
+void CTI::privateCompileGetByIdProto(StructureID* structureID, StructureID* prototypeStructureID, size_t cachedOffset, void* returnAddress)
 {
+#if USE(CTI_REPATCH_PIC)
+    StructureStubInfo& info = m_codeBlock->getStubInfo(returnAddress);
+
+    // We don't want to repatch more than once - in future go to cti_op_get_by_id_generic.
+    ctiRepatchCallByReturnAddress(returnAddress, reinterpret_cast<void*>(Machine::cti_op_get_by_id_fail));
+
     // The prototype object definitely exists (if this stub exists the CodeBlock is referencing a StructureID that is
     // referencing the prototype object - let's speculatively load its table nice and early!)
-    JSObject* protoObject = static_cast<JSObject*>(structureID->prototypeForLookup(exec));
+    JSObject* protoObject = static_cast<JSObject*>(structureID->prototypeForLookup(m_exec));
+    PropertyStorage* protoPropertyStorage = &protoObject->m_propertyStorage;
+    m_jit.movl_mr(static_cast<void*>(protoPropertyStorage), X86::edx);
+
+    // check eax is an object of the right StructureID.
+    m_jit.testl_i32r(JSImmediate::TagMask, X86::eax);
+    X86Assembler::JmpSrc failureCases1 = m_jit.emitUnlinkedJne();
+    m_jit.cmpl_i32m(reinterpret_cast<uint32_t>(structureID), OBJECT_OFFSET(JSCell, m_structureID), X86::eax);
+    X86Assembler::JmpSrc failureCases2 = m_jit.emitUnlinkedJne();
+
+    // Check the prototype object's StructureID had not changed.
+    StructureID** protoStructureIDAddress = &(protoObject->m_structureID);
+    m_jit.cmpl_i32m(reinterpret_cast<uint32_t>(prototypeStructureID), static_cast<void*>(protoStructureIDAddress));
+    X86Assembler::JmpSrc failureCases3 = m_jit.emitUnlinkedJne();
+
+    // Checks out okay! - getDirectOffset
+    m_jit.movl_mr(cachedOffset * sizeof(JSValue*), X86::edx, X86::ecx);
+
+    X86Assembler::JmpSrc success = m_jit.emitUnlinkedJmp();
+
+    void* code = m_jit.copy();
+    ASSERT(code);
+
+    // Use the repatch information to link the failure cases back to the original slow case routine.
+    void* slowCaseBegin = reinterpret_cast<char*>(info.callReturnLocation) - repatchOffsetGetByIdSlowCaseCall;
+    X86Assembler::link(code, failureCases1, slowCaseBegin);
+    X86Assembler::link(code, failureCases2, slowCaseBegin);
+    X86Assembler::link(code, failureCases3, slowCaseBegin);
+
+    // On success return back to the hot path code, at a point where it will perform the store to dest for us.
+    intptr_t successDest = (intptr_t)(info.hotPathBegin) + repatchOffsetGetByIdPropertyMapOffset;
+    X86Assembler::link(code, success, reinterpret_cast<void*>(successDest));
+
+    // Track the stub we have created so that it will be deleted later.
+    m_codeBlock->getStubInfo(returnAddress).stubRoutine = code;
+
+    // Finally repatch the jump to the slow case back in the hot path to jump here instead.
+    // FIXME: should revert this repatching, on failure.
+    intptr_t jmpLocation = reinterpret_cast<intptr_t>(info.hotPathBegin) + repatchOffsetGetByIdBranchToSlowCase;
+    X86Assembler::repatchBranchOffset(jmpLocation, code);
+#else
+    // The prototype object definitely exists (if this stub exists the CodeBlock is referencing a StructureID that is
+    // referencing the prototype object - let's speculatively load its table nice and early!)
+    JSObject* protoObject = static_cast<JSObject*>(structureID->prototypeForLookup(m_exec));
     PropertyStorage* protoPropertyStorage = &protoObject->m_propertyStorage;
     m_jit.movl_mr(static_cast<void*>(protoPropertyStorage), X86::edx);
 
@@ -1819,12 +2030,13 @@ void* CTI::privateCompileGetByIdProto(ExecState* exec, StructureID* structureID,
     X86Assembler::link(code, failureCases2, reinterpret_cast<void*>(Machine::cti_op_get_by_id_fail));
     X86Assembler::link(code, failureCases3, reinterpret_cast<void*>(Machine::cti_op_get_by_id_fail));
 
-    m_codeBlock->structureIDAccessStubs.append(code);
+    m_codeBlock->getStubInfo(returnAddress).stubRoutine = code;
 
-    return code;
+    ctiRepatchCallByReturnAddress(returnAddress, code);
+#endif
 }
 
-void* CTI::privateCompileGetByIdChain(ExecState* exec, StructureID* structureID, StructureIDChain* chain, size_t count, size_t cachedOffset)
+void CTI::privateCompileGetByIdChain(StructureID* structureID, StructureIDChain* chain, size_t count, size_t cachedOffset, void* returnAddress)
 {
     ASSERT(count);
     
@@ -1840,7 +2052,7 @@ void* CTI::privateCompileGetByIdChain(ExecState* exec, StructureID* structureID,
     RefPtr<StructureID>* chainEntries = chain->head();
     JSObject* protoObject = 0;
     for (unsigned i = 0; i<count; ++i) {
-        protoObject = static_cast<JSObject*>(currStructureID->prototypeForLookup(exec));
+        protoObject = static_cast<JSObject*>(currStructureID->prototypeForLookup(m_exec));
         currStructureID = chainEntries[i].get();
 
         // Check the prototype object's StructureID had not changed.
@@ -1862,11 +2074,13 @@ void* CTI::privateCompileGetByIdChain(ExecState* exec, StructureID* structureID,
 
     for (unsigned i = 0; i < bucketsOfFail.size(); ++i)
         X86Assembler::link(code, bucketsOfFail[i], reinterpret_cast<void*>(Machine::cti_op_get_by_id_fail));
-    m_codeBlock->structureIDAccessStubs.append(code);
-    return code;
+
+    m_codeBlock->getStubInfo(returnAddress).stubRoutine = code;
+
+    ctiRepatchCallByReturnAddress(returnAddress, code);
 }
 
-void* CTI::privateCompilePutByIdReplace(StructureID* structureID, size_t cachedOffset)
+void CTI::privateCompilePutByIdReplace(StructureID* structureID, size_t cachedOffset, void* returnAddress)
 {
     // check eax is an object of the right StructureID.
     m_jit.testl_i32r(JSImmediate::TagMask, X86::eax);
@@ -1885,9 +2099,9 @@ void* CTI::privateCompilePutByIdReplace(StructureID* structureID, size_t cachedO
     X86Assembler::link(code, failureCases1, reinterpret_cast<void*>(Machine::cti_op_put_by_id_fail));
     X86Assembler::link(code, failureCases2, reinterpret_cast<void*>(Machine::cti_op_put_by_id_fail));
 
-    m_codeBlock->structureIDAccessStubs.append(code);
+    m_codeBlock->getStubInfo(returnAddress).stubRoutine = code;
     
-    return code;
+    ctiRepatchCallByReturnAddress(returnAddress, code);
 }
 
 extern "C" {
@@ -1921,7 +2135,7 @@ static inline bool transitionWillNeedStorageRealloc(StructureID* oldStructureID,
     return false;
 }
 
-void* CTI::privateCompilePutByIdTransition(StructureID* oldStructureID, StructureID* newStructureID, size_t cachedOffset, StructureIDChain* sIDC)
+void CTI::privateCompilePutByIdTransition(StructureID* oldStructureID, StructureID* newStructureID, size_t cachedOffset, StructureIDChain* sIDC, void* returnAddress)
 {
     Vector<X86Assembler::JmpSrc, 16> failureCases;
     // check eax is an object of the right StructureID.
@@ -1992,12 +2206,12 @@ void* CTI::privateCompilePutByIdTransition(StructureID* oldStructureID, Structur
     if (transitionWillNeedStorageRealloc(oldStructureID, newStructureID))
         X86Assembler::link(code, callTarget, reinterpret_cast<void*>(transitionObject));
     
-    m_codeBlock->structureIDAccessStubs.append(code);
+    m_codeBlock->getStubInfo(returnAddress).stubRoutine = code;
     
-    return code;
+    ctiRepatchCallByReturnAddress(returnAddress, code);
 }
 
-void* CTI::privateArrayLengthTrampoline()
+void* CTI::privateCompileArrayLengthTrampoline()
 {
     // Check eax is an array
     m_jit.testl_i32r(JSImmediate::TagMask, X86::eax);
@@ -2025,7 +2239,7 @@ void* CTI::privateArrayLengthTrampoline()
     return code;
 }
 
-void* CTI::privateStringLengthTrampoline()
+void* CTI::privateCompileStringLengthTrampoline()
 {
     // Check eax is a string
     m_jit.testl_i32r(JSImmediate::TagMask, X86::eax);
@@ -2053,6 +2267,77 @@ void* CTI::privateStringLengthTrampoline()
     return code;
 }
 
+void CTI::patchGetByIdSelf(CodeBlock* codeBlock, StructureID* structureID, size_t cachedOffset, void* returnAddress)
+{
+    StructureStubInfo& info = codeBlock->getStubInfo(returnAddress);
+
+    // We don't want to repatch more than once - in future go to cti_op_get_by_id_generic.
+    // Should probably go to Machine::cti_op_get_by_id_fail, but that doesn't do anything interesting right now.
+    ctiRepatchCallByReturnAddress(returnAddress, (void*)(Machine::cti_op_get_by_id_generic));
+
+    // Repatch the offset into the property map to load from, then repatch the StructureID to look for.
+    X86Assembler::repatchDisplacement(reinterpret_cast<intptr_t>(info.hotPathBegin) + repatchOffsetGetByIdPropertyMapOffset, cachedOffset * sizeof(JSValue*));
+    X86Assembler::repatchImmediate(reinterpret_cast<intptr_t>(info.hotPathBegin) + repatchOffsetGetByIdStructureID, reinterpret_cast<uint32_t>(structureID));
+}
+
+void CTI::patchPutByIdReplace(CodeBlock* codeBlock, StructureID* structureID, size_t cachedOffset, void* returnAddress)
+{
+    StructureStubInfo& info = codeBlock->getStubInfo(returnAddress);
+    
+    // We don't want to repatch more than once - in future go to cti_op_put_by_id_generic.
+    // Should probably go to Machine::cti_op_put_by_id_fail, but that doesn't do anything interesting right now.
+    ctiRepatchCallByReturnAddress(returnAddress, (void*)(Machine::cti_op_put_by_id_generic));
+
+    // Repatch the offset into the property map to load from, then repatch the StructureID to look for.
+    X86Assembler::repatchDisplacement(reinterpret_cast<intptr_t>(info.hotPathBegin) + repatchOffsetPutByIdPropertyMapOffset, cachedOffset * sizeof(JSValue*));
+    X86Assembler::repatchImmediate(reinterpret_cast<intptr_t>(info.hotPathBegin) + repatchOffsetPutByIdStructureID, reinterpret_cast<uint32_t>(structureID));
+}
+
+void CTI::privateCompilePatchGetArrayLength(void* returnAddress)
+{
+    StructureStubInfo& info = m_codeBlock->getStubInfo(returnAddress);
+
+    // We don't want to repatch more than once - in future go to cti_op_get_by_id_generic.
+    ctiRepatchCallByReturnAddress(returnAddress, reinterpret_cast<void*>(Machine::cti_op_get_by_id_fail));
+
+    // Check eax is an array
+    m_jit.testl_i32r(JSImmediate::TagMask, X86::eax);
+    X86Assembler::JmpSrc failureCases1 = m_jit.emitUnlinkedJne();
+    m_jit.cmpl_i32m(reinterpret_cast<unsigned>(m_machine->m_jsArrayVptr), X86::eax);
+    X86Assembler::JmpSrc failureCases2 = m_jit.emitUnlinkedJne();
+
+    // Checks out okay! - get the length from the storage
+    m_jit.movl_mr(OBJECT_OFFSET(JSArray, m_storage), X86::eax, X86::ecx);
+    m_jit.movl_mr(OBJECT_OFFSET(ArrayStorage, m_length), X86::ecx, X86::ecx);
+
+    m_jit.addl_rr(X86::ecx, X86::ecx);
+    X86Assembler::JmpSrc failureCases3 = m_jit.emitUnlinkedJo();
+    m_jit.addl_i8r(1, X86::ecx);
+
+    X86Assembler::JmpSrc success = m_jit.emitUnlinkedJmp();
+
+    void* code = m_jit.copy();
+    ASSERT(code);
+
+    // Use the repatch information to link the failure cases back to the original slow case routine.
+    void* slowCaseBegin = reinterpret_cast<char*>(info.callReturnLocation) - repatchOffsetGetByIdSlowCaseCall;
+    X86Assembler::link(code, failureCases1, slowCaseBegin);
+    X86Assembler::link(code, failureCases2, slowCaseBegin);
+    X86Assembler::link(code, failureCases3, slowCaseBegin);
+
+    // On success return back to the hot path code, at a point where it will perform the store to dest for us.
+    intptr_t successDest = (intptr_t)(info.hotPathBegin) + repatchOffsetGetByIdPropertyMapOffset;
+    X86Assembler::link(code, success, reinterpret_cast<void*>(successDest));
+
+    // Track the stub we have created so that it will be deleted later.
+    m_codeBlock->getStubInfo(returnAddress).stubRoutine = code;
+
+    // Finally repatch the jump to the slow case back in the hot path to jump here instead.
+    // FIXME: should revert this repatching, on failure.
+    intptr_t jmpLocation = reinterpret_cast<intptr_t>(info.hotPathBegin) + repatchOffsetGetByIdBranchToSlowCase;
+    X86Assembler::repatchBranchOffset(jmpLocation, code);
+}
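
The addl_rr / jo / addl_i8r(1) tail above is immediate-number boxing: JSC encodes an immediate int as (value << 1) | 1, so doubling the length with a signed-overflow check and then adding one yields the tagged result. A sketch, assuming that encoding:

    #include <cstdint>

    // Returning false stands for the jo branch to a failure case.
    static bool boxImmediateInt(int32_t length, void** result)
    {
        int64_t doubled = static_cast<int64_t>(length) * 2;   // addl_rr(ecx, ecx)
        if (doubled != static_cast<int32_t>(doubled))         // emitUnlinkedJo
            return false;                                     // does not fit in 31 bits
        *result = reinterpret_cast<void*>(static_cast<intptr_t>(doubled) + 1); // addl_i8r(1, ecx)
        return true;
    }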
+
 void CTI::emitGetVariableObjectRegister(X86Assembler::RegisterID variableObject, int index, X86Assembler::RegisterID dst)
 {
     m_jit.movl_mr(JSVariableObject::offsetOf_d(), variableObject, dst);
index 300402b..6eb761a 100644
@@ -28,6 +28,8 @@
 
 #if ENABLE(CTI)
 
+#define WTF_USE_CTI_REPATCH_PIC 1
+
 #include "Opcode.h"
 #include "RegisterFile.h"
 #include <masm/X86Assembler.h>
@@ -216,6 +218,11 @@ namespace JSC {
         }
     };
 
+    struct StructureStubCompilationInfo {
+        X86Assembler::JmpSrc callReturnLocation;
+        X86Assembler::JmpDst hotPathBegin;
+    };
+
     extern "C" {
         JSValue* ctiTrampoline(void* code, ExecState* exec, RegisterFile* registerFile, Register* r, ScopeChainNode* scopeChain, CodeBlock* codeBlock, JSValue** exception, Profiler**);
         void ctiVMThrowTrampoline();
@@ -225,6 +232,20 @@ namespace JSC {
     void ctiRepatchCallByReturnAddress(void* where, void* what);
 
     class CTI {
+        static const int repatchGetByIdDefaultStructureID = -1;
+        // Magic number - the initial offset must not be representable as a signed 8-bit value, or the
+        // X86Assembler will compress the displacement, and we may not be able to fit a repatched offset.
+        static const int repatchGetByIdDefaultOffset = 256;
+
+        // These architecture-specific values are used to enable repatching - see comment on op_put_by_id.
+        static const int repatchOffsetPutByIdStructureID = 19;
+        static const int repatchOffsetPutByIdPropertyMapOffset = 34;
+        // These architecture-specific values are used to enable repatching - see comment on op_get_by_id.
+        static const int repatchOffsetGetByIdStructureID = 19;
+        static const int repatchOffsetGetByIdBranchToSlowCase = 25;
+        static const int repatchOffsetGetByIdPropertyMapOffset = 34;
+        static const int repatchOffsetGetByIdSlowCaseCall = 17;
+
     public:
         static void compile(Machine* machine, ExecState* exec, CodeBlock* codeBlock)
         {
@@ -236,46 +257,55 @@ namespace JSC {
         static void* compileRegExp(ExecState* exec, const UString& pattern, unsigned* numSubpatterns_ptr, const char** error_ptr, bool ignoreCase = false, bool multiline = false);
 #endif
 
-        static void* compileGetByIdSelf(Machine* machine, ExecState* exec, CodeBlock* codeBlock, StructureID* structureID, size_t cachedOffset)
+        static void compileGetByIdSelf(Machine* machine, ExecState* exec, CodeBlock* codeBlock, StructureID* structureID, size_t cachedOffset, void* returnAddress)
         {
             CTI cti(machine, exec, codeBlock);
-            return cti.privateCompileGetByIdSelf(structureID, cachedOffset);
+            cti.privateCompileGetByIdSelf(structureID, cachedOffset, returnAddress);
         }
 
-        static void* compileGetByIdProto(Machine* machine, ExecState* exec, CodeBlock* codeBlock, StructureID* structureID, StructureID* prototypeStructureID, size_t cachedOffset)
+        static void compileGetByIdProto(Machine* machine, ExecState* exec, CodeBlock* codeBlock, StructureID* structureID, StructureID* prototypeStructureID, size_t cachedOffset, void* returnAddress)
         {
             CTI cti(machine, exec, codeBlock);
-            return cti.privateCompileGetByIdProto(exec, structureID, prototypeStructureID, cachedOffset);
+            cti.privateCompileGetByIdProto(structureID, prototypeStructureID, cachedOffset, returnAddress);
         }
 
-        static void* compileGetByIdChain(Machine* machine, ExecState* exec, CodeBlock* codeBlock, StructureID* structureID, StructureIDChain* chain, size_t count, size_t cachedOffset)
+        static void compileGetByIdChain(Machine* machine, ExecState* exec, CodeBlock* codeBlock, StructureID* structureID, StructureIDChain* chain, size_t count, size_t cachedOffset, void* returnAddress)
         {
             CTI cti(machine, exec, codeBlock);
-            return cti.privateCompileGetByIdChain(exec, structureID, chain, count, cachedOffset);
+            cti.privateCompileGetByIdChain(structureID, chain, count, cachedOffset, returnAddress);
         }
 
-        static void* compilePutByIdReplace(Machine* machine, ExecState* exec, CodeBlock* codeBlock, StructureID* structureID, size_t cachedOffset)
+        static void compilePutByIdReplace(Machine* machine, ExecState* exec, CodeBlock* codeBlock, StructureID* structureID, size_t cachedOffset, void* returnAddress)
         {
             CTI cti(machine, exec, codeBlock);
-            return cti.privateCompilePutByIdReplace(structureID, cachedOffset);
+            cti.privateCompilePutByIdReplace(structureID, cachedOffset, returnAddress);
         }
         
-        static void* compilePutByIdTransition(Machine* machine, ExecState* exec, CodeBlock* codeBlock, StructureID* oldStructureID, StructureID* newStructureID, size_t cachedOffset, StructureIDChain* sIDC)
+        static void compilePutByIdTransition(Machine* machine, ExecState* exec, CodeBlock* codeBlock, StructureID* oldStructureID, StructureID* newStructureID, size_t cachedOffset, StructureIDChain* sIDC, void* returnAddress)
         {
             CTI cti(machine, exec, codeBlock);
-            return cti.privateCompilePutByIdTransition(oldStructureID, newStructureID, cachedOffset, sIDC);
+            cti.privateCompilePutByIdTransition(oldStructureID, newStructureID, cachedOffset, sIDC, returnAddress);
         }
 
         static void* compileArrayLengthTrampoline(Machine* machine, ExecState* exec, CodeBlock* codeBlock)
         {
             CTI cti(machine, exec, codeBlock);
-            return cti.privateArrayLengthTrampoline();
+            return cti.privateCompileArrayLengthTrampoline();
         }
 
         static void* compileStringLengthTrampoline(Machine* machine, ExecState* exec, CodeBlock* codeBlock)
         {
             CTI cti(machine, exec, codeBlock);
-            return cti.privateStringLengthTrampoline();
+            return cti.privateCompileStringLengthTrampoline();
+        }
+
+        static void patchGetByIdSelf(CodeBlock* codeBlock, StructureID* structureID, size_t cachedOffset, void* returnAddress);
+        static void patchPutByIdReplace(CodeBlock* codeBlock, StructureID* structureID, size_t cachedOffset, void* returnAddress);
+
+        static void compilePatchGetArrayLength(Machine* machine, ExecState* exec, CodeBlock* codeBlock, void* returnAddress)
+        {
+            CTI cti(machine, exec, codeBlock);
+            return cti.privateCompilePatchGetArrayLength(returnAddress);
         }
 
         inline static JSValue* execute(void* code, ExecState* exec, RegisterFile* registerFile, Register* r, ScopeChainNode* scopeChain, CodeBlock* codeBlock, JSValue** exception)
@@ -293,13 +323,15 @@ namespace JSC {
         void privateCompileLinkPass();
         void privateCompileSlowCases();
         void privateCompile();
-        void* privateCompileGetByIdSelf(StructureID*, size_t cachedOffset);
-        void* privateCompileGetByIdProto(ExecState*, StructureID*, StructureID* prototypeStructureID, size_t cachedOffset);
-        void* privateCompileGetByIdChain(ExecState*, StructureID*, StructureIDChain*, size_t count, size_t cachedOffset);
-        void* privateCompilePutByIdReplace(StructureID*, size_t cachedOffset);
-        void* privateCompilePutByIdTransition(StructureID*, StructureID*, size_t cachedOffset, StructureIDChain*);
-        void* privateArrayLengthTrampoline();
-        void* privateStringLengthTrampoline();
+        void privateCompileGetByIdSelf(StructureID*, size_t cachedOffset, void* returnAddress);
+        void privateCompileGetByIdProto(StructureID*, StructureID* prototypeStructureID, size_t cachedOffset, void* returnAddress);
+        void privateCompileGetByIdChain(StructureID*, StructureIDChain*, size_t count, size_t cachedOffset, void* returnAddress);
+        void privateCompilePutByIdReplace(StructureID*, size_t cachedOffset, void* returnAddress);
+        void privateCompilePutByIdTransition(StructureID*, StructureID*, size_t cachedOffset, StructureIDChain*, void* returnAddress);
+
+        void* privateCompileArrayLengthTrampoline();
+        void* privateCompileStringLengthTrampoline();
+        void privateCompilePatchGetArrayLength(void* returnAddress);
 
         enum CompileOpCallType { OpCallNormal, OpCallEval, OpConstruct };
         void compileOpCall(Instruction* instruction, unsigned i, CompileOpCallType type = OpCallNormal);
@@ -319,6 +351,7 @@ namespace JSC {
         JSValue* getConstantImmediateNumericArg(unsigned src);
         unsigned getDeTaggedConstantImmediate(JSValue* imm);
 
+        void emitJumpSlowCaseIfNotJSCell(X86Assembler::RegisterID reg, unsigned opcodeIndex);
         void emitJumpSlowCaseIfNotImm(X86Assembler::RegisterID, unsigned opcodeIndex);
         void emitJumpSlowCaseIfNotImms(X86Assembler::RegisterID, X86Assembler::RegisterID, unsigned opcodeIndex);
 
@@ -331,11 +364,11 @@ namespace JSC {
 
         void emitDebugExceptionCheck();
 
-        void emitCall(unsigned opcodeIndex, CTIHelper_j);
-        void emitCall(unsigned opcodeIndex, CTIHelper_p);
-        void emitCall(unsigned opcodeIndex, CTIHelper_b);
-        void emitCall(unsigned opcodeIndex, CTIHelper_v);
-        void emitCall(unsigned opcodeIndex, CTIHelper_s);
+        X86Assembler::JmpSrc emitCall(unsigned opcodeIndex, CTIHelper_j);
+        X86Assembler::JmpSrc emitCall(unsigned opcodeIndex, CTIHelper_p);
+        X86Assembler::JmpSrc emitCall(unsigned opcodeIndex, CTIHelper_b);
+        X86Assembler::JmpSrc emitCall(unsigned opcodeIndex, CTIHelper_v);
+        X86Assembler::JmpSrc emitCall(unsigned opcodeIndex, CTIHelper_s);
         
         void emitGetVariableObjectRegister(X86Assembler::RegisterID variableObject, int index, X86Assembler::RegisterID dst);
         void emitPutVariableObjectRegister(X86Assembler::RegisterID src, X86Assembler::RegisterID variableObject, int index);
@@ -352,6 +385,7 @@ namespace JSC {
 
         Vector<CallRecord> m_calls;
         Vector<X86Assembler::JmpDst> m_labels;
+        Vector<StructureStubCompilationInfo> m_structureStubCompilationInfo;
         Vector<JmpTable> m_jmpTable;
 
         struct JSRInfo {
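
On the magic number above: x86 ModRM encoding stores a displacement in [-128, 127] as a single byte and anything wider as four bytes, so a placeholder of 256 forces the four-byte form and leaves room for repatchDisplacement to overwrite it with any 32-bit offset. A sketch of the encoder's choice (the standard x86 rule, not the assembler's actual code):

    #include <cstdint>

    static bool usesWideDisplacement(int32_t displacement)
    {
        return displacement < -128 || displacement > 127; // disp32 (mod=10) vs disp8 (mod=01)
    }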
index 78b4d09..6f4b8cb 100644
@@ -277,7 +277,7 @@ void CodeBlock::dump(ExecState* exec) const
         printf("\nStructureIDs:\n");
         size_t i = 0;
         do {
-             printStructureIDs(&instructions[structureIDInstructions[i]]);
+             printStructureIDs(&instructions[structureIDInstructions[i].opcodeIndex]);
              ++i;
         } while (i < structureIDInstructions.size());
     }
@@ -875,13 +875,11 @@ void CodeBlock::dump(ExecState* exec, const Vector<Instruction>::const_iterator&
 CodeBlock::~CodeBlock()
 {
     size_t size = structureIDInstructions.size();
-    for (size_t i = 0; i < size; ++i)
-        derefStructureIDs(&instructions[structureIDInstructions[i]]);
-
-    size = structureIDAccessStubs.size();
-    for (size_t i = 0; i < size; ++i)
-        fastFree(structureIDAccessStubs[i]);
-
+    for (size_t i = 0; i < size; ++i) {
+        derefStructureIDs(&instructions[structureIDInstructions[i].opcodeIndex]);
+        if (structureIDInstructions[i].stubRoutine)
+            fastFree(structureIDInstructions[i].stubRoutine);
+    }
 #if ENABLE(CTI)
     if (ctiCode)
         fastFree(ctiCode);
index 991ea61..be3b662 100644
@@ -76,6 +76,21 @@ namespace JSC {
 #endif
     };
 
+    struct StructureStubInfo {
+        StructureStubInfo(unsigned opcodeIndex)
+            : opcodeIndex(opcodeIndex)
+            , stubRoutine(0)
+            , callReturnLocation(0)
+            , hotPathBegin(0)
+        {
+        }
+    
+        unsigned opcodeIndex;
+        void* stubRoutine;
+        void* callReturnLocation;
+        void* hotPathBegin;
+    };
+
     struct StringJumpTable {
         typedef HashMap<RefPtr<UString::Rep>, OffsetLocation> StringOffsetTable;
         StringOffsetTable offsetTable;
@@ -199,6 +214,20 @@ namespace JSC {
         void refStructureIDs(Instruction* vPC) const;
         void derefStructureIDs(Instruction* vPC) const;
 
+        StructureStubInfo& getStubInfo(void* returnAddress)
+        {
+            // FIXME: would a binary chop be faster here?
+            for (unsigned i = 0; i < structureIDInstructions.size(); ++i) {
+                if (structureIDInstructions[i].callReturnLocation == returnAddress)
+                    return structureIDInstructions[i];
+            }
+            
+            ASSERT_NOT_REACHED();
+            // keep the compiler happy.
+            static StructureStubInfo duff(0);
+            return duff;
+        }
+
         ScopeNode* ownerNode;
         JSGlobalData* globalData;
 #if ENABLE(CTI)
@@ -218,8 +247,7 @@ namespace JSC {
         unsigned sourceOffset;
 
         Vector<Instruction> instructions;
-        Vector<size_t> structureIDInstructions;
-        Vector<void*> structureIDAccessStubs;
+        Vector<StructureStubInfo> structureIDInstructions;
 
         // Constant pool
         Vector<Identifier> identifiers;
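
On the FIXME in getStubInfo above: the stub infos are appended in code-emission order, so once relocated their callReturnLocation addresses should increase monotonically (an assumption of this sketch), which would make a binary chop straightforward:

    #include <cstddef>
    #include <cstdint>

    // Sketch only - a lower_bound-style search over return addresses sorted ascending.
    static StructureStubInfo* findStubInfo(StructureStubInfo* infos, std::size_t count, void* returnAddress)
    {
        std::uintptr_t target = reinterpret_cast<std::uintptr_t>(returnAddress);
        std::size_t lo = 0, hi = count;
        while (lo < hi) {
            std::size_t mid = lo + (hi - lo) / 2;
            if (reinterpret_cast<std::uintptr_t>(infos[mid].callReturnLocation) < target)
                lo = mid + 1;
            else
                hi = mid;
        }
        return (lo < count && infos[lo].callReturnLocation == returnAddress) ? &infos[lo] : 0;
    }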
index 4c97b22..635b235 100644
@@ -3812,7 +3812,7 @@ NEVER_INLINE void Machine::tryCTICachePutByID(ExecState* exec, CodeBlock* codeBl
         vPC[6] = chain;
         vPC[7] = slot.cachedOffset();
         codeBlock->refStructureIDs(vPC);
-        ctiRepatchCallByReturnAddress(returnAddress, CTI::compilePutByIdTransition(this, exec, codeBlock, structureID->previousID(), structureID, slot.cachedOffset(), chain));
+        CTI::compilePutByIdTransition(this, exec, codeBlock, structureID->previousID(), structureID, slot.cachedOffset(), chain, returnAddress);
         return;
     }
     
@@ -3821,7 +3821,12 @@ NEVER_INLINE void Machine::tryCTICachePutByID(ExecState* exec, CodeBlock* codeBl
     vPC[5] = slot.cachedOffset();
     codeBlock->refStructureIDs(vPC);
 
-    ctiRepatchCallByReturnAddress(returnAddress, CTI::compilePutByIdReplace(this, exec, codeBlock, structureID, slot.cachedOffset()));
+#if USE(CTI_REPATCH_PIC)
+    UNUSED_PARAM(exec);
+    CTI::patchPutByIdReplace(codeBlock, structureID, slot.cachedOffset(), returnAddress);
+#else
+    CTI::compilePutByIdReplace(this, exec, codeBlock, structureID, slot.cachedOffset(), returnAddress);
+#endif
 }
 
 void* Machine::getCTIArrayLengthTrampoline(ExecState* exec, CodeBlock* codeBlock)
@@ -3845,10 +3850,16 @@ NEVER_INLINE void Machine::tryCTICacheGetByID(ExecState* exec, CodeBlock* codeBl
     // The interpreter checks for recursion here; I do not believe this can occur in CTI.
 
     if (isJSArray(baseValue) && propertyName == exec->propertyNames().length) {
+#if USE(CTI_REPATCH_PIC)
+        CTI::compilePatchGetArrayLength(this, exec, codeBlock, returnAddress);
+#else
         ctiRepatchCallByReturnAddress(returnAddress, getCTIArrayLengthTrampoline(exec, codeBlock));
+#endif
         return;
     }
     if (isJSString(baseValue) && propertyName == exec->propertyNames().length) {
+        // The tradeoff of compiling a repatched inline string length access routine does not seem
+        // to pay off, so we currently only do this for arrays.
         ctiRepatchCallByReturnAddress(returnAddress, getCTIStringLengthTrampoline(exec, codeBlock));
         return;
     }
@@ -3888,7 +3899,11 @@ NEVER_INLINE void Machine::tryCTICacheGetByID(ExecState* exec, CodeBlock* codeBl
         vPC[5] = slot.cachedOffset();
         codeBlock->refStructureIDs(vPC);
         
-        ctiRepatchCallByReturnAddress(returnAddress, CTI::compileGetByIdSelf(this, exec, codeBlock, structureID, slot.cachedOffset()));
+#if USE(CTI_REPATCH_PIC)
+        CTI::patchGetByIdSelf(codeBlock, structureID, slot.cachedOffset(), returnAddress);
+#else
+        CTI::compileGetByIdSelf(this, exec, codeBlock, structureID, slot.cachedOffset(), returnAddress);
+#endif
         return;
     }
 
@@ -3911,7 +3926,7 @@ NEVER_INLINE void Machine::tryCTICacheGetByID(ExecState* exec, CodeBlock* codeBl
         vPC[6] = slot.cachedOffset();
         codeBlock->refStructureIDs(vPC);
 
-        ctiRepatchCallByReturnAddress(returnAddress, CTI::compileGetByIdProto(this, exec, codeBlock, structureID, slotBaseObject->structureID(), slot.cachedOffset()));
+        CTI::compileGetByIdProto(this, exec, codeBlock, structureID, slotBaseObject->structureID(), slot.cachedOffset(), returnAddress);
         return;
     }
 
@@ -3953,7 +3968,7 @@ NEVER_INLINE void Machine::tryCTICacheGetByID(ExecState* exec, CodeBlock* codeBl
     vPC[7] = slot.cachedOffset();
     codeBlock->refStructureIDs(vPC);
 
-    ctiRepatchCallByReturnAddress(returnAddress, CTI::compileGetByIdChain(this, exec, codeBlock, structureID, chain, count, slot.cachedOffset()));
+    CTI::compileGetByIdChain(this, exec, codeBlock, structureID, chain, count, slot.cachedOffset(), returnAddress);
 }
 
 
@@ -4640,6 +4655,30 @@ void Machine::cti_op_put_by_val(CTI_ARGS)
     VM_CHECK_EXCEPTION_AT_END();
 }
 
+void Machine::cti_op_put_by_val_array(CTI_ARGS)
+{
+    ExecState* exec = ARG_exec;
+
+    JSValue* baseValue = ARG_src1;
+    int i = ARG_int2;
+    JSValue* value = ARG_src3;
+
+    ASSERT(exec->machine()->isJSArray(baseValue));
+
+    if (LIKELY(i >= 0))
+        static_cast<JSArray*>(baseValue)->JSArray::put(exec, i, value);
+    else {
+        Identifier property(exec, JSImmediate::from(i)->toString(exec));
+        // FIXME: can toString throw an exception here?
+        if (!exec->hadException()) { // Don't put to an object if toString threw an exception.
+            PutPropertySlot slot;
+            baseValue->put(exec, property, value, slot);
+        }
+    }
+
+    VM_CHECK_EXCEPTION_AT_END();
+}
+
 JSValue* Machine::cti_op_lesseq(CTI_ARGS)
 {
     ExecState* exec = ARG_exec;
index 0519fd4..0f4d3ae 100644
@@ -169,6 +169,7 @@ namespace JSC {
         static JSValue* SFX_CALL cti_op_resolve_func(CTI_ARGS);
         static JSValue* SFX_CALL cti_op_sub(CTI_ARGS);
         static void SFX_CALL cti_op_put_by_val(CTI_ARGS);
+        static void SFX_CALL cti_op_put_by_val_array(CTI_ARGS);
         static JSValue* SFX_CALL cti_op_lesseq(CTI_ARGS);
         static int SFX_CALL cti_op_loop_if_true(CTI_ARGS);
         static JSValue* SFX_CALL cti_op_resolve_base(CTI_ARGS);
index a0f80e0..93886e4 100644
@@ -209,6 +209,7 @@ public:
         OP2_JE_rel32    = 0x84,
         OP2_JNE_rel32   = 0x85,
         OP2_JBE_rel32   = 0x86,
+        OP2_JA_rel32    = 0x87,
         OP2_JL_rel32    = 0x8C,
         OP2_JGE_rel32   = 0x8D,
         OP2_JLE_rel32   = 0x8E,
@@ -371,6 +372,13 @@ public:
         m_buffer->putInt(imm);
     }
 
+    void cmpl_i8m(int imm, int offset, RegisterID base, RegisterID index, int scale)
+    {
+        m_buffer->putByte(OP_GROUP1_EvIb);
+        emitModRm_opmsib(GROUP1_OP_CMP, base, index, scale, offset);
+        m_buffer->putByte(imm);
+    }
+
     void cmpw_rm(RegisterID src, RegisterID base, RegisterID index, int scale)
     {
         m_buffer->putByte(PRE_OPERAND_SIZE);
@@ -741,6 +749,14 @@ public:
         return JmpSrc(m_buffer->getOffset());
     }
     
+    JmpSrc emitUnlinkedJa()
+    {
+        m_buffer->putByte(OP_2BYTE_ESCAPE);
+        m_buffer->putByte(OP2_JA_rel32);
+        m_buffer->putInt(0);
+        return JmpSrc(m_buffer->getOffset());
+    }
+    
     JmpSrc emitUnlinkedJae()
     {
         m_buffer->putByte(OP_2BYTE_ESCAPE);
@@ -785,16 +801,41 @@ public:
         ((int*)((ptrdiff_t)code + from.m_offset))[-1] = (ptrdiff_t)to - ((ptrdiff_t)code + from.m_offset);
     }
     
-    void* getRelocatedAddress(void* code, JmpSrc jump)
+    static void* getRelocatedAddress(void* code, JmpSrc jump)
     {
         return reinterpret_cast<void*>((ptrdiff_t)code + jump.m_offset);
     }
     
-    void* getRelocatedAddress(void* code, JmpDst jump)
+    static void* getRelocatedAddress(void* code, JmpDst jump)
     {
         return reinterpret_cast<void*>((ptrdiff_t)code + jump.m_offset);
     }
     
+    static int getDifferenceBetweenLabels(JmpDst src, JmpDst dst)
+    {
+        return dst.m_offset - src.m_offset;
+    }
+    
+    static int getDifferenceBetweenLabels(JmpDst src, JmpSrc dst)
+    {
+        return dst.m_offset - src.m_offset;
+    }
+    
+    static void repatchImmediate(intptr_t where, int32_t value)
+    {
+        reinterpret_cast<int32_t*>(where)[-1] = value;
+    }
+    
+    static void repatchDisplacement(intptr_t where, intptr_t value)
+    {
+        reinterpret_cast<intptr_t*>(where)[-1] = value;
+    }
+    
+    static void repatchBranchOffset(intptr_t where, void* destination)
+    {
+        reinterpret_cast<intptr_t*>(where)[-1] = (reinterpret_cast<intptr_t>(destination) - where);
+    }
+    
     void* copy() 
     {
         return m_buffer->copy();
@@ -931,6 +972,11 @@ private:
         emitModRm_rm(static_cast<RegisterID>(opcode), addr);
     }
 
+    void emitModRm_opmsib(OpcodeID opcode, RegisterID base, RegisterID index, int scale, int offset)
+    {
+        emitModRm_rmsib(static_cast<RegisterID>(opcode), base, index, scale, offset);
+    }
+
     JITCodeBuffer* m_buffer;
 };
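
The new cmpl_i8m / emitModRm_opmsib pair encodes cmp $imm8, disp(base,index,scale): opcode 0x83 (OP_GROUP1_EvIb) with the CMP extension /7 in the ModRM reg field, a SIB byte, the displacement, then the sign-extended 8-bit immediate. For reference, the SIB byte packs its three fields as below (the standard x86 encoding; a sketch, not the assembler's code):

    // SIB = (log2(scale) << 6) | (index << 3) | base
    static unsigned char sibByte(int scale /* 1, 2, 4 or 8 */, int index, int base)
    {
        int log2Scale = (scale == 8) ? 3 : (scale == 4) ? 2 : (scale == 2) ? 1 : 0;
        return static_cast<unsigned char>((log2Scale << 6) | ((index & 7) << 3) | (base & 7));
    }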