[JSC] Add GPRReg::InvalidGPRReg and FPRReg::InvalidFPRReg
[WebKit-https.git] / Source / JavaScriptCore / assembler / ARMv7Assembler.h
index 7e2c882..40d785e 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2009, 2010 Apple Inc. All rights reserved.
+ * Copyright (C) 2009-2017 Apple Inc. All rights reserved.
  * Copyright (C) 2010 University of Szeged
  *
  * Redistribution and use in source and binary forms, with or without
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
  */
 
-#ifndef ARMAssembler_h
-#define ARMAssembler_h
+#pragma once
 
 #if ENABLE(ASSEMBLER) && CPU(ARM_THUMB2)
 
 #include "AssemblerBuffer.h"
+#include "AssemblerCommon.h"
+#include <limits.h>
 #include <wtf/Assertions.h>
 #include <wtf/Vector.h>
 #include <stdint.h>
@@ -37,7 +38,8 @@
 namespace JSC {
 
 namespace ARMRegisters {
-    typedef enum {
+
+    typedef enum : int8_t {
         r0,
         r1,
         r2,
@@ -45,18 +47,32 @@ namespace ARMRegisters {
         r4,
         r5,
         r6,
-        r7, wr = r7,   // thumb work register
+        r7,
         r8,
-        r9, sb = r9,   // static base
-        r10, sl = r10, // stack limit
-        r11, fp = r11, // frame pointer
-        r12, ip = r12,
-        r13, sp = r13,
-        r14, lr = r14,
-        r15, pc = r15,
+        r9,
+        r10,
+        r11,
+        r12,
+        r13,
+        r14,
+        r15,
+
+        fp = r7,   // frame pointer
+        sb = r9,   // static base
+        sl = r10,  // stack limit
+        ip = r12,
+        sp = r13,
+        lr = r14,
+        pc = r15,
+        InvalidGPRReg = -1,
     } RegisterID;
 
-    typedef enum {
+    typedef enum : int8_t {
+        apsr,
+        fpscr
+    } SPRegisterID;
+
+    typedef enum : int8_t {
         s0,
         s1,
         s2,
@@ -91,7 +107,7 @@ namespace ARMRegisters {
         s31,
     } FPSingleRegisterID;
 
-    typedef enum {
+    typedef enum : int8_t {
         d0,
         d1,
         d2,
@@ -108,6 +124,7 @@ namespace ARMRegisters {
         d13,
         d14,
         d15,
+#if CPU(ARM_NEON) || CPU(ARM_VFP_V3_D32)
         d16,
         d17,
         d18,
@@ -124,9 +141,12 @@ namespace ARMRegisters {
         d29,
         d30,
         d31,
+#endif // CPU(ARM_NEON) || CPU(ARM_VFP_V3_D32)
+        InvalidFPRReg = -1,
     } FPDoubleRegisterID;
 
-    typedef enum {
+#if CPU(ARM_NEON)
+    typedef enum : int8_t {
         q0,
         q1,
         q2,
@@ -143,23 +163,8 @@ namespace ARMRegisters {
         q13,
         q14,
         q15,
-        q16,
-        q17,
-        q18,
-        q19,
-        q20,
-        q21,
-        q22,
-        q23,
-        q24,
-        q25,
-        q26,
-        q27,
-        q28,
-        q29,
-        q30,
-        q31,
     } FPQuadRegisterID;
+#endif // CPU(ARM_NEON)
 
     inline FPSingleRegisterID asSingle(FPDoubleRegisterID reg)
     {
@@ -172,7 +177,8 @@ namespace ARMRegisters {
         ASSERT(!(reg & 1));
         return (FPDoubleRegisterID)(reg >> 1);
     }
-}
+
+} // namespace ARMRegisters
 
 class ARMv7Assembler;
 class ARMThumbImmediate {
@@ -340,6 +346,8 @@ public:
         return m_type != TypeInvalid;
     }
 
+    uint16_t asUInt16() const { return m_value.asInt; }
+
     // These methods rely on the format of encoded byte values.
     bool isUInt3() { return !(m_value.asInt & 0xfff8); }
     bool isUInt4() { return !(m_value.asInt & 0xfff0); }
@@ -357,8 +365,8 @@ public:
     uint8_t getUInt6() { ASSERT(isUInt6()); return m_value.asInt; }
     uint8_t getUInt7() { ASSERT(isUInt7()); return m_value.asInt; }
     uint8_t getUInt8() { ASSERT(isUInt8()); return m_value.asInt; }
-    uint8_t getUInt9() { ASSERT(isUInt9()); return m_value.asInt; }
-    uint8_t getUInt10() { ASSERT(isUInt10()); return m_value.asInt; }
+    uint16_t getUInt9() { ASSERT(isUInt9()); return m_value.asInt; }
+    uint16_t getUInt10() { ASSERT(isUInt10()); return m_value.asInt; }
     uint16_t getUInt12() { ASSERT(isUInt12()); return m_value.asInt; }
     uint16_t getUInt16() { ASSERT(isUInt16()); return m_value.asInt; }
 
@@ -412,71 +420,161 @@ private:
 
 class ARMv7Assembler {
 public:
-    ~ARMv7Assembler()
-    {
-        ASSERT(m_jumpsToLink.isEmpty());
-    }
-
     typedef ARMRegisters::RegisterID RegisterID;
     typedef ARMRegisters::FPSingleRegisterID FPSingleRegisterID;
     typedef ARMRegisters::FPDoubleRegisterID FPDoubleRegisterID;
+#if CPU(ARM_NEON)
     typedef ARMRegisters::FPQuadRegisterID FPQuadRegisterID;
+#endif
+    typedef ARMRegisters::SPRegisterID SPRegisterID;
+    typedef FPDoubleRegisterID FPRegisterID;
+    
+    static constexpr RegisterID firstRegister() { return ARMRegisters::r0; }
+    static constexpr RegisterID lastRegister() { return ARMRegisters::r15; }
+    static constexpr unsigned numberOfRegisters() { return lastRegister() - firstRegister() + 1; }
+
+    static constexpr SPRegisterID firstSPRegister() { return ARMRegisters::apsr; }
+    static constexpr SPRegisterID lastSPRegister() { return ARMRegisters::fpscr; }
+    static constexpr unsigned numberOfSPRegisters() { return lastSPRegister() - firstSPRegister() + 1; }
+
+    static constexpr FPRegisterID firstFPRegister() { return ARMRegisters::d0; }
+#if CPU(ARM_NEON) || CPU(ARM_VFP_V3_D32)
+    static constexpr FPRegisterID lastFPRegister() { return ARMRegisters::d31; }
+#else
+    static constexpr FPRegisterID lastFPRegister() { return ARMRegisters::d15; }
+#endif
+    static constexpr unsigned numberOfFPRegisters() { return lastFPRegister() - firstFPRegister() + 1; }
+
+    static const char* gprName(RegisterID id)
+    {
+        ASSERT(id >= firstRegister() && id <= lastRegister());
+        static const char* const nameForRegister[numberOfRegisters()] = {
+            "r0", "r1", "r2", "r3",
+            "r4", "r5", "r6", "fp",
+            "r8", "r9", "r10", "r11",
+            "ip", "sp", "lr", "pc"
+        };
+        return nameForRegister[id];
+    }
+
+    static const char* sprName(SPRegisterID id)
+    {
+        ASSERT(id >= firstSPRegister() && id <= lastSPRegister());
+        static const char* const nameForRegister[numberOfSPRegisters()] = {
+            "apsr", "fpscr"
+        };
+        return nameForRegister[id];
+    }
+
+    static const char* fprName(FPRegisterID id)
+    {
+        ASSERT(id >= firstFPRegister() && id <= lastFPRegister());
+        static const char* const nameForRegister[numberOfFPRegisters()] = {
+            "d0", "d1", "d2", "d3",
+            "d4", "d5", "d6", "d7",
+            "d8", "d9", "d10", "d11",
+            "d12", "d13", "d14", "d15",
+#if CPU(ARM_NEON) || CPU(ARM_VFP_V3_D32)
+            "d16", "d17", "d18", "d19",
+            "d20", "d21", "d22", "d23",
+            "d24", "d25", "d26", "d27",
+            "d28", "d29", "d30", "d31"
+#endif // CPU(ARM_NEON) || CPU(ARM_VFP_V3_D32)
+        };
+        return nameForRegister[id];
+    }
 
     // (HS, LO, HI, LS) -> (AE, B, A, BE)
     // (VS, VC) -> (O, NO)
     typedef enum {
-        ConditionEQ,
-        ConditionNE,
-        ConditionHS, ConditionCS = ConditionHS,
-        ConditionLO, ConditionCC = ConditionLO,
-        ConditionMI,
-        ConditionPL,
-        ConditionVS,
-        ConditionVC,
-        ConditionHI,
-        ConditionLS,
-        ConditionGE,
-        ConditionLT,
-        ConditionGT,
-        ConditionLE,
-        ConditionAL,
+        ConditionEQ, // Zero / Equal.
+        ConditionNE, // Non-zero / Not equal.
+        ConditionHS, ConditionCS = ConditionHS, // Unsigned higher or same.
+        ConditionLO, ConditionCC = ConditionLO, // Unsigned lower.
+        ConditionMI, // Negative.
+        ConditionPL, // Positive or zero.
+        ConditionVS, // Overflowed.
+        ConditionVC, // Not overflowed.
+        ConditionHI, // Unsigned higher.
+        ConditionLS, // Unsigned lower or same.
+        ConditionGE, // Signed greater than or equal.
+        ConditionLT, // Signed less than.
+        ConditionGT, // Signed greater than.
+        ConditionLE, // Signed less than or equal.
+        ConditionAL, // Unconditional / Always execute.
         ConditionInvalid
     } Condition;
 
-    enum JumpType { JumpFixed, JumpNoCondition, JumpCondition, JumpNoConditionFixedSize, JumpConditionFixedSize, JumpTypeCount };
-    enum JumpLinkType { LinkInvalid, LinkJumpT1, LinkJumpT2, LinkJumpT3,
-        LinkJumpT4, LinkConditionalJumpT4, LinkBX, LinkConditionalBX, JumpLinkTypeCount };
-    static const int JumpSizes[JumpLinkTypeCount];
-    static const int JumpPaddingSizes[JumpTypeCount];
+#define JUMP_ENUM_WITH_SIZE(index, value) (((value) << 3) | (index))
+#define JUMP_ENUM_SIZE(jump) ((jump) >> 3) 
+    enum JumpType { JumpFixed = JUMP_ENUM_WITH_SIZE(0, 0), 
+                    JumpNoCondition = JUMP_ENUM_WITH_SIZE(1, 5 * sizeof(uint16_t)),
+                    JumpCondition = JUMP_ENUM_WITH_SIZE(2, 6 * sizeof(uint16_t)),
+                    JumpNoConditionFixedSize = JUMP_ENUM_WITH_SIZE(3, 5 * sizeof(uint16_t)),
+                    JumpConditionFixedSize = JUMP_ENUM_WITH_SIZE(4, 6 * sizeof(uint16_t))
+    };
+    enum JumpLinkType { 
+        LinkInvalid = JUMP_ENUM_WITH_SIZE(0, 0),
+        LinkJumpT1 = JUMP_ENUM_WITH_SIZE(1, sizeof(uint16_t)),
+        LinkJumpT2 = JUMP_ENUM_WITH_SIZE(2, sizeof(uint16_t)),
+        LinkJumpT3 = JUMP_ENUM_WITH_SIZE(3, 2 * sizeof(uint16_t)),
+        LinkJumpT4 = JUMP_ENUM_WITH_SIZE(4, 2 * sizeof(uint16_t)),
+        LinkConditionalJumpT4 = JUMP_ENUM_WITH_SIZE(5, 3 * sizeof(uint16_t)),
+        LinkBX = JUMP_ENUM_WITH_SIZE(6, 5 * sizeof(uint16_t)),
+        LinkConditionalBX = JUMP_ENUM_WITH_SIZE(7, 6 * sizeof(uint16_t))
+    };
+
     class LinkRecord {
     public:
         LinkRecord(intptr_t from, intptr_t to, JumpType type, Condition condition)
-            : m_from(from)
-            , m_to(to)
-            , m_type(type)
-            , m_linkType(LinkInvalid)
-            , m_condition(condition)
         {
+            data.realTypes.m_from = from;
+            data.realTypes.m_to = to;
+            data.realTypes.m_type = type;
+            data.realTypes.m_linkType = LinkInvalid;
+            data.realTypes.m_condition = condition;
         }
-        intptr_t from() const { return m_from; }
-        void setFrom(intptr_t from) { m_from = from; }
-        intptr_t to() const { return m_to; }
-        JumpType type() const { return m_type; }
-        JumpLinkType linkType() const { return m_linkType; }
-        void setLinkType(JumpLinkType linkType) { ASSERT(m_linkType == LinkInvalid); m_linkType = linkType; }
-        Condition condition() const { return m_condition; }
+        void operator=(const LinkRecord& other)
+        {
+            data.copyTypes.content[0] = other.data.copyTypes.content[0];
+            data.copyTypes.content[1] = other.data.copyTypes.content[1];
+            data.copyTypes.content[2] = other.data.copyTypes.content[2];
+        }
+        intptr_t from() const { return data.realTypes.m_from; }
+        void setFrom(intptr_t from) { data.realTypes.m_from = from; }
+        intptr_t to() const { return data.realTypes.m_to; }
+        JumpType type() const { return data.realTypes.m_type; }
+        JumpLinkType linkType() const { return data.realTypes.m_linkType; }
+        void setLinkType(JumpLinkType linkType) { ASSERT(data.realTypes.m_linkType == LinkInvalid); data.realTypes.m_linkType = linkType; }
+        Condition condition() const { return data.realTypes.m_condition; }
     private:
-        intptr_t m_from : 31;
-        intptr_t m_to : 31;
-        JumpType m_type : 3;
-        JumpLinkType m_linkType : 4;
-        Condition m_condition : 16;
+        union {
+            struct RealTypes {
+                intptr_t m_from : 31;
+                intptr_t m_to : 31;
+                JumpType m_type : 8;
+                JumpLinkType m_linkType : 8;
+                Condition m_condition : 16;
+            } realTypes;
+            struct CopyTypes {
+                uint32_t content[3];
+            } copyTypes;
+            COMPILE_ASSERT(sizeof(RealTypes) == sizeof(CopyTypes), LinkRecordCopyStructSizeEqualsRealStruct);
+        } data;
     };
 
+    ARMv7Assembler()
+        : m_indexOfLastWatchpoint(INT_MIN)
+        , m_indexOfTailOfLastWatchpoint(INT_MIN)
+    {
+    }
+
+    AssemblerBuffer& buffer() { return m_formatter.m_buffer; }
+
 private:
 
     // ARMv7, Appx-A.6.3
-    bool BadReg(RegisterID reg)
+    static bool BadReg(RegisterID reg)
     {
         return (reg == ARMRegisters::sp) || (reg == ARMRegisters::pc);
     }
@@ -518,18 +616,26 @@ private:
         OP_BLX              = 0x4700,
         OP_BX               = 0x4700,
         OP_STR_reg_T1       = 0x5000,
+        OP_STRH_reg_T1      = 0x5200,
+        OP_STRB_reg_T1      = 0x5400,
+        OP_LDRSB_reg_T1     = 0x5600,
         OP_LDR_reg_T1       = 0x5800,
         OP_LDRH_reg_T1      = 0x5A00,
         OP_LDRB_reg_T1      = 0x5C00,
+        OP_LDRSH_reg_T1     = 0x5E00,
         OP_STR_imm_T1       = 0x6000,
         OP_LDR_imm_T1       = 0x6800,
+        OP_STRB_imm_T1      = 0x7000,
         OP_LDRB_imm_T1      = 0x7800,
+        OP_STRH_imm_T1      = 0x8000,
         OP_LDRH_imm_T1      = 0x8800,
         OP_STR_imm_T2       = 0x9000,
         OP_LDR_imm_T2       = 0x9800,
         OP_ADD_SP_imm_T1    = 0xA800,
         OP_ADD_SP_imm_T2    = 0xB000,
         OP_SUB_SP_imm_T1    = 0xB080,
+        OP_PUSH_T1          = 0xB400,
+        OP_POP_T1           = 0xBC00,
         OP_BKPT             = 0xBE00,
         OP_IT               = 0xBF00,
         OP_NOP_T1           = 0xBF00,
@@ -538,6 +644,8 @@ private:
     typedef enum {
         OP_B_T1         = 0xD000,
         OP_B_T2         = 0xE000,
+        OP_POP_T2       = 0xE8BD,
+        OP_PUSH_T2      = 0xE92D,
         OP_AND_reg_T2   = 0xEA00,
         OP_TST_reg_T2   = 0xEA10,
         OP_ORR_reg_T2   = 0xEA40,
@@ -553,18 +661,28 @@ private:
         OP_SUB_reg_T2   = 0xEBA0,
         OP_SUB_S_reg_T2 = 0xEBB0,
         OP_CMP_reg_T2   = 0xEBB0,
+        OP_VMOV_CtoD    = 0xEC00,
+        OP_VMOV_DtoC    = 0xEC10,
+        OP_FSTS         = 0xED00,
         OP_VSTR         = 0xED00,
+        OP_FLDS         = 0xED10,
         OP_VLDR         = 0xED10,
-        OP_VMOV_StoC    = 0xEE00,
-        OP_VMOV_CtoS    = 0xEE10,
+        OP_VMOV_CtoS    = 0xEE00,
+        OP_VMOV_StoC    = 0xEE10,
         OP_VMUL_T2      = 0xEE20,
         OP_VADD_T2      = 0xEE30,
         OP_VSUB_T2      = 0xEE30,
         OP_VDIV         = 0xEE80,
+        OP_VABS_T2      = 0xEEB0,
         OP_VCMP         = 0xEEB0,
         OP_VCVT_FPIVFP  = 0xEEB0,
+        OP_VMOV_T2      = 0xEEB0,
         OP_VMOV_IMM_T2  = 0xEEB0,
         OP_VMRS         = 0xEEB0,
+        OP_VNEG_T2      = 0xEEB0,
+        OP_VSQRT_T1     = 0xEEB0,
+        OP_VCVTSD_T1    = 0xEEB0,
+        OP_VCVTDS_T1    = 0xEEB0,
         OP_B_T3a        = 0xF000,
         OP_B_T4a        = 0xF000,
         OP_AND_imm_T1   = 0xF000,
@@ -576,51 +694,80 @@ private:
         OP_ADD_imm_T3   = 0xF100,
         OP_ADD_S_imm_T3 = 0xF110,
         OP_CMN_imm      = 0xF110,
+        OP_ADC_imm      = 0xF140,
         OP_SUB_imm_T3   = 0xF1A0,
         OP_SUB_S_imm_T3 = 0xF1B0,
         OP_CMP_imm_T2   = 0xF1B0,
         OP_RSB_imm_T2   = 0xF1C0,
+        OP_RSB_S_imm_T2 = 0xF1D0,
         OP_ADD_imm_T4   = 0xF200,
         OP_MOV_imm_T3   = 0xF240,
         OP_SUB_imm_T4   = 0xF2A0,
         OP_MOVT         = 0xF2C0,
+        OP_UBFX_T1      = 0xF3C0,
         OP_NOP_T2a      = 0xF3AF,
+        OP_DMB_T1a      = 0xF3BF,
+        OP_STRB_imm_T3  = 0xF800,
+        OP_STRB_reg_T2  = 0xF800,
         OP_LDRB_imm_T3  = 0xF810,
         OP_LDRB_reg_T2  = 0xF810,
+        OP_STRH_imm_T3  = 0xF820,
+        OP_STRH_reg_T2  = 0xF820,
         OP_LDRH_reg_T2  = 0xF830,
         OP_LDRH_imm_T3  = 0xF830,
         OP_STR_imm_T4   = 0xF840,
         OP_STR_reg_T2   = 0xF840,
         OP_LDR_imm_T4   = 0xF850,
         OP_LDR_reg_T2   = 0xF850,
+        OP_STRB_imm_T2  = 0xF880,
         OP_LDRB_imm_T2  = 0xF890,
+        OP_STRH_imm_T2  = 0xF8A0,
         OP_LDRH_imm_T2  = 0xF8B0,
         OP_STR_imm_T3   = 0xF8C0,
         OP_LDR_imm_T3   = 0xF8D0,
+        OP_LDRSB_reg_T2 = 0xF910,
+        OP_LDRSH_reg_T2 = 0xF930,
         OP_LSL_reg_T2   = 0xFA00,
         OP_LSR_reg_T2   = 0xFA20,
         OP_ASR_reg_T2   = 0xFA40,
         OP_ROR_reg_T2   = 0xFA60,
         OP_CLZ          = 0xFAB0,
         OP_SMULL_T1     = 0xFB80,
+#if HAVE(ARM_IDIV_INSTRUCTIONS)
+        OP_SDIV_T1      = 0xFB90,
+        OP_UDIV_T1      = 0xFBB0,
+#endif
+        OP_MRS_T1       = 0xF3EF,
     } OpcodeID1;
 
     typedef enum {
-        OP_VADD_T2b     = 0x0A00,
-        OP_VDIVb        = 0x0A00,
-        OP_VLDRb        = 0x0A00,
-        OP_VMOV_IMM_T2b = 0x0A00,
-        OP_VMUL_T2b     = 0x0A00,
-        OP_VSTRb        = 0x0A00,
-        OP_VMOV_CtoSb   = 0x0A10,
-        OP_VMOV_StoCb   = 0x0A10,
-        OP_VMRSb        = 0x0A10,
-        OP_VCMPb        = 0x0A40,
-        OP_VCVT_FPIVFPb = 0x0A40,
-        OP_VSUB_T2b     = 0x0A40,
-        OP_NOP_T2b      = 0x8000,
-        OP_B_T3b        = 0x8000,
-        OP_B_T4b        = 0x9000,
+        OP_VADD_T2b      = 0x0A00,
+        OP_VDIVb         = 0x0A00,
+        OP_FLDSb         = 0x0A00,
+        OP_VLDRb         = 0x0A00,
+        OP_VMOV_IMM_T2b  = 0x0A00,
+        OP_VMOV_T2b      = 0x0A40,
+        OP_VMUL_T2b      = 0x0A00,
+        OP_FSTSb         = 0x0A00,
+        OP_VSTRb         = 0x0A00,
+        OP_VMOV_StoCb    = 0x0A10,
+        OP_VMOV_CtoSb    = 0x0A10,
+        OP_VMOV_DtoCb    = 0x0A10,
+        OP_VMOV_CtoDb    = 0x0A10,
+        OP_VMRSb         = 0x0A10,
+        OP_VABS_T2b      = 0x0A40,
+        OP_VCMPb         = 0x0A40,
+        OP_VCVT_FPIVFPb  = 0x0A40,
+        OP_VNEG_T2b      = 0x0A40,
+        OP_VSUB_T2b      = 0x0A40,
+        OP_VSQRT_T1b     = 0x0A40,
+        OP_VCVTSD_T1b    = 0x0A40,
+        OP_VCVTDS_T1b    = 0x0A40,
+        OP_NOP_T2b       = 0x8000,
+        OP_DMB_SY_T1b    = 0x8F5F,
+        OP_DMB_ISHST_T1b = 0x8F5A,
+        OP_B_T3b         = 0x8000,
+        OP_B_T4b         = 0x9000,
     } OpcodeID2;
 
     struct FourFours {
@@ -646,11 +793,11 @@ private:
     class ARMInstructionFormatter;
 
     // false means else!
-    bool ifThenElseConditionBit(Condition condition, bool isIf)
+    static bool ifThenElseConditionBit(Condition condition, bool isIf)
     {
         return isIf ? (condition & 1) : !(condition & 1);
     }
-    uint8_t ifThenElse(Condition condition, bool inst2if, bool inst3if, bool inst4if)
+    static uint8_t ifThenElse(Condition condition, bool inst2if, bool inst3if, bool inst4if)
     {
         int mask = (ifThenElseConditionBit(condition, inst2if) << 3)
             | (ifThenElseConditionBit(condition, inst3if) << 2)
@@ -659,7 +806,7 @@ private:
         ASSERT((condition != ConditionAL) || !(mask & (mask - 1)));
         return (condition << 4) | mask;
     }
-    uint8_t ifThenElse(Condition condition, bool inst2if, bool inst3if)
+    static uint8_t ifThenElse(Condition condition, bool inst2if, bool inst3if)
     {
         int mask = (ifThenElseConditionBit(condition, inst2if) << 3)
             | (ifThenElseConditionBit(condition, inst3if) << 2)
@@ -667,7 +814,7 @@ private:
         ASSERT((condition != ConditionAL) || !(mask & (mask - 1)));
         return (condition << 4) | mask;
     }
-    uint8_t ifThenElse(Condition condition, bool inst2if)
+    static uint8_t ifThenElse(Condition condition, bool inst2if)
     {
         int mask = (ifThenElseConditionBit(condition, inst2if) << 3)
             | 4;
@@ -675,7 +822,7 @@ private:
         return (condition << 4) | mask;
     }
 
-    uint8_t ifThenElse(Condition condition)
+    static uint8_t ifThenElse(Condition condition)
     {
         int mask = 8;
         return (condition << 4) | mask;
@@ -683,6 +830,17 @@ private:
 
 public:
     
+    void adc(RegisterID rd, RegisterID rn, ARMThumbImmediate imm)
+    {
+        // Rd can only be SP if Rn is also SP.
+        ASSERT((rd != ARMRegisters::sp) || (rn == ARMRegisters::sp));
+        ASSERT(rd != ARMRegisters::pc);
+        ASSERT(rn != ARMRegisters::pc);
+        ASSERT(imm.isEncodedImm());
+
+        m_formatter.twoWordOp5i6Imm4Reg4EncodedImm(OP_ADC_imm, rn, rd, imm);
+    }
+
     void add(RegisterID rd, RegisterID rn, ARMThumbImmediate imm)
     {
         // Rd can only be SP if Rn is also SP.
@@ -691,12 +849,13 @@ public:
         ASSERT(rn != ARMRegisters::pc);
         ASSERT(imm.isValid());
 
-        if (rn == ARMRegisters::sp) {
+        if (rn == ARMRegisters::sp && imm.isUInt16()) {
+            ASSERT(!(imm.getUInt16() & 3));
             if (!(rd & 8) && imm.isUInt10()) {
-                m_formatter.oneWordOp5Reg3Imm8(OP_ADD_SP_imm_T1, rd, imm.getUInt10() >> 2);
+                m_formatter.oneWordOp5Reg3Imm8(OP_ADD_SP_imm_T1, rd, static_cast<uint8_t>(imm.getUInt10() >> 2));
                 return;
             } else if ((rd == ARMRegisters::sp) && imm.isUInt9()) {
-                m_formatter.oneWordOp9Imm7(OP_ADD_SP_imm_T2, imm.getUInt9() >> 2);
+                m_formatter.oneWordOp9Imm7(OP_ADD_SP_imm_T2, static_cast<uint8_t>(imm.getUInt9() >> 2));
                 return;
             }
         } else if (!((rd | rn) & 8)) {
@@ -717,7 +876,7 @@ public:
         }
     }
 
-    void add(RegisterID rd, RegisterID rn, RegisterID rm, ShiftTypeAndAmount shift)
+    ALWAYS_INLINE void add(RegisterID rd, RegisterID rn, RegisterID rm, ShiftTypeAndAmount shift)
     {
         ASSERT((rd != ARMRegisters::sp) || (rn == ARMRegisters::sp));
         ASSERT(rd != ARMRegisters::pc);
@@ -727,8 +886,13 @@ public:
     }
 
     // NOTE: In an IT block, add doesn't modify the flags register.
-    void add(RegisterID rd, RegisterID rn, RegisterID rm)
+    ALWAYS_INLINE void add(RegisterID rd, RegisterID rn, RegisterID rm)
     {
+        if (rd == ARMRegisters::sp) {
+            mov(rd, rn);
+            rn = rd;
+        }
+
         if (rd == rn)
             m_formatter.oneWordOp8RegReg143(OP_ADD_reg_T2, rm, rd);
         else if (rd == rm)
@@ -740,7 +904,7 @@ public:
     }
 
     // Not allowed in an IT (if then) block.
-    void add_S(RegisterID rd, RegisterID rn, ARMThumbImmediate imm)
+    ALWAYS_INLINE void add_S(RegisterID rd, RegisterID rn, ARMThumbImmediate imm)
     {
         // Rd can only be SP if Rn is also SP.
         ASSERT((rd != ARMRegisters::sp) || (rn == ARMRegisters::sp));
@@ -762,7 +926,7 @@ public:
     }
 
     // Not allowed in an IT (if then) block?
-    void add_S(RegisterID rd, RegisterID rn, RegisterID rm, ShiftTypeAndAmount shift)
+    ALWAYS_INLINE void add_S(RegisterID rd, RegisterID rn, RegisterID rm, ShiftTypeAndAmount shift)
     {
         ASSERT((rd != ARMRegisters::sp) || (rn == ARMRegisters::sp));
         ASSERT(rd != ARMRegisters::pc);
@@ -772,7 +936,7 @@ public:
     }
 
     // Not allowed in an IT (if then) block.
-    void add_S(RegisterID rd, RegisterID rn, RegisterID rm)
+    ALWAYS_INLINE void add_S(RegisterID rd, RegisterID rn, RegisterID rm)
     {
         if (!((rd | rn | rm) & 8))
             m_formatter.oneWordOp7Reg3Reg3Reg3(OP_ADD_reg_T1, rm, rn, rd);
@@ -780,7 +944,7 @@ public:
             add_S(rd, rn, rm, ShiftTypeAndAmount());
     }
 
-    void ARM_and(RegisterID rd, RegisterID rn, ARMThumbImmediate imm)
+    ALWAYS_INLINE void ARM_and(RegisterID rd, RegisterID rn, ARMThumbImmediate imm)
     {
         ASSERT(!BadReg(rd));
         ASSERT(!BadReg(rn));
@@ -788,7 +952,7 @@ public:
         m_formatter.twoWordOp5i6Imm4Reg4EncodedImm(OP_AND_imm_T1, rn, rd, imm);
     }
 
-    void ARM_and(RegisterID rd, RegisterID rn, RegisterID rm, ShiftTypeAndAmount shift)
+    ALWAYS_INLINE void ARM_and(RegisterID rd, RegisterID rn, RegisterID rm, ShiftTypeAndAmount shift)
     {
         ASSERT(!BadReg(rd));
         ASSERT(!BadReg(rn));
@@ -796,7 +960,7 @@ public:
         m_formatter.twoWordOp12Reg4FourFours(OP_AND_reg_T2, rn, FourFours(shift.hi4(), rd, shift.lo4(), rm));
     }
 
-    void ARM_and(RegisterID rd, RegisterID rn, RegisterID rm)
+    ALWAYS_INLINE void ARM_and(RegisterID rd, RegisterID rn, RegisterID rm)
     {
         if ((rd == rn) && !((rd | rm) & 8))
             m_formatter.oneWordOp10Reg3Reg3(OP_AND_reg_T1, rm, rd);
@@ -806,7 +970,7 @@ public:
             ARM_and(rd, rn, rm, ShiftTypeAndAmount());
     }
 
-    void asr(RegisterID rd, RegisterID rm, int32_t shiftAmount)
+    ALWAYS_INLINE void asr(RegisterID rd, RegisterID rm, int32_t shiftAmount)
     {
         ASSERT(!BadReg(rd));
         ASSERT(!BadReg(rm));
@@ -814,7 +978,7 @@ public:
         m_formatter.twoWordOp16FourFours(OP_ASR_imm_T1, FourFours(shift.hi4(), rd, shift.lo4(), rm));
     }
 
-    void asr(RegisterID rd, RegisterID rn, RegisterID rm)
+    ALWAYS_INLINE void asr(RegisterID rd, RegisterID rn, RegisterID rm)
     {
         ASSERT(!BadReg(rd));
         ASSERT(!BadReg(rn));
@@ -823,40 +987,48 @@ public:
     }
     
     // Only allowed in IT (if then) block if last instruction.
-    AssemblerLabel b()
+    ALWAYS_INLINE AssemblerLabel b()
     {
         m_formatter.twoWordOp16Op16(OP_B_T4a, OP_B_T4b);
-        return AssemblerLabel(m_formatter.label());
+        return m_formatter.label();
     }
     
     // Only allowed in IT (if then) block if last instruction.
-    AssemblerLabel blx(RegisterID rm)
+    ALWAYS_INLINE AssemblerLabel blx(RegisterID rm)
     {
         ASSERT(rm != ARMRegisters::pc);
         m_formatter.oneWordOp8RegReg143(OP_BLX, rm, (RegisterID)8);
-        return AssemblerLabel(m_formatter.label());
+        return m_formatter.label();
     }
 
     // Only allowed in IT (if then) block if last instruction.
-    AssemblerLabel bx(RegisterID rm)
+    ALWAYS_INLINE AssemblerLabel bx(RegisterID rm)
     {
         m_formatter.oneWordOp8RegReg143(OP_BX, rm, (RegisterID)0);
-        return AssemblerLabel(m_formatter.label());
+        return m_formatter.label();
     }
 
-    void bkpt(uint8_t imm=0)
+    void bkpt(uint8_t imm = 0)
     {
         m_formatter.oneWordOp8Imm8(OP_BKPT, imm);
     }
 
-    void clz(RegisterID rd, RegisterID rm)
+    static bool isBkpt(void* address)
+    {
+        unsigned short expected = OP_BKPT;
+        unsigned short immediateMask = 0xff;
+        unsigned short candidateInstruction = *reinterpret_cast<unsigned short*>(address);
+        return (candidateInstruction & ~immediateMask) == expected;
+    }
+
+    ALWAYS_INLINE void clz(RegisterID rd, RegisterID rm)
     {
         ASSERT(!BadReg(rd));
         ASSERT(!BadReg(rm));
         m_formatter.twoWordOp12Reg4FourFours(OP_CLZ, rm, FourFours(0xf, rd, 8, rm));
     }
 
-    void cmn(RegisterID rn, ARMThumbImmediate imm)
+    ALWAYS_INLINE void cmn(RegisterID rn, ARMThumbImmediate imm)
     {
         ASSERT(rn != ARMRegisters::pc);
         ASSERT(imm.isEncodedImm());
@@ -864,7 +1036,7 @@ public:
         m_formatter.twoWordOp5i6Imm4Reg4EncodedImm(OP_CMN_imm, rn, (RegisterID)0xf, imm);
     }
 
-    void cmp(RegisterID rn, ARMThumbImmediate imm)
+    ALWAYS_INLINE void cmp(RegisterID rn, ARMThumbImmediate imm)
     {
         ASSERT(rn != ARMRegisters::pc);
         ASSERT(imm.isEncodedImm());
@@ -875,14 +1047,14 @@ public:
             m_formatter.twoWordOp5i6Imm4Reg4EncodedImm(OP_CMP_imm_T2, rn, (RegisterID)0xf, imm);
     }
 
-    void cmp(RegisterID rn, RegisterID rm, ShiftTypeAndAmount shift)
+    ALWAYS_INLINE void cmp(RegisterID rn, RegisterID rm, ShiftTypeAndAmount shift)
     {
         ASSERT(rn != ARMRegisters::pc);
         ASSERT(!BadReg(rm));
         m_formatter.twoWordOp12Reg4FourFours(OP_CMP_reg_T2, rn, FourFours(shift.hi4(), 0xf, shift.lo4(), rm));
     }
 
-    void cmp(RegisterID rn, RegisterID rm)
+    ALWAYS_INLINE void cmp(RegisterID rn, RegisterID rm)
     {
         if ((rn | rm) & 8)
             cmp(rn, rm, ShiftTypeAndAmount());
@@ -891,7 +1063,7 @@ public:
     }
 
     // xor is not spelled with an 'e'. :-(
-    void eor(RegisterID rd, RegisterID rn, ARMThumbImmediate imm)
+    ALWAYS_INLINE void eor(RegisterID rd, RegisterID rn, ARMThumbImmediate imm)
     {
         ASSERT(!BadReg(rd));
         ASSERT(!BadReg(rn));
@@ -900,7 +1072,7 @@ public:
     }
 
     // xor is not spelled with an 'e'. :-(
-    void eor(RegisterID rd, RegisterID rn, RegisterID rm, ShiftTypeAndAmount shift)
+    ALWAYS_INLINE void eor(RegisterID rd, RegisterID rn, RegisterID rm, ShiftTypeAndAmount shift)
     {
         ASSERT(!BadReg(rd));
         ASSERT(!BadReg(rn));
@@ -919,28 +1091,28 @@ public:
             eor(rd, rn, rm, ShiftTypeAndAmount());
     }
 
-    void it(Condition cond)
+    ALWAYS_INLINE void it(Condition cond)
     {
         m_formatter.oneWordOp8Imm8(OP_IT, ifThenElse(cond));
     }
 
-    void it(Condition cond, bool inst2if)
+    ALWAYS_INLINE void it(Condition cond, bool inst2if)
     {
         m_formatter.oneWordOp8Imm8(OP_IT, ifThenElse(cond, inst2if));
     }
 
-    void it(Condition cond, bool inst2if, bool inst3if)
+    ALWAYS_INLINE void it(Condition cond, bool inst2if, bool inst3if)
     {
         m_formatter.oneWordOp8Imm8(OP_IT, ifThenElse(cond, inst2if, inst3if));
     }
 
-    void it(Condition cond, bool inst2if, bool inst3if, bool inst4if)
+    ALWAYS_INLINE void it(Condition cond, bool inst2if, bool inst3if, bool inst4if)
     {
         m_formatter.oneWordOp8Imm8(OP_IT, ifThenElse(cond, inst2if, inst3if, inst4if));
     }
 
     // rt == ARMRegisters::pc only allowed if last instruction in IT (if then) block.
-    void ldr(RegisterID rt, RegisterID rn, ARMThumbImmediate imm)
+    ALWAYS_INLINE void ldr(RegisterID rt, RegisterID rn, ARMThumbImmediate imm)
     {
         ASSERT(rn != ARMRegisters::pc); // LDR (literal)
         ASSERT(imm.isUInt12());
@@ -948,10 +1120,24 @@ public:
         if (!((rt | rn) & 8) && imm.isUInt7())
             m_formatter.oneWordOp5Imm5Reg3Reg3(OP_LDR_imm_T1, imm.getUInt7() >> 2, rn, rt);
         else if ((rn == ARMRegisters::sp) && !(rt & 8) && imm.isUInt10())
-            m_formatter.oneWordOp5Reg3Imm8(OP_LDR_imm_T2, rt, imm.getUInt10() >> 2);
+            m_formatter.oneWordOp5Reg3Imm8(OP_LDR_imm_T2, rt, static_cast<uint8_t>(imm.getUInt10() >> 2));
         else
             m_formatter.twoWordOp12Reg4Reg4Imm12(OP_LDR_imm_T3, rn, rt, imm.getUInt12());
     }
+    
+    ALWAYS_INLINE void ldrWide8BitImmediate(RegisterID rt, RegisterID rn, uint8_t immediate)
+    {
+        ASSERT(rn != ARMRegisters::pc);
+        m_formatter.twoWordOp12Reg4Reg4Imm12(OP_LDR_imm_T3, rn, rt, immediate);
+    }
+
+    ALWAYS_INLINE void ldrCompact(RegisterID rt, RegisterID rn, ARMThumbImmediate imm)
+    {
+        ASSERT(rn != ARMRegisters::pc); // LDR (literal)
+        ASSERT(imm.isUInt7());
+        ASSERT(!((rt | rn) & 8));
+        m_formatter.oneWordOp5Imm5Reg3Reg3(OP_LDR_imm_T1, imm.getUInt7() >> 2, rn, rt);
+    }
 
     // If index is set, this is a regular offset or a pre-indexed load;
     // if index is not set then it is a post-index load.
@@ -964,7 +1150,7 @@ public:
     // _tmp = _reg + offset
     // REG[rt] = MEM[index ? _tmp : _reg]
     // if (wback) REG[rn] = _tmp
-    void ldr(RegisterID rt, RegisterID rn, int offset, bool index, bool wback)
+    ALWAYS_INLINE void ldr(RegisterID rt, RegisterID rn, int offset, bool index, bool wback)
     {
         ASSERT(rt != ARMRegisters::pc);
         ASSERT(rn != ARMRegisters::pc);
@@ -987,7 +1173,7 @@ public:
     }
 
     // rt == ARMRegisters::pc only allowed if last instruction in IT (if then) block.
-    void ldr(RegisterID rt, RegisterID rn, RegisterID rm, unsigned shift=0)
+    ALWAYS_INLINE void ldr(RegisterID rt, RegisterID rn, RegisterID rm, unsigned shift = 0)
     {
         ASSERT(rn != ARMRegisters::pc); // LDR (literal)
         ASSERT(!BadReg(rm));
@@ -1000,13 +1186,14 @@ public:
     }
 
     // rt == ARMRegisters::pc only allowed if last instruction in IT (if then) block.
-    void ldrh(RegisterID rt, RegisterID rn, ARMThumbImmediate imm)
+    ALWAYS_INLINE void ldrh(RegisterID rt, RegisterID rn, ARMThumbImmediate imm)
     {
         ASSERT(rn != ARMRegisters::pc); // LDR (literal)
         ASSERT(imm.isUInt12());
+        ASSERT(!(imm.getUInt12() & 1));
 
         if (!((rt | rn) & 8) && imm.isUInt6())
-            m_formatter.oneWordOp5Imm5Reg3Reg3(OP_LDRH_imm_T1, imm.getUInt6() >> 2, rn, rt);
+            m_formatter.oneWordOp5Imm5Reg3Reg3(OP_LDRH_imm_T1, imm.getUInt6() >> 1, rn, rt);
         else
             m_formatter.twoWordOp12Reg4Reg4Imm12(OP_LDRH_imm_T2, rn, rt, imm.getUInt12());
     }
@@ -1022,7 +1209,7 @@ public:
     // _tmp = _reg + offset
     // MEM[index ? _tmp : _reg] = REG[rt]
     // if (wback) REG[rn] = _tmp
-    void ldrh(RegisterID rt, RegisterID rn, int offset, bool index, bool wback)
+    ALWAYS_INLINE void ldrh(RegisterID rt, RegisterID rn, int offset, bool index, bool wback)
     {
         ASSERT(rt != ARMRegisters::pc);
         ASSERT(rn != ARMRegisters::pc);
@@ -1044,7 +1231,7 @@ public:
         m_formatter.twoWordOp12Reg4Reg4Imm12(OP_LDRH_imm_T3, rn, rt, offset);
     }
 
-    void ldrh(RegisterID rt, RegisterID rn, RegisterID rm, unsigned shift=0)
+    ALWAYS_INLINE void ldrh(RegisterID rt, RegisterID rn, RegisterID rm, unsigned shift = 0)
     {
         ASSERT(!BadReg(rt));   // Memory hint
         ASSERT(rn != ARMRegisters::pc); // LDRH (literal)
@@ -1091,7 +1278,7 @@ public:
         m_formatter.twoWordOp12Reg4Reg4Imm12(OP_LDRB_imm_T3, rn, rt, offset);
     }
 
-    void ldrb(RegisterID rt, RegisterID rn, RegisterID rm, unsigned shift = 0)
+    ALWAYS_INLINE void ldrb(RegisterID rt, RegisterID rn, RegisterID rm, unsigned shift = 0)
     {
         ASSERT(rn != ARMRegisters::pc); // LDR (literal)
         ASSERT(!BadReg(rm));
@@ -1102,6 +1289,30 @@ public:
         else
             m_formatter.twoWordOp12Reg4FourFours(OP_LDRB_reg_T2, rn, FourFours(rt, 0, shift, rm));
     }
+    
+    void ldrsb(RegisterID rt, RegisterID rn, RegisterID rm, unsigned shift = 0)
+    {
+        ASSERT(rn != ARMRegisters::pc);
+        ASSERT(!BadReg(rm));
+        ASSERT(shift <= 3);
+        
+        if (!shift && !((rt | rn | rm) & 8))
+            m_formatter.oneWordOp7Reg3Reg3Reg3(OP_LDRSB_reg_T1, rm, rn, rt);
+        else
+            m_formatter.twoWordOp12Reg4FourFours(OP_LDRSB_reg_T2, rn, FourFours(rt, 0, shift, rm));
+    }
+
+    void ldrsh(RegisterID rt, RegisterID rn, RegisterID rm, unsigned shift = 0)
+    {
+        ASSERT(rn != ARMRegisters::pc);
+        ASSERT(!BadReg(rm));
+        ASSERT(shift <= 3);
+        
+        if (!shift && !((rt | rn | rm) & 8))
+            m_formatter.oneWordOp7Reg3Reg3Reg3(OP_LDRSH_reg_T1, rm, rn, rt);
+        else
+            m_formatter.twoWordOp12Reg4FourFours(OP_LDRSH_reg_T2, rn, FourFours(rt, 0, shift, rm));
+    }
 
     void lsl(RegisterID rd, RegisterID rm, int32_t shiftAmount)
     {
@@ -1111,7 +1322,7 @@ public:
         m_formatter.twoWordOp16FourFours(OP_LSL_imm_T1, FourFours(shift.hi4(), rd, shift.lo4(), rm));
     }
 
-    void lsl(RegisterID rd, RegisterID rn, RegisterID rm)
+    ALWAYS_INLINE void lsl(RegisterID rd, RegisterID rn, RegisterID rm)
     {
         ASSERT(!BadReg(rd));
         ASSERT(!BadReg(rn));
@@ -1119,7 +1330,7 @@ public:
         m_formatter.twoWordOp12Reg4FourFours(OP_LSL_reg_T2, rn, FourFours(0xf, rd, 0, rm));
     }
 
-    void lsr(RegisterID rd, RegisterID rm, int32_t shiftAmount)
+    ALWAYS_INLINE void lsr(RegisterID rd, RegisterID rm, int32_t shiftAmount)
     {
         ASSERT(!BadReg(rd));
         ASSERT(!BadReg(rm));
@@ -1127,7 +1338,7 @@ public:
         m_formatter.twoWordOp16FourFours(OP_LSR_imm_T1, FourFours(shift.hi4(), rd, shift.lo4(), rm));
     }
 
-    void lsr(RegisterID rd, RegisterID rn, RegisterID rm)
+    ALWAYS_INLINE void lsr(RegisterID rd, RegisterID rn, RegisterID rm)
     {
         ASSERT(!BadReg(rd));
         ASSERT(!BadReg(rn));
@@ -1135,7 +1346,7 @@ public:
         m_formatter.twoWordOp12Reg4FourFours(OP_LSR_reg_T2, rn, FourFours(0xf, rd, 0, rm));
     }
 
-    void movT3(RegisterID rd, ARMThumbImmediate imm)
+    ALWAYS_INLINE void movT3(RegisterID rd, ARMThumbImmediate imm)
     {
         ASSERT(imm.isValid());
         ASSERT(!imm.isEncodedImm());
@@ -1143,8 +1354,41 @@ public:
         
         m_formatter.twoWordOp5i6Imm4Reg4EncodedImm(OP_MOV_imm_T3, imm.m_value.imm4, rd, imm);
     }
+    
+#if OS(LINUX)
+    static void revertJumpTo_movT3movtcmpT2(void* instructionStart, RegisterID left, RegisterID right, uintptr_t imm)
+    {
+        uint16_t* target = static_cast<uint16_t*>(instructionStart); // Destination of the five-halfword replacement sequence.
+        ARMThumbImmediate lowHalf = ARMThumbImmediate::makeUInt16(static_cast<uint16_t>(imm));
+        ARMThumbImmediate highHalf = ARMThumbImmediate::makeUInt16(static_cast<uint16_t>(imm >> 16));
+        uint16_t replacement[] = {
+            twoWordOp5i6Imm4Reg4EncodedImmFirst(OP_MOV_imm_T3, lowHalf),
+            twoWordOp5i6Imm4Reg4EncodedImmSecond(right, lowHalf),
+            twoWordOp5i6Imm4Reg4EncodedImmFirst(OP_MOVT, highHalf),
+            twoWordOp5i6Imm4Reg4EncodedImmSecond(right, highHalf),
+            static_cast<uint16_t>(OP_CMP_reg_T2 | left) // NOTE(review): only `left` is ORed into this halfword; confirm `right` need not be encoded.
+        };
+        performJITMemcpy(target, replacement, sizeof(replacement));
+        cacheFlush(target, sizeof(replacement));
+    }
+#else
+    static void revertJumpTo_movT3(void* instructionStart, RegisterID rd, ARMThumbImmediate imm)
+    {
+        ASSERT(imm.isValid());
+        ASSERT(!imm.isEncodedImm());
+        ASSERT(!BadReg(rd));
+        // Write the two halfwords of a MOV (T3) of imm into rd over instructionStart, then flush them.
+        uint16_t* target = static_cast<uint16_t*>(instructionStart);
+        uint16_t replacement[] = {
+            twoWordOp5i6Imm4Reg4EncodedImmFirst(OP_MOV_imm_T3, imm),
+            twoWordOp5i6Imm4Reg4EncodedImmSecond(rd, imm)
+        };
+        performJITMemcpy(target, replacement, sizeof(replacement));
+        cacheFlush(target, sizeof(replacement));
+    }
+#endif
 
-     void mov(RegisterID rd, ARMThumbImmediate imm)
+    ALWAYS_INLINE void mov(RegisterID rd, ARMThumbImmediate imm)
     {
         ASSERT(imm.isValid());
         ASSERT(!BadReg(rd));
@@ -1157,19 +1401,19 @@ public:
             movT3(rd, imm);
     }
 
-   void mov(RegisterID rd, RegisterID rm)
+    ALWAYS_INLINE void mov(RegisterID rd, RegisterID rm)
     {
         m_formatter.oneWordOp8RegReg143(OP_MOV_reg_T1, rm, rd);
     }
 
-    void movt(RegisterID rd, ARMThumbImmediate imm)
+    ALWAYS_INLINE void movt(RegisterID rd, ARMThumbImmediate imm)
     {
         ASSERT(imm.isUInt16());
         ASSERT(!BadReg(rd));
         m_formatter.twoWordOp5i6Imm4Reg4EncodedImm(OP_MOVT, imm.m_value.imm4, rd, imm);
     }
 
-    void mvn(RegisterID rd, ARMThumbImmediate imm)
+    ALWAYS_INLINE void mvn(RegisterID rd, ARMThumbImmediate imm)
     {
         ASSERT(imm.isEncodedImm());
         ASSERT(!BadReg(rd));
@@ -1177,14 +1421,14 @@ public:
         m_formatter.twoWordOp5i6Imm4Reg4EncodedImm(OP_MVN_imm, 0xf, rd, imm);
     }
 
-    void mvn(RegisterID rd, RegisterID rm, ShiftTypeAndAmount shift)
+    ALWAYS_INLINE void mvn(RegisterID rd, RegisterID rm, ShiftTypeAndAmount shift)
     {
         ASSERT(!BadReg(rd));
         ASSERT(!BadReg(rm));
         m_formatter.twoWordOp16FourFours(OP_MVN_reg_T2, FourFours(shift.hi4(), rd, shift.lo4(), rm));
     }
 
-    void mvn(RegisterID rd, RegisterID rm)
+    ALWAYS_INLINE void mvn(RegisterID rd, RegisterID rm)
     {
         if (!((rd | rm) & 8))
             m_formatter.oneWordOp10Reg3Reg3(OP_MVN_reg_T1, rm, rd);
@@ -1192,13 +1436,22 @@ public:
             mvn(rd, rm, ShiftTypeAndAmount());
     }
 
-    void neg(RegisterID rd, RegisterID rm)
+    ALWAYS_INLINE void mrs(RegisterID rd, SPRegisterID specReg)
+    {
+        ASSERT(specReg == ARMRegisters::apsr); // Only apsr is accepted when assertions are enabled.
+        ASSERT(!BadReg(rd));
+        unsigned short specialRegisterBit = (specReg == ARMRegisters::apsr) ? 0 : (1 << 4); // Any other value would set bit 4 of the opcode; reachable only in non-assert builds.
+        OpcodeID1 mrsOp = static_cast<OpcodeID1>(OP_MRS_T1 | specialRegisterBit);
+        m_formatter.twoWordOp16FourFours(mrsOp, FourFours(0x8, rd, 0, 0));
+    }
+
+    ALWAYS_INLINE void neg(RegisterID rd, RegisterID rm)
     {
         ARMThumbImmediate zero = ARMThumbImmediate::makeUInt12(0);
         sub(rd, zero, rm);
     }
 
-    void orr(RegisterID rd, RegisterID rn, ARMThumbImmediate imm)
+    ALWAYS_INLINE void orr(RegisterID rd, RegisterID rn, ARMThumbImmediate imm)
     {
         ASSERT(!BadReg(rd));
         ASSERT(!BadReg(rn));
@@ -1206,7 +1459,7 @@ public:
         m_formatter.twoWordOp5i6Imm4Reg4EncodedImm(OP_ORR_imm_T1, rn, rd, imm);
     }
 
-    void orr(RegisterID rd, RegisterID rn, RegisterID rm, ShiftTypeAndAmount shift)
+    ALWAYS_INLINE void orr(RegisterID rd, RegisterID rn, RegisterID rm, ShiftTypeAndAmount shift)
     {
         ASSERT(!BadReg(rd));
         ASSERT(!BadReg(rn));
@@ -1224,7 +1477,7 @@ public:
             orr(rd, rn, rm, ShiftTypeAndAmount());
     }
 
-    void orr_S(RegisterID rd, RegisterID rn, RegisterID rm, ShiftTypeAndAmount shift)
+    ALWAYS_INLINE void orr_S(RegisterID rd, RegisterID rn, RegisterID rm, ShiftTypeAndAmount shift)
     {
         ASSERT(!BadReg(rd));
         ASSERT(!BadReg(rn));
@@ -1242,7 +1495,7 @@ public:
             orr_S(rd, rn, rm, ShiftTypeAndAmount());
     }
 
-    void ror(RegisterID rd, RegisterID rm, int32_t shiftAmount)
+    ALWAYS_INLINE void ror(RegisterID rd, RegisterID rm, int32_t shiftAmount)
     {
         ASSERT(!BadReg(rd));
         ASSERT(!BadReg(rm));
@@ -1250,7 +1503,7 @@ public:
         m_formatter.twoWordOp16FourFours(OP_ROR_imm_T1, FourFours(shift.hi4(), rd, shift.lo4(), rm));
     }
 
-    void ror(RegisterID rd, RegisterID rn, RegisterID rm)
+    ALWAYS_INLINE void ror(RegisterID rd, RegisterID rn, RegisterID rm)
     {
         ASSERT(!BadReg(rd));
         ASSERT(!BadReg(rn));
@@ -1258,7 +1511,57 @@ public:
         m_formatter.twoWordOp12Reg4FourFours(OP_ROR_reg_T2, rn, FourFours(0xf, rd, 0, rm));
     }
 
-    void smull(RegisterID rdLo, RegisterID rdHi, RegisterID rn, RegisterID rm)
+    ALWAYS_INLINE void pop(RegisterID dest)
+    {
+        if (dest < ARMRegisters::r8)
+            m_formatter.oneWordOp7Imm9(OP_POP_T1, 1 << dest);
+        else {
+            // Load postindexed with writeback.
+            ldr(dest, ARMRegisters::sp, sizeof(void*), false, true);
+        }
+    }
+
+    ALWAYS_INLINE void pop(uint32_t registerList)
+    {
+        ASSERT(WTF::bitCount(registerList) > 1);
+        ASSERT(!((1 << ARMRegisters::pc) & registerList) || !((1 << ARMRegisters::lr) & registerList));
+        ASSERT(!((1 << ARMRegisters::sp) & registerList));
+        m_formatter.twoWordOp16Imm16(OP_POP_T2, registerList);
+    }
+
+    ALWAYS_INLINE void push(RegisterID src) // Pushes a single register using the stack pointer.
+    {
+        if (src < ARMRegisters::r8)
+            m_formatter.oneWordOp7Imm9(OP_PUSH_T1, 1 << src);
+        else if (src == ARMRegisters::lr)
+            m_formatter.oneWordOp7Imm9(OP_PUSH_T1, 0x100); // lr is selected by bit 8 rather than by 1 << src.
+        else {
+            // Store preindexed with writeback. Negate as a signed int; -sizeof() would be an unsigned wraparound.
+            str(src, ARMRegisters::sp, -static_cast<int>(sizeof(void*)), true, true);
+        }
+    }
+
+    ALWAYS_INLINE void push(uint32_t registerList)
+    {
+        ASSERT(WTF::bitCount(registerList) > 1);
+        ASSERT(!((1 << ARMRegisters::pc) & registerList));
+        ASSERT(!((1 << ARMRegisters::sp) & registerList));
+        m_formatter.twoWordOp16Imm16(OP_PUSH_T2, registerList);
+    }
+
+#if HAVE(ARM_IDIV_INSTRUCTIONS)
+    template<int datasize>
+    ALWAYS_INLINE void sdiv(RegisterID rd, RegisterID rn, RegisterID rm)
+    {
+        static_assert(datasize == 32, "sdiv datasize must be 32 for armv7s");        
+        ASSERT(!BadReg(rd));
+        ASSERT(!BadReg(rn));
+        ASSERT(!BadReg(rm));
+        m_formatter.twoWordOp12Reg4FourFours(OP_SDIV_T1, rn, FourFours(0xf, rd, 0xf, rm));
+    }
+#endif
+
+    ALWAYS_INLINE void smull(RegisterID rdLo, RegisterID rdHi, RegisterID rn, RegisterID rm)
     {
         ASSERT(!BadReg(rdLo));
         ASSERT(!BadReg(rdHi));
@@ -1269,7 +1572,7 @@ public:
     }
 
     // rt == ARMRegisters::pc only allowed if last instruction in IT (if then) block.
-    void str(RegisterID rt, RegisterID rn, ARMThumbImmediate imm)
+    ALWAYS_INLINE void str(RegisterID rt, RegisterID rn, ARMThumbImmediate imm)
     {
         ASSERT(rt != ARMRegisters::pc);
         ASSERT(rn != ARMRegisters::pc);
@@ -1278,7 +1581,7 @@ public:
         if (!((rt | rn) & 8) && imm.isUInt7())
             m_formatter.oneWordOp5Imm5Reg3Reg3(OP_STR_imm_T1, imm.getUInt7() >> 2, rn, rt);
         else if ((rn == ARMRegisters::sp) && !(rt & 8) && imm.isUInt10())
-            m_formatter.oneWordOp5Reg3Imm8(OP_STR_imm_T2, rt, imm.getUInt10() >> 2);
+            m_formatter.oneWordOp5Reg3Imm8(OP_STR_imm_T2, rt, static_cast<uint8_t>(imm.getUInt10() >> 2));
         else
             m_formatter.twoWordOp12Reg4Reg4Imm12(OP_STR_imm_T3, rn, rt, imm.getUInt12());
     }
@@ -1294,7 +1597,7 @@ public:
     // _tmp = _reg + offset
     // MEM[index ? _tmp : _reg] = REG[rt]
     // if (wback) REG[rn] = _tmp
-    void str(RegisterID rt, RegisterID rn, int offset, bool index, bool wback)
+    ALWAYS_INLINE void str(RegisterID rt, RegisterID rn, int offset, bool index, bool wback)
     {
         ASSERT(rt != ARMRegisters::pc);
         ASSERT(rn != ARMRegisters::pc);
@@ -1317,7 +1620,7 @@ public:
     }
 
     // rt == ARMRegisters::pc only allowed if last instruction in IT (if then) block.
-    void str(RegisterID rt, RegisterID rn, RegisterID rm, unsigned shift=0)
+    ALWAYS_INLINE void str(RegisterID rt, RegisterID rn, RegisterID rm, unsigned shift = 0)
     {
         ASSERT(rn != ARMRegisters::pc);
         ASSERT(!BadReg(rm));
@@ -1329,7 +1632,125 @@ public:
             m_formatter.twoWordOp12Reg4FourFours(OP_STR_reg_T2, rn, FourFours(rt, 0, shift, rm));
     }
 
-    void sub(RegisterID rd, RegisterID rn, ARMThumbImmediate imm)
+    // Store byte at rn + imm, where imm is an unsigned 12-bit offset; neither rt nor rn may be pc.
+    ALWAYS_INLINE void strb(RegisterID rt, RegisterID rn, ARMThumbImmediate imm)
+    {
+        ASSERT(rt != ARMRegisters::pc);
+        ASSERT(rn != ARMRegisters::pc);
+        ASSERT(imm.isUInt12());
+
+        if (!((rt | rn) & 8) && imm.isUInt5()) // T1 (r0-r7 only): imm5 is an unscaled byte offset, 0..31.
+            m_formatter.oneWordOp5Imm5Reg3Reg3(OP_STRB_imm_T1, imm.getUInt5(), rn, rt);
+        else
+            m_formatter.twoWordOp12Reg4Reg4Imm12(OP_STRB_imm_T2, rn, rt, imm.getUInt12());
+    }
+
+    // If index is set, this is a regular offset or a pre-indexed store;
+    // if index is not set then it is a post-index store.
+    //
+    // If wback is set rn is updated - this is a pre or post index store,
+    // if wback is not set this is a regular offset memory access.
+    //
+    // (-255 <= offset <= 255)
+    // _reg = REG[rn]
+    // _tmp = _reg + offset
+    // MEM[index ? _tmp : _reg] = REG[rt]
+    // if (wback) REG[rn] = _tmp
+    ALWAYS_INLINE void strb(RegisterID rt, RegisterID rn, int offset, bool index, bool wback)
+    {
+        ASSERT(rt != ARMRegisters::pc);
+        ASSERT(rn != ARMRegisters::pc);
+        ASSERT(index || wback);
+        ASSERT(!wback | (rt != rn));
+    
+        bool add = true;
+        if (offset < 0) {
+            add = false;
+            offset = -offset;
+        }
+        ASSERT((offset & ~0xff) == 0);
+        
+        offset |= (wback << 8);
+        offset |= (add   << 9);
+        offset |= (index << 10);
+        offset |= (1 << 11);
+        
+        m_formatter.twoWordOp12Reg4Reg4Imm12(OP_STRB_imm_T3, rn, rt, offset);
+    }
+
+    // rt == ARMRegisters::pc only allowed if last instruction in IT (if then) block.
+    ALWAYS_INLINE void strb(RegisterID rt, RegisterID rn, RegisterID rm, unsigned shift = 0)
+    {
+        ASSERT(rn != ARMRegisters::pc);
+        ASSERT(!BadReg(rm));
+        ASSERT(shift <= 3);
+
+        if (!shift && !((rt | rn | rm) & 8))
+            m_formatter.oneWordOp7Reg3Reg3Reg3(OP_STRB_reg_T1, rm, rn, rt);
+        else
+            m_formatter.twoWordOp12Reg4FourFours(OP_STRB_reg_T2, rn, FourFours(rt, 0, shift, rm));
+    }
+    
+    // Store halfword at rn + imm, where imm is an unsigned 12-bit offset; neither rt nor rn may be pc.
+    ALWAYS_INLINE void strh(RegisterID rt, RegisterID rn, ARMThumbImmediate imm)
+    {
+        ASSERT(rt != ARMRegisters::pc);
+        ASSERT(rn != ARMRegisters::pc);
+        ASSERT(imm.isUInt12());
+
+        if (!((rt | rn) & 8) && imm.isUInt6() && !(imm.getUInt6() & 1)) // T1 stores imm >> 1, so it can only encode even offsets.
+            m_formatter.oneWordOp5Imm5Reg3Reg3(OP_STRH_imm_T1, imm.getUInt6() >> 1, rn, rt);
+        else
+            m_formatter.twoWordOp12Reg4Reg4Imm12(OP_STRH_imm_T2, rn, rt, imm.getUInt12());
+    }
+    
+    // If index is set, this is a regular offset or a pre-indexed store;
+    // if index is not set then it is a post-index store.
+    //
+    // If wback is set rn is updated - this is a pre or post index store,
+    // if wback is not set this is a regular offset memory access.
+    //
+    // (-255 <= offset <= 255)
+    // _reg = REG[rn]
+    // _tmp = _reg + offset
+    // MEM[index ? _tmp : _reg] = REG[rt]
+    // if (wback) REG[rn] = _tmp
+    ALWAYS_INLINE void strh(RegisterID rt, RegisterID rn, int offset, bool index, bool wback)
+    {
+        ASSERT(rt != ARMRegisters::pc);
+        ASSERT(rn != ARMRegisters::pc);
+        ASSERT(index || wback);
+        ASSERT(!wback | (rt != rn));
+        
+        bool add = true;
+        if (offset < 0) {
+            add = false;
+            offset = -offset;
+        }
+        ASSERT(!(offset & ~0xff));
+        
+        offset |= (wback << 8);
+        offset |= (add   << 9);
+        offset |= (index << 10);
+        offset |= (1 << 11);
+        
+        m_formatter.twoWordOp12Reg4Reg4Imm12(OP_STRH_imm_T3, rn, rt, offset);
+    }
+    
+    // rt == ARMRegisters::pc only allowed if last instruction in IT (if then) block.
+    ALWAYS_INLINE void strh(RegisterID rt, RegisterID rn, RegisterID rm, unsigned shift = 0)
+    {
+        ASSERT(rn != ARMRegisters::pc);
+        ASSERT(!BadReg(rm));
+        ASSERT(shift <= 3);
+        
+        if (!shift && !((rt | rn | rm) & 8))
+            m_formatter.oneWordOp7Reg3Reg3Reg3(OP_STRH_reg_T1, rm, rn, rt);
+        else
+            m_formatter.twoWordOp12Reg4FourFours(OP_STRH_reg_T2, rn, FourFours(rt, 0, shift, rm));
+    }
+
+    ALWAYS_INLINE void sub(RegisterID rd, RegisterID rn, ARMThumbImmediate imm)
     {
         // Rd can only be SP if Rn is also SP.
         ASSERT((rd != ARMRegisters::sp) || (rn == ARMRegisters::sp));
@@ -1338,7 +1759,8 @@ public:
         ASSERT(imm.isValid());
 
         if ((rn == ARMRegisters::sp) && (rd == ARMRegisters::sp) && imm.isUInt9()) {
-            m_formatter.oneWordOp9Imm7(OP_SUB_SP_imm_T1, imm.getUInt9() >> 2);
+            ASSERT(!(imm.getUInt16() & 3));
+            m_formatter.oneWordOp9Imm7(OP_SUB_SP_imm_T1, static_cast<uint8_t>(imm.getUInt9() >> 2));
             return;
         } else if (!((rd | rn) & 8)) {
             if (imm.isUInt3()) {
@@ -1358,7 +1780,7 @@ public:
         }
     }
 
-    void sub(RegisterID rd, ARMThumbImmediate imm, RegisterID rn)
+    ALWAYS_INLINE void sub(RegisterID rd, ARMThumbImmediate imm, RegisterID rn)
     {
         ASSERT(rd != ARMRegisters::pc);
         ASSERT(rn != ARMRegisters::pc);
@@ -1371,7 +1793,7 @@ public:
             m_formatter.twoWordOp5i6Imm4Reg4EncodedImm(OP_RSB_imm_T2, rn, rd, imm);
     }
 
-    void sub(RegisterID rd, RegisterID rn, RegisterID rm, ShiftTypeAndAmount shift)
+    ALWAYS_INLINE void sub(RegisterID rd, RegisterID rn, RegisterID rm, ShiftTypeAndAmount shift)
     {
         ASSERT((rd != ARMRegisters::sp) || (rn == ARMRegisters::sp));
         ASSERT(rd != ARMRegisters::pc);
@@ -1381,7 +1803,7 @@ public:
     }
 
     // NOTE: In an IT block, add doesn't modify the flags register.
-    void sub(RegisterID rd, RegisterID rn, RegisterID rm)
+    ALWAYS_INLINE void sub(RegisterID rd, RegisterID rn, RegisterID rm)
     {
         if (!((rd | rn | rm) & 8))
             m_formatter.oneWordOp7Reg3Reg3Reg3(OP_SUB_reg_T1, rm, rn, rd);
@@ -1399,7 +1821,8 @@ public:
         ASSERT(imm.isValid());
 
         if ((rn == ARMRegisters::sp) && (rd == ARMRegisters::sp) && imm.isUInt9()) {
-            m_formatter.oneWordOp9Imm7(OP_SUB_SP_imm_T1, imm.getUInt9() >> 2);
+            ASSERT(!(imm.getUInt16() & 3));
+            m_formatter.oneWordOp9Imm7(OP_SUB_SP_imm_T1, static_cast<uint8_t>(imm.getUInt9() >> 2));
             return;
         } else if (!((rd | rn) & 8)) {
             if (imm.isUInt3()) {
@@ -1414,8 +1837,18 @@ public:
         m_formatter.twoWordOp5i6Imm4Reg4EncodedImm(OP_SUB_S_imm_T3, rn, rd, imm);
     }
 
+    ALWAYS_INLINE void sub_S(RegisterID rd, ARMThumbImmediate imm, RegisterID rn)
+    {
+        ASSERT(rd != ARMRegisters::pc);
+        ASSERT(rn != ARMRegisters::pc);
+        ASSERT(imm.isValid());
+        ASSERT(imm.isUInt12());
+
+        m_formatter.twoWordOp5i6Imm4Reg4EncodedImm(OP_RSB_S_imm_T2, rn, rd, imm);
+    }
+
     // Not allowed in an IT (if then) block?
-    void sub_S(RegisterID rd, RegisterID rn, RegisterID rm, ShiftTypeAndAmount shift)
+    ALWAYS_INLINE void sub_S(RegisterID rd, RegisterID rn, RegisterID rm, ShiftTypeAndAmount shift)
     {
         ASSERT((rd != ARMRegisters::sp) || (rn == ARMRegisters::sp));
         ASSERT(rd != ARMRegisters::pc);
@@ -1425,7 +1858,7 @@ public:
     }
 
     // Not allowed in an IT (if then) block.
-    void sub_S(RegisterID rd, RegisterID rn, RegisterID rm)
+    ALWAYS_INLINE void sub_S(RegisterID rd, RegisterID rn, RegisterID rm)
     {
         if (!((rd | rn | rm) & 8))
             m_formatter.oneWordOp7Reg3Reg3Reg3(OP_SUB_reg_T1, rm, rn, rd);
@@ -1433,7 +1866,7 @@ public:
             sub_S(rd, rn, rm, ShiftTypeAndAmount());
     }
 
-    void tst(RegisterID rn, ARMThumbImmediate imm)
+    ALWAYS_INLINE void tst(RegisterID rn, ARMThumbImmediate imm)
     {
         ASSERT(!BadReg(rn));
         ASSERT(imm.isEncodedImm());
@@ -1441,14 +1874,14 @@ public:
         m_formatter.twoWordOp5i6Imm4Reg4EncodedImm(OP_TST_imm, rn, (RegisterID)0xf, imm);
     }
 
-    void tst(RegisterID rn, RegisterID rm, ShiftTypeAndAmount shift)
+    ALWAYS_INLINE void tst(RegisterID rn, RegisterID rm, ShiftTypeAndAmount shift)
     {
         ASSERT(!BadReg(rn));
         ASSERT(!BadReg(rm));
         m_formatter.twoWordOp12Reg4FourFours(OP_TST_reg_T2, rn, FourFours(shift.hi4(), 0xf, shift.lo4(), rm));
     }
 
-    void tst(RegisterID rn, RegisterID rm)
+    ALWAYS_INLINE void tst(RegisterID rn, RegisterID rm)
     {
         if ((rn | rm) & 8)
             tst(rn, rm, ShiftTypeAndAmount());
@@ -1456,34 +1889,58 @@ public:
             m_formatter.oneWordOp10Reg3Reg3(OP_TST_reg_T1, rm, rn);
     }
 
-    void vadd_F64(FPDoubleRegisterID rd, FPDoubleRegisterID rn, FPDoubleRegisterID rm)
+    ALWAYS_INLINE void ubfx(RegisterID rd, RegisterID rn, unsigned lsb, unsigned width)
+    {
+        ASSERT(lsb < 32);
+        ASSERT((width >= 1) && (width <= 32));
+        ASSERT((lsb + width) <= 32);
+        m_formatter.twoWordOp12Reg40Imm3Reg4Imm20Imm5(OP_UBFX_T1, rd, rn, (lsb & 0x1c) << 10, (lsb & 0x3) << 6, (width - 1) & 0x1f);
+    }
+
+#if HAVE(ARM_IDIV_INSTRUCTIONS)
+    ALWAYS_INLINE void udiv(RegisterID rd, RegisterID rn, RegisterID rm)
+    {
+        ASSERT(!BadReg(rd));
+        ASSERT(!BadReg(rn));
+        ASSERT(!BadReg(rm));
+        m_formatter.twoWordOp12Reg4FourFours(OP_UDIV_T1, rn, FourFours(0xf, rd, 0xf, rm));
+    }
+#endif
+
+    void vadd(FPDoubleRegisterID rd, FPDoubleRegisterID rn, FPDoubleRegisterID rm)
     {
         m_formatter.vfpOp(OP_VADD_T2, OP_VADD_T2b, true, rn, rd, rm);
     }
 
-    void vcmp_F64(FPDoubleRegisterID rd, FPDoubleRegisterID rm)
+    void vcmp(FPDoubleRegisterID rd, FPDoubleRegisterID rm)
     {
         m_formatter.vfpOp(OP_VCMP, OP_VCMPb, true, VFPOperand(4), rd, rm);
     }
 
-    void vcmpz_F64(FPDoubleRegisterID rd)
+    void vcmpz(FPDoubleRegisterID rd)
     {
         m_formatter.vfpOp(OP_VCMP, OP_VCMPb, true, VFPOperand(5), rd, VFPOperand(0));
     }
 
-    void vcvt_F64_S32(FPDoubleRegisterID rd, FPSingleRegisterID rm)
+    void vcvt_signedToFloatingPoint(FPDoubleRegisterID rd, FPSingleRegisterID rm)
     {
         // boolean values are 64bit (toInt, unsigned, roundZero)
         m_formatter.vfpOp(OP_VCVT_FPIVFP, OP_VCVT_FPIVFPb, true, vcvtOp(false, false, false), rd, rm);
     }
 
-    void vcvtr_S32_F64(FPSingleRegisterID rd, FPDoubleRegisterID rm)
+    void vcvt_floatingPointToSigned(FPSingleRegisterID rd, FPDoubleRegisterID rm)
     {
         // boolean values are 64bit (toInt, unsigned, roundZero)
         m_formatter.vfpOp(OP_VCVT_FPIVFP, OP_VCVT_FPIVFPb, true, vcvtOp(true, false, true), rd, rm);
     }
+    
+    void vcvt_floatingPointToUnsigned(FPSingleRegisterID rd, FPDoubleRegisterID rm)
+    {
+        // boolean values are 64bit (toInt, unsigned, roundZero)
+        m_formatter.vfpOp(OP_VCVT_FPIVFP, OP_VCVT_FPIVFPb, true, vcvtOp(true, true, true), rd, rm);
+    }
 
-    void vdiv_F64(FPDoubleRegisterID rd, FPDoubleRegisterID rn, FPDoubleRegisterID rm)
+    void vdiv(FPDoubleRegisterID rd, FPDoubleRegisterID rn, FPDoubleRegisterID rm)
     {
         m_formatter.vfpOp(OP_VDIV, OP_VDIVb, true, rn, rd, rm);
     }
@@ -1492,17 +1949,41 @@ public:
     {
         m_formatter.vfpMemOp(OP_VLDR, OP_VLDRb, true, rn, rd, imm);
     }
+    
+    void flds(FPSingleRegisterID rd, RegisterID rn, int32_t imm)
+    {
+        m_formatter.vfpMemOp(OP_FLDS, OP_FLDSb, false, rn, rd, imm);
+    }
 
     void vmov(RegisterID rd, FPSingleRegisterID rn)
     {
         ASSERT(!BadReg(rd));
-        m_formatter.vfpOp(OP_VMOV_CtoS, OP_VMOV_CtoSb, false, rn, rd, VFPOperand(0));
+        m_formatter.vfpOp(OP_VMOV_StoC, OP_VMOV_StoCb, false, rn, rd, VFPOperand(0));
     }
 
     void vmov(FPSingleRegisterID rd, RegisterID rn)
     {
         ASSERT(!BadReg(rn));
-        m_formatter.vfpOp(OP_VMOV_StoC, OP_VMOV_StoCb, false, rd, rn, VFPOperand(0));
+        m_formatter.vfpOp(OP_VMOV_CtoS, OP_VMOV_CtoSb, false, rd, rn, VFPOperand(0));
+    }
+
+    void vmov(RegisterID rd1, RegisterID rd2, FPDoubleRegisterID rn)
+    {
+        ASSERT(!BadReg(rd1));
+        ASSERT(!BadReg(rd2));
+        m_formatter.vfpOp(OP_VMOV_DtoC, OP_VMOV_DtoCb, true, rd2, VFPOperand(rd1 | 16), rn);
+    }
+
+    void vmov(FPDoubleRegisterID rd, RegisterID rn1, RegisterID rn2)
+    {
+        ASSERT(!BadReg(rn1));
+        ASSERT(!BadReg(rn2));
+        m_formatter.vfpOp(OP_VMOV_CtoD, OP_VMOV_CtoDb, true, rn2, VFPOperand(rn1 | 16), rd);
+    }
+
+    void vmov(FPDoubleRegisterID rd, FPDoubleRegisterID rn)
+    {
+        m_formatter.vfpOp(OP_VMOV_T2, OP_VMOV_T2b, true, VFPOperand(0), rd, rn);
     }
 
     void vmrs(RegisterID reg = ARMRegisters::pc)
@@ -1511,7 +1992,7 @@ public:
         m_formatter.vfpOp(OP_VMRS, OP_VMRSb, false, VFPOperand(1), VFPOperand(0x10 | reg), VFPOperand(0));
     }
 
-    void vmul_F64(FPDoubleRegisterID rd, FPDoubleRegisterID rn, FPDoubleRegisterID rm)
+    void vmul(FPDoubleRegisterID rd, FPDoubleRegisterID rn, FPDoubleRegisterID rm)
     {
         m_formatter.vfpOp(OP_VMUL_T2, OP_VMUL_T2b, true, rn, rd, rm);
     }
@@ -1521,14 +2002,124 @@ public:
         m_formatter.vfpMemOp(OP_VSTR, OP_VSTRb, true, rn, rd, imm);
     }
 
-    void vsub_F64(FPDoubleRegisterID rd, FPDoubleRegisterID rn, FPDoubleRegisterID rm)
+    void fsts(FPSingleRegisterID rd, RegisterID rn, int32_t imm)
+    {
+        m_formatter.vfpMemOp(OP_FSTS, OP_FSTSb, false, rn, rd, imm);
+    }
+
+    void vsub(FPDoubleRegisterID rd, FPDoubleRegisterID rn, FPDoubleRegisterID rm)
     {
         m_formatter.vfpOp(OP_VSUB_T2, OP_VSUB_T2b, true, rn, rd, rm);
     }
 
+    void vabs(FPDoubleRegisterID rd, FPDoubleRegisterID rm)
+    {
+        m_formatter.vfpOp(OP_VABS_T2, OP_VABS_T2b, true, VFPOperand(16), rd, rm);
+    }
+
+    void vneg(FPDoubleRegisterID rd, FPDoubleRegisterID rm)
+    {
+        m_formatter.vfpOp(OP_VNEG_T2, OP_VNEG_T2b, true, VFPOperand(1), rd, rm);
+    }
+
+    void vsqrt(FPDoubleRegisterID rd, FPDoubleRegisterID rm)
+    {
+        m_formatter.vfpOp(OP_VSQRT_T1, OP_VSQRT_T1b, true, VFPOperand(17), rd, rm);
+    }
+    
+    void vcvtds(FPDoubleRegisterID rd, FPSingleRegisterID rm)
+    {
+        m_formatter.vfpOp(OP_VCVTDS_T1, OP_VCVTDS_T1b, false, VFPOperand(23), rd, rm);
+    }
+
+    void vcvtsd(FPSingleRegisterID rd, FPDoubleRegisterID rm)
+    {
+        m_formatter.vfpOp(OP_VCVTSD_T1, OP_VCVTSD_T1b, true, VFPOperand(23), rd, rm);
+    }
+
+    void nop()
+    {
+        m_formatter.oneWordOp8Imm8(OP_NOP_T1, 0);
+    }
+
+    void nopw()
+    {
+        m_formatter.twoWordOp16Op16(OP_NOP_T2a, OP_NOP_T2b);
+    }
+    
+    static constexpr int16_t nopPseudo16()
+    {
+        return OP_NOP_T1;
+    }
+
+    static constexpr int32_t nopPseudo32()
+    {
+        return OP_NOP_T2a | (OP_NOP_T2b << 16);
+    }
+
+    static void fillNops(void* base, size_t size, bool isCopyingToExecutableMemory)
+    {
+        // Fills size bytes at base with NOP encodings: 32-bit ones first, then at most one 16-bit one.
+        RELEASE_ASSERT(!(size % sizeof(int16_t)));
+        const size_t wideCount = size / sizeof(int32_t);
+        const size_t narrowCount = (size % sizeof(int32_t)) / sizeof(int16_t);
+        ASSERT(narrowCount == 0 || narrowCount == 1);
+        ASSERT(narrowCount * sizeof(int16_t) + wideCount * sizeof(int32_t) == size);
+        char* cursor = static_cast<char*>(base);
+        const int32_t wideNop = nopPseudo32();
+        for (size_t remaining = wideCount; remaining; --remaining) {
+            if (isCopyingToExecutableMemory)
+                performJITMemcpy(cursor, &wideNop, sizeof(int32_t));
+            else
+                memcpy(cursor, &wideNop, sizeof(int32_t));
+            cursor += sizeof(int32_t);
+        }
+
+        if (!narrowCount)
+            return;
+        const int16_t narrowNop = nopPseudo16();
+        if (isCopyingToExecutableMemory)
+            performJITMemcpy(cursor, &narrowNop, sizeof(int16_t));
+        else
+            memcpy(cursor, &narrowNop, sizeof(int16_t));
+    }
+
+    void dmbSY()
+    {
+        m_formatter.twoWordOp16Op16(OP_DMB_T1a, OP_DMB_SY_T1b);
+    }
+
+    void dmbISHST()
+    {
+        m_formatter.twoWordOp16Op16(OP_DMB_T1a, OP_DMB_ISHST_T1b);
+    }
+
+    AssemblerLabel labelIgnoringWatchpoints()
+    {
+        return m_formatter.label();
+    }
+
+    AssemblerLabel labelForWatchpoint()
+    {
+        AssemblerLabel result = m_formatter.label();
+        if (static_cast<int>(result.m_offset) != m_indexOfLastWatchpoint)
+            result = label();
+        m_indexOfLastWatchpoint = result.m_offset;
+        m_indexOfTailOfLastWatchpoint = result.m_offset + maxJumpReplacementSize();
+        return result;
+    }
+
     AssemblerLabel label()
     {
-        return AssemblerLabel(m_formatter.label());
+        AssemblerLabel result = m_formatter.label();
+        while (UNLIKELY(static_cast<int>(result.m_offset) < m_indexOfTailOfLastWatchpoint)) {
+            if (UNLIKELY(static_cast<int>(result.m_offset) + 4 <= m_indexOfTailOfLastWatchpoint))
+                nopw();
+            else
+                nop();
+            result = m_formatter.label();
+        }
+        return result;
     }
     
     AssemblerLabel align(int alignment)
@@ -1550,23 +2141,16 @@ public:
         return b.m_offset - a.m_offset;
     }
 
-    int executableOffsetFor(int location)
-    {
-        if (!location)
-            return 0;
-        return static_cast<int32_t*>(m_formatter.data())[location / sizeof(int32_t) - 1];
-    }
-    
-    int jumpSizeDelta(JumpType jumpType, JumpLinkType jumpLinkType) { return JumpPaddingSizes[jumpType] - JumpSizes[jumpLinkType]; }
+    static int jumpSizeDelta(JumpType jumpType, JumpLinkType jumpLinkType) { return JUMP_ENUM_SIZE(jumpType) - JUMP_ENUM_SIZE(jumpLinkType); }
     
     // Assembler admin methods:
 
-    static bool linkRecordSourceComparator(const LinkRecord& a, const LinkRecord& b)
+    static ALWAYS_INLINE bool linkRecordSourceComparator(const LinkRecord& a, const LinkRecord& b)
     {
         return a.from() < b.from();
     }
 
-    bool canCompact(JumpType jumpType)
+    static bool canCompact(JumpType jumpType) // True only for JumpNoCondition and JumpCondition.
     {
         // The following cannot be compacted:
         //   JumpFixed: represents custom jump sequence
@@ -1575,7 +2159,7 @@ public:
         return (jumpType == JumpNoCondition) || (jumpType == JumpCondition);
     }
     
-    JumpLinkType computeJumpType(JumpType jumpType, const uint8_t* from, const uint8_t* to)
+    static JumpLinkType computeJumpType(JumpType jumpType, const uint8_t* from, const uint8_t* to) // Picks the link type for a jump of |jumpType| from |from| to |to|, trying the shorter encodings before the longer ones.
     {
         if (jumpType == JumpFixed)
             return LinkInvalid;
@@ -1586,38 +2170,31 @@ public:
         if (jumpType == JumpConditionFixedSize)
             return LinkConditionalBX;
         
-        const int paddingSize = JumpPaddingSizes[jumpType];
-        bool mayTriggerErrata = false;
+        const int paddingSize = JUMP_ENUM_SIZE(jumpType); // Each candidate location below is |from| moved back by (paddingSize - size of that link type).
         
         if (jumpType == JumpCondition) {
             // 2-byte conditional T1
-            const uint16_t* jumpT1Location = reinterpret_cast<const uint16_t*>(from - (paddingSize - JumpSizes[LinkJumpT1]));
+            const uint16_t* jumpT1Location = reinterpret_cast_ptr<const uint16_t*>(from - (paddingSize - JUMP_ENUM_SIZE(LinkJumpT1)));
             if (canBeJumpT1(jumpT1Location, to))
                 return LinkJumpT1;
             // 4-byte conditional T3
-            const uint16_t* jumpT3Location = reinterpret_cast<const uint16_t*>(from - (paddingSize - JumpSizes[LinkJumpT3]));
-            if (canBeJumpT3(jumpT3Location, to, mayTriggerErrata)) {
-                if (!mayTriggerErrata)
-                    return LinkJumpT3;
-            }
+            const uint16_t* jumpT3Location = reinterpret_cast_ptr<const uint16_t*>(from - (paddingSize - JUMP_ENUM_SIZE(LinkJumpT3)));
+            if (canBeJumpT3(jumpT3Location, to)) // The errata out-parameter of the old version is gone; only the range check remains.
+                return LinkJumpT3;
             // 4-byte conditional T4 with IT
             const uint16_t* conditionalJumpT4Location = 
-            reinterpret_cast<const uint16_t*>(from - (paddingSize - JumpSizes[LinkConditionalJumpT4]));
-            if (canBeJumpT4(conditionalJumpT4Location, to, mayTriggerErrata)) {
-                if (!mayTriggerErrata)
-                    return LinkConditionalJumpT4;
-            }
+            reinterpret_cast_ptr<const uint16_t*>(from - (paddingSize - JUMP_ENUM_SIZE(LinkConditionalJumpT4)));
+            if (canBeJumpT4(conditionalJumpT4Location, to))
+                return LinkConditionalJumpT4;
         } else {
             // 2-byte unconditional T2
-            const uint16_t* jumpT2Location = reinterpret_cast<const uint16_t*>(from - (paddingSize - JumpSizes[LinkJumpT2]));
+            const uint16_t* jumpT2Location = reinterpret_cast_ptr<const uint16_t*>(from - (paddingSize - JUMP_ENUM_SIZE(LinkJumpT2)));
             if (canBeJumpT2(jumpT2Location, to))
                 return LinkJumpT2;
             // 4-byte unconditional T4
-            const uint16_t* jumpT4Location = reinterpret_cast<const uint16_t*>(from - (paddingSize - JumpSizes[LinkJumpT4]));
-            if (canBeJumpT4(jumpT4Location, to, mayTriggerErrata)) {
-                if (!mayTriggerErrata)
-                    return LinkJumpT4;
-            }
+            const uint16_t* jumpT4Location = reinterpret_cast_ptr<const uint16_t*>(from - (paddingSize - JUMP_ENUM_SIZE(LinkJumpT4)));
+            if (canBeJumpT4(jumpT4Location, to))
+                return LinkJumpT4;
             // use long jump sequence
             return LinkBX;
         }
@@ -1626,54 +2203,46 @@ public:
         return LinkConditionalBX;
     }
     
-    JumpLinkType computeJumpType(LinkRecord& record, const uint8_t* from, const uint8_t* to)
+    static JumpLinkType computeJumpType(LinkRecord& record, const uint8_t* from, const uint8_t* to) // Computes the link type for record.type(), stores it on |record| via setLinkType(), and returns it.
     {
         JumpLinkType linkType = computeJumpType(record.type(), from, to);
         record.setLinkType(linkType);
         return linkType;
     }
     
-    void recordLinkOffsets(int32_t regionStart, int32_t regionEnd, int32_t offset)
-    {
-        int32_t ptr = regionStart / sizeof(int32_t);
-        const int32_t end = regionEnd / sizeof(int32_t);
-        int32_t* offsets = static_cast<int32_t*>(m_formatter.data());
-        while (ptr < end)
-            offsets[ptr++] = offset;
-    }
-    
-    Vector<LinkRecord>& jumpsToLink()
+    Vector<LinkRecord, 0, UnsafeVectorOverflow>& jumpsToLink() // Sorts m_jumpsToLink in place by ascending from() and returns a reference to it.
     {
         std::sort(m_jumpsToLink.begin(), m_jumpsToLink.end(), linkRecordSourceComparator);
         return m_jumpsToLink;
     }
 
-    void link(LinkRecord& record, uint8_t* from, uint8_t* to)
+    static void ALWAYS_INLINE link(LinkRecord& record, uint8_t* from, const uint8_t* fromInstruction8, uint8_t* to) // Dispatches on record.linkType(); |from| is passed as the write target and |fromInstruction8| as the instruction address used for offset computation.
     {
+        const uint16_t* fromInstruction = reinterpret_cast_ptr<const uint16_t*>(fromInstruction8);
         switch (record.linkType()) {
         case LinkJumpT1:
-            linkJumpT1(record.condition(), reinterpret_cast<uint16_t*>(from), to);
+            linkJumpT1(record.condition(), reinterpret_cast_ptr<uint16_t*>(from), fromInstruction, to);
             break;
         case LinkJumpT2:
-            linkJumpT2(reinterpret_cast<uint16_t*>(from), to);
+            linkJumpT2(reinterpret_cast_ptr<uint16_t*>(from), fromInstruction, to);
             break;
         case LinkJumpT3:
-            linkJumpT3(record.condition(), reinterpret_cast<uint16_t*>(from), to);
+            linkJumpT3(record.condition(), reinterpret_cast_ptr<uint16_t*>(from), fromInstruction, to);
             break;
         case LinkJumpT4:
-            linkJumpT4(reinterpret_cast<uint16_t*>(from), to);
+            linkJumpT4(reinterpret_cast_ptr<uint16_t*>(from), fromInstruction, to);
             break;
         case LinkConditionalJumpT4:
-            linkConditionalJumpT4(record.condition(), reinterpret_cast<uint16_t*>(from), to);
+            linkConditionalJumpT4(record.condition(), reinterpret_cast_ptr<uint16_t*>(from), fromInstruction, to);
             break;
         case LinkConditionalBX:
-            linkConditionalBX(record.condition(), reinterpret_cast<uint16_t*>(from), to);
+            linkConditionalBX(record.condition(), reinterpret_cast_ptr<uint16_t*>(from), fromInstruction, to);
             break;
         case LinkBX:
-            linkBX(reinterpret_cast<uint16_t*>(from), to);
+            linkBX(reinterpret_cast_ptr<uint16_t*>(from), fromInstruction, to);
             break;
         default:
-            ASSERT_NOT_REACHED();
+            RELEASE_ASSERT_NOT_REACHED(); // Any other link type (e.g. LinkInvalid) is treated as a fatal error.
             break;
         }
     }
@@ -1707,53 +2276,219 @@ public:
         ASSERT(from.isSet());
         
         uint16_t* location = reinterpret_cast<uint16_t*>(reinterpret_cast<intptr_t>(code) + from.m_offset);
-        linkJumpAbsolute(location, to);
+        linkJumpAbsolute(location, location, to);
     }
 
     static void linkCall(void* code, AssemblerLabel from, void* to) // Patches |to| into the pointer-load sequence located via setPointer at one halfword before code + from.m_offset.
     {
         ASSERT(!(reinterpret_cast<intptr_t>(code) & 1));
         ASSERT(from.isSet());
-        ASSERT(reinterpret_cast<intptr_t>(to) & 1);
 
-        setPointer(reinterpret_cast<uint16_t*>(reinterpret_cast<intptr_t>(code) + from.m_offset) - 1, to);
+        setPointer(reinterpret_cast<uint16_t*>(reinterpret_cast<intptr_t>(code) + from.m_offset) - 1, to, false); // flush == false: no cache flush is issued here.
     }
 
     static void linkPointer(void* code, AssemblerLabel where, void* value) // Stores |value| through setPointer at code + where.m_offset.
     {
-        setPointer(reinterpret_cast<char*>(code) + where.m_offset, value);
+        setPointer(reinterpret_cast<char*>(code) + where.m_offset, value, false); // flush == false: no cache flush is issued here.
     }
 
+    // The static relink and replace methods can use |from| for both
+    // the write and executable address for call and jump patching
+    // as they're modifying existing (linked) code, so the address being
+    // provided is correct for relative address computation.
     static void relinkJump(void* from, void* to) // Re-targets the jump sequence ending at |from| to |to| and flushes the rewritten halfwords.
     {
         ASSERT(!(reinterpret_cast<intptr_t>(from) & 1));
         ASSERT(!(reinterpret_cast<intptr_t>(to) & 1));
 
-        linkJumpAbsolute(reinterpret_cast<uint16_t*>(from), to);
+        linkJumpAbsolute(reinterpret_cast<uint16_t*>(from), reinterpret_cast<uint16_t*>(from), to); // |from| serves as both the write target and the instruction address.
 
-        ExecutableAllocator::cacheFlush(reinterpret_cast<uint16_t*>(from) - 5, 5 * sizeof(uint16_t));
+        cacheFlush(reinterpret_cast<uint16_t*>(from) - 5, 5 * sizeof(uint16_t)); // linkJumpAbsolute writes only within the 5 halfwords preceding |from|.
+    }
+
+    static void relinkJumpToNop(void* from) // Re-targets the jump sequence ending at |from| to |from| itself, i.e. to the address right after the sequence.
+    {
+        relinkJump(from, from);
     }
     
     static void relinkCall(void* from, void* to) // Rewrites the call target stored one halfword before |from| and requests a cache flush.
     {
         ASSERT(!(reinterpret_cast<intptr_t>(from) & 1));
-        ASSERT(reinterpret_cast<intptr_t>(to) & 1);
 
-        setPointer(reinterpret_cast<uint16_t*>(from) - 1, to);
+        setPointer(reinterpret_cast<uint16_t*>(from) - 1, to, true); // flush == true: the code is already linked, so flush after patching.
+    }
+    
+    static void* readCallTarget(void* from) // Reads back the pointer at the same location relinkCall() writes (one halfword before |from|).
+    {
+        return readPointer(reinterpret_cast<uint16_t*>(from) - 1);
     }
 
     static void repatchInt32(void* where, int32_t value) // Rewrites the 32-bit immediate loaded by the sequence preceding |where| and requests a cache flush.
     {
         ASSERT(!(reinterpret_cast<intptr_t>(where) & 1));
         
-        setInt32(where, value);
+        setInt32(where, value, true); // flush == true
+    }
+    
+    static void repatchCompact(void* where, int32_t offset) // Rewrites the low 12 bits of the second halfword at |where| from |offset| (magnitude at most 255) and flushes both halfwords.
+    {
+        ASSERT(offset >= -255 && offset <= 255);
+
+        bool add = true;
+        if (offset < 0) { // Encode sign and magnitude separately: |add| carries the sign, |offset| becomes the magnitude.
+            add = false;
+            offset = -offset;
+        }
+        
+        offset |= (add << 9); // Bit 9 is set only for a non-negative offset.
+        offset |= (1 << 10); // Bits 10 and 11 are always set. NOTE(review): presumably the fixed/index bits of the 8-bit-immediate load/store encoding; confirm against the ARM ARM.
+        offset |= (1 << 11);
+
+        uint16_t* location = reinterpret_cast<uint16_t*>(where);
+        uint16_t instruction = location[1] & ~((1 << 12) - 1); // Keep bits 12-15 of the existing second halfword.
+        instruction |= offset;
+        performJITMemcpy(location + 1, &instruction, sizeof(uint16_t)); // Only the second halfword is rewritten.
+        cacheFlush(location, sizeof(uint16_t) * 2);
     }
 
     static void repatchPointer(void* where, void* value) // Rewrites the pointer loaded by the sequence preceding |where| and requests a cache flush.
     {
         ASSERT(!(reinterpret_cast<intptr_t>(where) & 1));
         
-        setPointer(where, value);
+        setPointer(where, value, true); // flush == true
+    }
+
+    static void* readPointer(void* where) // Decodes the 32-bit immediate via readInt32() and returns it reinterpreted as a pointer.
+    {
+        return reinterpret_cast<void*>(readInt32(where));
+    }
+
+    static void replaceWithJump(void* instructionStart, void* to)
+    {
+        ASSERT(!(bitwise_cast<uintptr_t>(instructionStart) & 1));
+        ASSERT(!(bitwise_cast<uintptr_t>(to) & 1));
+
+#if OS(LINUX)
+        if (canBeJumpT4(reinterpret_cast<uint16_t*>(instructionStart), to)) {
+            uint16_t* ptr = reinterpret_cast<uint16_t*>(instructionStart) + 2;
+            linkJumpT4(ptr, ptr, to);
+            cacheFlush(ptr - 2, sizeof(uint16_t) * 2);
+        } else {
+            uint16_t* ptr = reinterpret_cast<uint16_t*>(instructionStart) + 5;
+            linkBX(ptr, ptr, to);
+            cacheFlush(ptr - 5, sizeof(uint16_t) * 5);
+        }
+#else
+        uint16_t* ptr = reinterpret_cast<uint16_t*>(instructionStart) + 2;
+        linkJumpT4(ptr, ptr, to);
+        cacheFlush(ptr - 2, sizeof(uint16_t) * 2);
+#endif
+    }
+    
+    static ptrdiff_t maxJumpReplacementSize() // Largest span replaceWithJump() may overwrite: 10 on Linux (5 halfwords via linkBX), 4 elsewhere (2 halfwords via linkJumpT4).
+    {
+#if OS(LINUX)
+        return 10;
+#else
+        return 4;
+#endif
+    }
+
+    static constexpr ptrdiff_t patchableJumpSize() // Constant 10. NOTE(review): presumably the byte size of the 5-halfword patchable jump sequence; confirm against callers.
+    {
+        return 10;
+    }
+    
+    static void replaceWithLoad(void* instructionStart) // Rewrites an OP_ADD_imm_T3 instruction at |instructionStart| into OP_LDR_imm_T3; an existing LDR is left untouched.
+    {
+        ASSERT(!(bitwise_cast<uintptr_t>(instructionStart) & 1));
+        uint16_t* ptr = reinterpret_cast<uint16_t*>(instructionStart);
+        switch (ptr[0] & 0xFFF0) { // Mask off the low 4 bits of the first halfword to isolate the opcode.
+        case OP_LDR_imm_T3: // Already a load: nothing to do.
+            break;
+        case OP_ADD_imm_T3: {
+            ASSERT(!(ptr[1] & 0xF000));
+            uint16_t instructions[2];
+            instructions[0] = ptr[0] & 0x000F; // Keep the low 4 bits of the first halfword, swap in the LDR opcode.
+            instructions[0] |= OP_LDR_imm_T3;
+            instructions[1] = ptr[1] | (ptr[1] & 0x0F00) << 4; // Copy bits 8-11 of the second halfword up into bits 12-15...
+            instructions[1] &= 0xF0FF; // ...then clear bits 8-11.
+            performJITMemcpy(ptr, instructions, sizeof(uint16_t) * 2);
+            cacheFlush(ptr, sizeof(uint16_t) * 2);
+            break;
+        }
+        default: // Any other opcode is a fatal error.
+            RELEASE_ASSERT_NOT_REACHED();
+        }
+    }
+
+    static void replaceWithAddressComputation(void* instructionStart) // Rewrites an OP_LDR_imm_T3 instruction at |instructionStart| into OP_ADD_imm_T3; an existing ADD is left untouched.
+    {
+        ASSERT(!(bitwise_cast<uintptr_t>(instructionStart) & 1));
+        uint16_t* ptr = reinterpret_cast<uint16_t*>(instructionStart);
+        switch (ptr[0] & 0xFFF0) { // Mask off the low 4 bits of the first halfword to isolate the opcode.
+        case OP_LDR_imm_T3: {
+            ASSERT(!(ptr[1] & 0x0F00));
+            uint16_t instructions[2];
+            instructions[0] = ptr[0] & 0x000F; // Keep the low 4 bits of the first halfword, swap in the ADD opcode.
+            instructions[0] |= OP_ADD_imm_T3;
+            instructions[1] = ptr[1] | (ptr[1] & 0xF000) >> 4; // Copy bits 12-15 of the second halfword down into bits 8-11...
+            instructions[1] &= 0x0FFF; // ...then clear bits 12-15.
+            performJITMemcpy(ptr, instructions, sizeof(uint16_t) * 2);
+            cacheFlush(ptr, sizeof(uint16_t) * 2);
+            break;
+        }
+        case OP_ADD_imm_T3: // Already an address computation: nothing to do.
+            break;
+        default: // Any other opcode is a fatal error.
+            RELEASE_ASSERT_NOT_REACHED();
+        }
+    }
+
+    unsigned debugOffset() { return m_formatter.debugOffset(); } // Forwards to the formatter's debugOffset().
+
+#if OS(LINUX)
+    static inline void linuxPageFlush(uintptr_t begin, uintptr_t end) // Issues a raw supervisor call with r0 = begin, r1 = end, r2 = 0 and r7 = 0x000f0002.
+    {
+        asm volatile(
+            "push    {r7}\n" // Save r7; it is overwritten with the call number below and restored afterwards.
+            "mov     r0, %0\n" // r0 = begin
+            "mov     r1, %1\n" // r1 = end
+            "movw    r7, #0x2\n"
+            "movt    r7, #0xf\n" // r7 = 0x000f0002. NOTE(review): presumably the ARM-private cacheflush syscall number; confirm against the kernel headers.
+            "movs    r2, #0x0\n" // r2 = 0
+            "svc     0x0\n"
+            "pop     {r7}\n"
+            :
+            : "r" (begin), "r" (end)
+            : "r0", "r1", "r2"); // r0-r2 are declared clobbered; r7 is preserved manually via push/pop.
+    }
+#endif
+
+    static void cacheFlush(void* code, size_t size) // Flushes [code, code + size); on Linux the range is split so that each linuxPageFlush() call stays within one page.
+    {
+#if OS(IOS)
+        sys_cache_control(kCacheFunctionPrepareForExecution, code, size);
+#elif OS(LINUX)
+        size_t page = pageSize();
+        uintptr_t current = reinterpret_cast<uintptr_t>(code);
+        uintptr_t end = current + size;
+        uintptr_t firstPageEnd = (current & ~(page - 1)) + page; // Start of the page following the one that contains |current|.
+
+        if (end <= firstPageEnd) { // The whole range lies within a single page.
+            linuxPageFlush(current, end);
+            return;
+        }
+
+        linuxPageFlush(current, firstPageEnd); // Partial first page.
+
+        for (current = firstPageEnd; current + page < end; current += page) // Whole pages, while more than one page remains.
+            linuxPageFlush(current, current + page);
+
+        linuxPageFlush(current, end); // Remainder (at most one page).
+#else
+#error "The cacheFlush support is missing on this platform."
+#endif
     }
 
 private:
@@ -1815,6 +2550,7 @@ private:
             if (isRoundZero)
                 op |= 0x10;
         } else {
+            ASSERT(!isRoundZero);
             // 'op' field in instruction is isUnsigned
             if (!isUnsigned)
                 op |= 0x10;
@@ -1822,59 +2558,92 @@ private:
         return VFPOperand(op);
     }
 
-    static void setInt32(void* code, uint32_t value)
+    static void setInt32(void* code, uint32_t value, bool flush) // Re-encodes the MOV/MOVT pair in the 4 halfwords preceding |code| so that it loads |value|; flushes only when |flush| is true.
     {
         uint16_t* location = reinterpret_cast<uint16_t*>(code);
         ASSERT(isMOV_imm_T3(location - 4) && isMOVT(location - 2));
 
         ARMThumbImmediate lo16 = ARMThumbImmediate::makeUInt16(static_cast<uint16_t>(value));
         ARMThumbImmediate hi16 = ARMThumbImmediate::makeUInt16(static_cast<uint16_t>(value >> 16));
-        location[-4] = twoWordOp5i6Imm4Reg4EncodedImmFirst(OP_MOV_imm_T3, lo16);
-        location[-3] = twoWordOp5i6Imm4Reg4EncodedImmSecond((location[-3] >> 8) & 0xf, lo16);
-        location[-2] = twoWordOp5i6Imm4Reg4EncodedImmFirst(OP_MOVT, hi16);
-        location[-1] = twoWordOp5i6Imm4Reg4EncodedImmSecond((location[-1] >> 8) & 0xf, hi16);
+        uint16_t instructions[4]; // Built in a local buffer, then copied in one performJITMemcpy() call instead of writing in place.
+        instructions[0] = twoWordOp5i6Imm4Reg4EncodedImmFirst(OP_MOV_imm_T3, lo16);
+        instructions[1] = twoWordOp5i6Imm4Reg4EncodedImmSecond((location[-3] >> 8) & 0xf, lo16); // Bits 8-11 of the existing halfword are reused as the register field.
+        instructions[2] = twoWordOp5i6Imm4Reg4EncodedImmFirst(OP_MOVT, hi16);
+        instructions[3] = twoWordOp5i6Imm4Reg4EncodedImmSecond((location[-1] >> 8) & 0xf, hi16); // Same register-field reuse for the MOVT half.
 
-        ExecutableAllocator::cacheFlush(location - 4, 4 * sizeof(uint16_t));
+        performJITMemcpy(location - 4, instructions, 4 * sizeof(uint16_t));
+        if (flush)
+            cacheFlush(location - 4, 4 * sizeof(uint16_t));
+    }
+    
+    static int32_t readInt32(void* code) // Decodes the 32-bit value loaded by the MOV/MOVT pair in the 4 halfwords preceding |code|.
+    {
+        uint16_t* location = reinterpret_cast<uint16_t*>(code);
+        ASSERT(isMOV_imm_T3(location - 4) && isMOVT(location - 2));
+        
+        ARMThumbImmediate lo16;
+        ARMThumbImmediate hi16;
+        decodeTwoWordOp5i6Imm4Reg4EncodedImmFirst(lo16, location[-4]); // MOV halfwords carry the low 16 bits.
+        decodeTwoWordOp5i6Imm4Reg4EncodedImmSecond(lo16, location[-3]);
+        decodeTwoWordOp5i6Imm4Reg4EncodedImmFirst(hi16, location[-2]); // MOVT halfwords carry the high 16 bits.
+        decodeTwoWordOp5i6Imm4Reg4EncodedImmSecond(hi16, location[-1]);
+        uint32_t result = hi16.asUInt16();
+        result <<= 16;
+        result |= lo16.asUInt16();
+        return static_cast<int32_t>(result);
     }
 
-    static void setPointer(void* code, void* value)
+    static void setUInt7ForLoad(void* code, ARMThumbImmediate imm) // Rewrites the immediate field at bits 6-10 of the halfword at |code| with imm's 7-bit value divided by 4, then flushes that halfword.
     {
-        setInt32(code, reinterpret_cast<uint32_t>(value));
+        // Requires us to have planted a LDR_imm_T1
+        ASSERT(imm.isValid());
+        ASSERT(imm.isUInt7());
+        uint16_t* location = reinterpret_cast<uint16_t*>(code);
+        uint16_t instruction;
+        instruction = location[0] & ~((static_cast<uint16_t>(0x7f) >> 2) << 6); // Clear the 5-bit field at bits 6-10.
+        instruction |= (imm.getUInt7() >> 2) << 6; // The low two bits of the immediate are dropped by the shift.
+        performJITMemcpy(location, &instruction, sizeof(uint16_t));
+        cacheFlush(location, sizeof(uint16_t));
+    }
+
+    static void setPointer(void* code, void* value, bool flush) // Pointer-typed wrapper over setInt32(); |flush| is forwarded unchanged.
+    {
+        setInt32(code, reinterpret_cast<uint32_t>(value), flush);
     }
 
-    static bool isB(void* address)
+    static bool isB(const void* address) // True when the two halfwords at |address| match the OP_B_T4a / OP_B_T4b patterns under the masks below.
     {
-        uint16_t* instruction = static_cast<uint16_t*>(address);
+        const uint16_t* instruction = static_cast<const uint16_t*>(address);
         return ((instruction[0] & 0xf800) == OP_B_T4a) && ((instruction[1] & 0xd000) == OP_B_T4b);
     }
 
-    static bool isBX(void* address)
+    static bool isBX(const void* address) // True when the halfword at |address| equals OP_BX after masking with 0xff87 (bits 3-6 are ignored).
     {
-        uint16_t* instruction = static_cast<uint16_t*>(address);
+        const uint16_t* instruction = static_cast<const uint16_t*>(address);
         return (instruction[0] & 0xff87) == OP_BX;
     }
 
-    static bool isMOV_imm_T3(void* address)
+    static bool isMOV_imm_T3(const void* address) // True when the first halfword matches OP_MOV_imm_T3 under mask 0xFBF0 and bit 15 of the second halfword is clear.
     {
-        uint16_t* instruction = static_cast<uint16_t*>(address);
+        const uint16_t* instruction = static_cast<const uint16_t*>(address);
         return ((instruction[0] & 0xFBF0) == OP_MOV_imm_T3) && ((instruction[1] & 0x8000) == 0);
     }
 
-    static bool isMOVT(void* address)
+    static bool isMOVT(const void* address) // True when the first halfword matches OP_MOVT under mask 0xFBF0 and bit 15 of the second halfword is clear.
     {
-        uint16_t* instruction = static_cast<uint16_t*>(address);
+        const uint16_t* instruction = static_cast<const uint16_t*>(address);
         return ((instruction[0] & 0xFBF0) == OP_MOVT) && ((instruction[1] & 0x8000) == 0);
     }
 
-    static bool isNOP_T1(void* address)
+    static bool isNOP_T1(const void* address) // True when the halfword at |address| is exactly OP_NOP_T1.
     {
-        uint16_t* instruction = static_cast<uint16_t*>(address);
+        const uint16_t* instruction = static_cast<const uint16_t*>(address);
         return instruction[0] == OP_NOP_T1;
     }
 
-    static bool isNOP_T2(void* address)
+    static bool isNOP_T2(const void* address) // True when the two halfwords at |address| are exactly OP_NOP_T2a followed by OP_NOP_T2b.
     {
-        uint16_t* instruction = static_cast<uint16_t*>(address);
+        const uint16_t* instruction = static_cast<const uint16_t*>(address);
         return (instruction[0] == OP_NOP_T2a) && (instruction[1] == OP_NOP_T2b);
     }
 
@@ -1904,49 +2673,25 @@ private:
         return ((relative << 20) >> 20) == relative;
     }
     
-    static bool canBeJumpT3(const uint16_t* instruction, const void* target, bool& mayTriggerErrata)
+    static bool canBeJumpT3(const uint16_t* instruction, const void* target) // Pure range check: the Cortex-A8 errata test and its out-parameter are removed by this change.
     {
         ASSERT(!(reinterpret_cast<intptr_t>(instruction) & 1));
         ASSERT(!(reinterpret_cast<intptr_t>(target) & 1));
         
         intptr_t relative = reinterpret_cast<intptr_t>(target) - (reinterpret_cast<intptr_t>(instruction));
-        // From Cortex-A8 errata:
-        // If the 32-bit Thumb-2 branch instruction spans two 4KiB regions and
-        // the target of the branch falls within the first region it is
-        // possible for the processor to incorrectly determine the branch
-        // instruction, and it is also possible in some cases for the processor
-        // to enter a deadlock state.
-        // The instruction is spanning two pages if it ends at an address ending 0x002
-        bool spansTwo4K = ((reinterpret_cast<intptr_t>(instruction) & 0xfff) == 0x002);
-        mayTriggerErrata = spansTwo4K;
-        // The target is in the first page if the jump branch back by [3..0x1002] bytes
-        bool targetInFirstPage = (relative >= -0x1002) && (relative < -2);
-        bool wouldTriggerA8Errata = spansTwo4K && targetInFirstPage;
-        return ((relative << 11) >> 11) == relative && !wouldTriggerA8Errata;
+        return ((relative << 11) >> 11) == relative; // True when |relative| is unchanged by shifting left then right by 11, i.e. fits in 21 signed bits of a 32-bit intptr_t.
     }
     
-    static bool canBeJumpT4(const uint16_t* instruction, const void* target, bool& mayTriggerErrata)
+    static bool canBeJumpT4(const uint16_t* instruction, const void* target) // Pure range check: the Cortex-A8 errata test and its out-parameter are removed by this change.
     {
         ASSERT(!(reinterpret_cast<intptr_t>(instruction) & 1));
         ASSERT(!(reinterpret_cast<intptr_t>(target) & 1));
         
         intptr_t relative = reinterpret_cast<intptr_t>(target) - (reinterpret_cast<intptr_t>(instruction));
-        // From Cortex-A8 errata:
-        // If the 32-bit Thumb-2 branch instruction spans two 4KiB regions and
-        // the target of the branch falls within the first region it is
-        // possible for the processor to incorrectly determine the branch
-        // instruction, and it is also possible in some cases for the processor
-        // to enter a deadlock state.
-        // The instruction is spanning two pages if it ends at an address ending 0x002
-        bool spansTwo4K = ((reinterpret_cast<intptr_t>(instruction) & 0xfff) == 0x002);
-        mayTriggerErrata = spansTwo4K;
-        // The target is in the first page if the jump branch back by [3..0x1002] bytes
-        bool targetInFirstPage = (relative >= -0x1002) && (relative < -2);
-        bool wouldTriggerA8Errata = spansTwo4K && targetInFirstPage;
-        return ((relative << 7) >> 7) == relative && !wouldTriggerA8Errata;
+        return ((relative << 7) >> 7) == relative; // True when |relative| is unchanged by shifting left then right by 7, i.e. fits in 25 signed bits of a 32-bit intptr_t.
     }
     
-    void linkJumpT1(Condition cond, uint16_t* instruction, void* target)
+    static void linkJumpT1(Condition cond, uint16_t* writeTarget, const uint16_t* instruction, void* target) // Writes a one-halfword conditional branch at writeTarget[-1].
     {
         // FIXME: this should be up in the MacroAssembler layer. :-(        
         ASSERT(!(reinterpret_cast<intptr_t>(instruction) & 1));
@@ -1961,10 +2706,11 @@ private:
         
         // All branch offsets should be an even distance.
         ASSERT(!(relative & 1));
-        instruction[-1] = OP_B_T1 | ((cond & 0xf) << 8) | ((relative & 0x1fe) >> 1);
+        uint16_t newInstruction = OP_B_T1 | ((cond & 0xf) << 8) | ((relative & 0x1fe) >> 1); // Condition in bits 8-11; bits 1-8 of the offset become the low 8 bits.
+        performJITMemcpy(writeTarget - 1, &newInstruction, sizeof(uint16_t));
     }
     
-    static void linkJumpT2(uint16_t* instruction, void* target)
+    static void linkJumpT2(uint16_t* writeTarget, const uint16_t* instruction, void* target) // Writes a one-halfword unconditional branch at writeTarget[-1].
     {
         // FIXME: this should be up in the MacroAssembler layer. :-(        
         ASSERT(!(reinterpret_cast<intptr_t>(instruction) & 1));
@@ -1979,34 +2725,33 @@ private:
         
         // All branch offsets should be an even distance.
         ASSERT(!(relative & 1));
-        instruction[-1] = OP_B_T2 | ((relative & 0xffe) >> 1);
+        uint16_t newInstruction = OP_B_T2 | ((relative & 0xffe) >> 1); // Bits 1-11 of the offset become the low 11 bits.
+        performJITMemcpy(writeTarget - 1, &newInstruction, sizeof(uint16_t));
     }
     
-    void linkJumpT3(Condition cond, uint16_t* instruction, void* target)
+    static void linkJumpT3(Condition cond, uint16_t* writeTarget, const uint16_t* instruction, void* target) // Writes a two-halfword conditional branch at writeTarget[-2..-1]; the offset is computed from |instruction|.
     {
         // FIXME: this should be up in the MacroAssembler layer. :-(
         ASSERT(!(reinterpret_cast<intptr_t>(instruction) & 1));
         ASSERT(!(reinterpret_cast<intptr_t>(target) & 1));
-        bool scratch;
-        UNUSED_PARAM(scratch);
-        ASSERT(canBeJumpT3(instruction, target, scratch));
+        ASSERT(canBeJumpT3(instruction, target));
         
         intptr_t relative = reinterpret_cast<intptr_t>(target) - (reinterpret_cast<intptr_t>(instruction));
         
         // All branch offsets should be an even distance.
         ASSERT(!(relative & 1));
-        instruction[-2] = OP_B_T3a | ((relative & 0x100000) >> 10) | ((cond & 0xf) << 6) | ((relative & 0x3f000) >> 12);
-        instruction[-1] = OP_B_T3b | ((relative & 0x80000) >> 8) | ((relative & 0x40000) >> 5) | ((relative & 0xffe) >> 1);
+        uint16_t instructions[2]; // Both halfwords are assembled locally and copied with one performJITMemcpy() call.
+        instructions[0] = OP_B_T3a | ((relative & 0x100000) >> 10) | ((cond & 0xf) << 6) | ((relative & 0x3f000) >> 12);
+        instructions[1] = OP_B_T3b | ((relative & 0x80000) >> 8) | ((relative & 0x40000) >> 5) | ((relative & 0xffe) >> 1);
+        performJITMemcpy(writeTarget - 2, instructions, 2 * sizeof(uint16_t));
     }
     
-    static void linkJumpT4(uint16_t* instruction, void* target)
+    static void linkJumpT4(uint16_t* writeTarget, const uint16_t* instruction, void* target) // Writes a two-halfword unconditional branch at writeTarget[-2..-1]; the offset is computed from |instruction|.
     {
         // FIXME: this should be up in the MacroAssembler layer. :-(        
         ASSERT(!(reinterpret_cast<intptr_t>(instruction) & 1));
         ASSERT(!(reinterpret_cast<intptr_t>(target) & 1));
-        bool scratch;
-        UNUSED_PARAM(scratch);
-        ASSERT(canBeJumpT4(instruction, target, scratch));
+        ASSERT(canBeJumpT4(instruction, target));
         
         intptr_t relative = reinterpret_cast<intptr_t>(target) - (reinterpret_cast<intptr_t>(instruction));
         // ARM encoding for the top two bits below the sign bit is 'peculiar'.
@@ -2015,47 +2760,55 @@ private:
         
         // All branch offsets should be an even distance.
         ASSERT(!(relative & 1));
-        instruction[-2] = OP_B_T4a | ((relative & 0x1000000) >> 14) | ((relative & 0x3ff000) >> 12);
-        instruction[-1] = OP_B_T4b | ((relative & 0x800000) >> 10) | ((relative & 0x400000) >> 11) | ((relative & 0xffe) >> 1);
+        uint16_t instructions[2]; // Both halfwords are assembled locally and copied with one performJITMemcpy() call.
+        instructions[0] = OP_B_T4a | ((relative & 0x1000000) >> 14) | ((relative & 0x3ff000) >> 12);
+        instructions[1] = OP_B_T4b | ((relative & 0x800000) >> 10) | ((relative & 0x400000) >> 11) | ((relative & 0xffe) >> 1);
+        performJITMemcpy(writeTarget - 2, instructions, 2 * sizeof(uint16_t));
     }
     
-    void linkConditionalJumpT4(Condition cond, uint16_t* instruction, void* target)
+    static void linkConditionalJumpT4(Condition cond, uint16_t* writeTarget, const uint16_t* instruction, void* target) // Writes an IT halfword at writeTarget[-3] followed by the T4 branch written by linkJumpT4().
     {
         // FIXME: this should be up in the MacroAssembler layer. :-(        
         ASSERT(!(reinterpret_cast<intptr_t>(instruction) & 1));
         ASSERT(!(reinterpret_cast<intptr_t>(target) & 1));
         
-        instruction[-3] = ifThenElse(cond) | OP_IT;
-        linkJumpT4(instruction, target);
+        uint16_t newInstruction = ifThenElse(cond) | OP_IT;
+        performJITMemcpy(writeTarget - 3, &newInstruction, sizeof(uint16_t));
+        linkJumpT4(writeTarget, instruction, target);
     }
     
-    static void linkBX(uint16_t* instruction, void* target)
+    static void linkBX(uint16_t* writeTarget, const uint16_t* instruction, void* target) // Writes a 5-halfword MOV/MOVT/BX sequence through ip at writeTarget[-5..-1]; |instruction| is only alignment-checked.
     {
         // FIXME: this should be up in the MacroAssembler layer. :-(
-        ASSERT(!(reinterpret_cast<intptr_t>(instruction) & 1));
+        ASSERT_UNUSED(instruction, !(reinterpret_cast<intptr_t>(instruction) & 1));
+        ASSERT(!(reinterpret_cast<intptr_t>(writeTarget) & 1));
         ASSERT(!(reinterpret_cast<intptr_t>(target) & 1));
         
         const uint16_t JUMP_TEMPORARY_REGISTER = ARMRegisters::ip;
         ARMThumbImmediate lo16 = ARMThumbImmediate::makeUInt16(static_cast<uint16_t>(reinterpret_cast<uint32_t>(target) + 1));
         ARMThumbImmediate hi16 = ARMThumbImmediate::makeUInt16(static_cast<uint16_t>(reinterpret_cast<uint32_t>(target) >> 16));
-        instruction[-5] = twoWordOp5i6Imm4Reg4EncodedImmFirst(OP_MOV_imm_T3, lo16);
-        instruction[-4] = twoWordOp5i6Imm4Reg4EncodedImmSecond(JUMP_TEMPORARY_REGISTER, lo16);
-        instruction[-3] = twoWordOp5i6Imm4Reg4EncodedImmFirst(OP_MOVT, hi16);
-        instruction[-2] = twoWordOp5i6Imm4Reg4EncodedImmSecond(JUMP_TEMPORARY_REGISTER, hi16);
-        instruction[-1] = OP_BX | (JUMP_TEMPORARY_REGISTER << 3);
+        uint16_t instructions[5]; // Assembled locally, then copied with one performJITMemcpy() call.
+        instructions[0] = twoWordOp5i6Imm4Reg4EncodedImmFirst(OP_MOV_imm_T3, lo16);
+        instructions[1] = twoWordOp5i6Imm4Reg4EncodedImmSecond(JUMP_TEMPORARY_REGISTER, lo16);
+        instructions[2] = twoWordOp5i6Imm4Reg4EncodedImmFirst(OP_MOVT, hi16);
+        instructions[3] = twoWordOp5i6Imm4Reg4EncodedImmSecond(JUMP_TEMPORARY_REGISTER, hi16);
+        instructions[4] = OP_BX | (JUMP_TEMPORARY_REGISTER << 3);
+
+        performJITMemcpy(writeTarget - 5, instructions, 5 * sizeof(uint16_t));
     }
     
-    void linkConditionalBX(Condition cond, uint16_t* instruction, void* target)
+    static void linkConditionalBX(Condition cond, uint16_t* writeTarget, const uint16_t* instruction, void* target) // Writes the linkBX() sequence, then an IT halfword at writeTarget[-6] in front of it.
     {
         // FIXME: this should be up in the MacroAssembler layer. :-(        
         ASSERT(!(reinterpret_cast<intptr_t>(instruction) & 1));
         ASSERT(!(reinterpret_cast<intptr_t>(target) & 1));
         
-        linkBX(instruction, target);
-        instruction[-6] = ifThenElse(cond, true, true) | OP_IT;
+        linkBX(writeTarget, instruction, target);
+        uint16_t newInstruction = ifThenElse(cond, true, true) | OP_IT;
+        performJITMemcpy(writeTarget - 6, &newInstruction, sizeof(uint16_t));
     }
     
-    static void linkJumpAbsolute(uint16_t* instruction, void* target)
+    static void linkJumpAbsolute(uint16_t* writeTarget, const uint16_t* instruction, void* target) // Rewrites the 5 halfwords before |writeTarget| as either NOPs plus a T4 branch (when in range) or a MOV/MOVT/BX sequence.
     {
         // FIXME: this should be up in the MacroAssembler layer. :-(
         ASSERT(!(reinterpret_cast<intptr_t>(instruction) & 1));
@@ -2063,27 +2816,31 @@ private:
         
         ASSERT((isMOV_imm_T3(instruction - 5) && isMOVT(instruction - 3) && isBX(instruction - 1))
                || (isNOP_T1(instruction - 5) && isNOP_T2(instruction - 4) && isB(instruction - 2)));
-        
-        bool scratch;
-        if (canBeJumpT4(instruction, target, scratch)) {
+
+        if (canBeJumpT4(instruction, target)) {
             // There may be a better way to fix this, but right now put the NOPs first, since in the
             // case of a conditional branch this will be coming after an ITTT predicating *three*
             // instructions!  Looking backwards to modify the ITTT to an IT is not easy, due to
             // variable width encoding - the previous instruction might *look* like an ITTT but
             // actually be the second half of a 2-word op.
-            instruction[-5] = OP_NOP_T1;
-            instruction[-4] = OP_NOP_T2a;
-            instruction[-3] = OP_NOP_T2b;
-            linkJumpT4(instruction, target);
+            uint16_t instructions[3]; // One 1-halfword NOP and one 2-halfword NOP fill the 3 halfwords before the branch.
+            instructions[0] = OP_NOP_T1;
+            instructions[1] = OP_NOP_T2a;
+            instructions[2] = OP_NOP_T2b;
+            performJITMemcpy(writeTarget - 5, instructions, 3 * sizeof(uint16_t));
+            linkJumpT4(writeTarget, instruction, target);
         } else {
             const uint16_t JUMP_TEMPORARY_REGISTER = ARMRegisters::ip;
             ARMThumbImmediate lo16 = ARMThumbImmediate::makeUInt16(static_cast<uint16_t>(reinterpret_cast<uint32_t>(target) + 1));
             ARMThumbImmediate hi16 = ARMThumbImmediate::makeUInt16(static_cast<uint16_t>(reinterpret_cast<uint32_t>(target) >> 16));
-            instruction[-5] = twoWordOp5i6Imm4Reg4EncodedImmFirst(OP_MOV_imm_T3, lo16);
-            instruction[-4] = twoWordOp5i6Imm4Reg4EncodedImmSecond(JUMP_TEMPORARY_REGISTER, lo16);
-            instruction[-3] = twoWordOp5i6Imm4Reg4EncodedImmFirst(OP_MOVT, hi16);
-            instruction[-2] = twoWordOp5i6Imm4Reg4EncodedImmSecond(JUMP_TEMPORARY_REGISTER, hi16);
-            instruction[-1] = OP_BX | (JUMP_TEMPORARY_REGISTER << 3);
+
+            uint16_t instructions[5]; // Same 5-halfword MOV/MOVT/BX sequence that linkBX() writes.
+            instructions[0] = twoWordOp5i6Imm4Reg4EncodedImmFirst(OP_MOV_imm_T3, lo16);
+            instructions[1] = twoWordOp5i6Imm4Reg4EncodedImmSecond(JUMP_TEMPORARY_REGISTER, lo16);
+            instructions[2] = twoWordOp5i6Imm4Reg4EncodedImmFirst(OP_MOVT, hi16);
+            instructions[3] = twoWordOp5i6Imm4Reg4EncodedImmSecond(JUMP_TEMPORARY_REGISTER, hi16);
+            instructions[4] = OP_BX | (JUMP_TEMPORARY_REGISTER << 3);
+            performJITMemcpy(writeTarget - 5, instructions, 5 * sizeof(uint16_t));
         }
     }
     
@@ -2092,66 +2849,90 @@ private:
         return op | (imm.m_value.i << 10) | imm.m_value.imm4;
     }
 
+    static void decodeTwoWordOp5i6Imm4Reg4EncodedImmFirst(ARMThumbImmediate& result, uint16_t value)
+    {
+        result.m_value.i = (value >> 10) & 1;
+        result.m_value.imm4 = value & 15;
+    }
+
     static uint16_t twoWordOp5i6Imm4Reg4EncodedImmSecond(uint16_t rd, ARMThumbImmediate imm)
     {
         return (imm.m_value.imm3 << 12) | (rd << 8) | imm.m_value.imm8;
     }
 
+    static void decodeTwoWordOp5i6Imm4Reg4EncodedImmSecond(ARMThumbImmediate& result, uint16_t value)
+    {
+        result.m_value.imm3 = (value >> 12) & 7;
+        result.m_value.imm8 = value & 255;
+    }
+
     class ARMInstructionFormatter {
     public:
-        void oneWordOp5Reg3Imm8(OpcodeID op, RegisterID rd, uint8_t imm)
+        ALWAYS_INLINE void oneWordOp5Reg3Imm8(OpcodeID op, RegisterID rd, uint8_t imm)
         {
             m_buffer.putShort(op | (rd << 8) | imm);
         }
         
-        void oneWordOp5Imm5Reg3Reg3(OpcodeID op, uint8_t imm, RegisterID reg1, RegisterID reg2)
+        ALWAYS_INLINE void oneWordOp5Imm5Reg3Reg3(OpcodeID op, uint8_t imm, RegisterID reg1, RegisterID reg2)
         {
             m_buffer.putShort(op | (imm << 6) | (reg1 << 3) | reg2);
         }
 
-        void oneWordOp7Reg3Reg3Reg3(OpcodeID op, RegisterID reg1, RegisterID reg2, RegisterID reg3)
+        ALWAYS_INLINE void oneWordOp7Reg3Reg3Reg3(OpcodeID op, RegisterID reg1, RegisterID reg2, RegisterID reg3)
         {
             m_buffer.putShort(op | (reg1 << 6) | (reg2 << 3) | reg3);
         }
 
-        void oneWordOp8Imm8(OpcodeID op, uint8_t imm)
+        ALWAYS_INLINE void oneWordOp7Imm9(OpcodeID op, uint16_t imm)
+        {
+            m_buffer.putShort(op | imm);
+        }
+
+        ALWAYS_INLINE void oneWordOp8Imm8(OpcodeID op, uint8_t imm)
         {
             m_buffer.putShort(op | imm);
         }
 
-        void oneWordOp8RegReg143(OpcodeID op, RegisterID reg1, RegisterID reg2)
+        ALWAYS_INLINE void oneWordOp8RegReg143(OpcodeID op, RegisterID reg1, RegisterID reg2)
         {
             m_buffer.putShort(op | ((reg2 & 8) << 4) | (reg1 << 3) | (reg2 & 7));
         }
-        void oneWordOp9Imm7(OpcodeID op, uint8_t imm)
+
+        ALWAYS_INLINE void oneWordOp9Imm7(OpcodeID op, uint8_t imm)
         {
             m_buffer.putShort(op | imm);
         }
 
-        void oneWordOp10Reg3Reg3(OpcodeID op, RegisterID reg1, RegisterID reg2)
+        ALWAYS_INLINE void oneWordOp10Reg3Reg3(OpcodeID op, RegisterID reg1, RegisterID reg2)
         {
             m_buffer.putShort(op | (reg1 << 3) | reg2);
         }
 
-        void twoWordOp12Reg4FourFours(OpcodeID1 op, RegisterID reg, FourFours ff)
+        ALWAYS_INLINE void twoWordOp12Reg4FourFours(OpcodeID1 op, RegisterID reg, FourFours ff)
         {
             m_buffer.putShort(op | reg);
             m_buffer.putShort(ff.m_u.value);
         }
         
-        void twoWordOp16FourFours(OpcodeID1 op, FourFours ff)
+        ALWAYS_INLINE void twoWordOp16FourFours(OpcodeID1 op, FourFours ff)
         {
             m_buffer.putShort(op);
             m_buffer.putShort(ff.m_u.value);
         }
         
-        void twoWordOp16Op16(OpcodeID1 op1, OpcodeID2 op2)
+        ALWAYS_INLINE void twoWordOp16Op16(OpcodeID1 op1, OpcodeID2 op2)
         {
             m_buffer.putShort(op1);
             m_buffer.putShort(op2);
         }
 
-        void twoWordOp5i6Imm4Reg4EncodedImm(OpcodeID1 op, int imm4, RegisterID rd, ARMThumbImmediate imm)
+        ALWAYS_INLINE void twoWordOp16Imm16(OpcodeID1 op1, uint16_t imm)
+        {
+            m_buffer.putShort(op1);
+            m_buffer.putShort(imm);
+        }
+        
+        ALWAYS_INLINE void twoWordOp5i6Imm4Reg4EncodedImm(OpcodeID1 op, int imm4, RegisterID rd, ARMThumbImmediate imm)
         {
             ARMThumbImmediate newImm = imm;
             newImm.m_value.imm4 = imm4;
@@ -2160,17 +2941,23 @@ private:
             m_buffer.putShort(ARMv7Assembler::twoWordOp5i6Imm4Reg4EncodedImmSecond(rd, newImm));
         }
 
-        void twoWordOp12Reg4Reg4Imm12(OpcodeID1 op, RegisterID reg1, RegisterID reg2, uint16_t imm)
+        ALWAYS_INLINE void twoWordOp12Reg4Reg4Imm12(OpcodeID1 op, RegisterID reg1, RegisterID reg2, uint16_t imm)
         {
             m_buffer.putShort(op | reg1);
             m_buffer.putShort((reg2 << 12) | imm);
         }
 
+        ALWAYS_INLINE void twoWordOp12Reg40Imm3Reg4Imm20Imm5(OpcodeID1 op, RegisterID reg1, RegisterID reg2, uint16_t imm1, uint16_t imm2, uint16_t imm3)
+        {
+            m_buffer.putShort(op | reg1);
+            m_buffer.putShort((imm1 << 12) | (reg2 << 8) | (imm2 << 6) | imm3);
+        }
+
         // Formats up instructions of the pattern:
         //    111111111B11aaaa:bbbb222SA2C2cccc
         // Where 1s in the pattern come from op1, 2s in the pattern come from op2, S is the provided size bit.
         // Operands provide 5 bit values of the form Aaaaa, Bbbbb, Ccccc.
-        void vfpOp(OpcodeID1 op1, OpcodeID2 op2, bool size, VFPOperand a, VFPOperand b, VFPOperand c)
+        ALWAYS_INLINE void vfpOp(OpcodeID1 op1, OpcodeID2 op2, bool size, VFPOperand a, VFPOperand b, VFPOperand c)
         {
             ASSERT(!(op1 & 0x004f));
             ASSERT(!(op2 & 0xf1af));
@@ -2180,7 +2967,7 @@ private:
 
         // Arm vfp addresses can be offset by a 9-bit ones-comp immediate, left shifted by 2.
         // (i.e. +/-(0..255) 32-bit words)
-        void vfpMemOp(OpcodeID1 op1, OpcodeID2 op2, bool size, RegisterID rn, VFPOperand rd, int32_t imm)
+        ALWAYS_INLINE void vfpMemOp(OpcodeID1 op1, OpcodeID2 op2, bool size, RegisterID rn, VFPOperand rd, int32_t imm)
         {
             bool up = true;
             if (imm < 0) {
@@ -2199,23 +2986,20 @@ private:
         // Administrative methods:
 
         size_t codeSize() const { return m_buffer.codeSize(); }
+        AssemblerLabel label() const { return m_buffer.label(); }
         bool isAligned(int alignment) const { return m_buffer.isAligned(alignment); }
         void* data() const { return m_buffer.data(); }
 
-#ifndef NDEBUG
-        unsigned debugOffset() { return m_formatter.debugOffset(); }
-#endif
+        unsigned debugOffset() { return m_buffer.debugOffset(); }
 
-    private:
         AssemblerBuffer m_buffer;
     } m_formatter;
 
-    Vector<LinkRecord> m_jumpsToLink;
-    Vector<int32_t> m_offsets;
+    Vector<LinkRecord, 0, UnsafeVectorOverflow> m_jumpsToLink;
+    int m_indexOfLastWatchpoint;
+    int m_indexOfTailOfLastWatchpoint;
 };
 
 } // namespace JSC
 
 #endif // ENABLE(ASSEMBLER) && CPU(ARM_THUMB2)
-
-#endif // ARMAssembler_h