Only use 16 VFP registers if !CPU(ARM_NEON).
author mark.lam@apple.com <mark.lam@apple.com@268f45cc-cd09-0410-ab3c-d52691b4dbfc>
Thu, 17 Aug 2017 19:57:46 +0000 (19:57 +0000)
committer mark.lam@apple.com <mark.lam@apple.com@268f45cc-cd09-0410-ab3c-d52691b4dbfc>
Thu, 17 Aug 2017 19:57:46 +0000 (19:57 +0000)
https://bugs.webkit.org/show_bug.cgi?id=175514

Reviewed by JF Bastien.

Source/JavaScriptCore:

Deleted q16-q31 FPQuadRegisterID enums in ARMv7Assembler.h.  The NEON spec
says that there are only 16 128-bit NEON registers.  This change is merely to
correct the code documentation of these registers.  The FPQuadRegisterID are
currently unused.

* assembler/ARMAssembler.h:
(JSC::ARMAssembler::lastFPRegister):
(JSC::ARMAssembler::fprName):
* assembler/ARMv7Assembler.h:
(JSC::ARMv7Assembler::lastFPRegister):
(JSC::ARMv7Assembler::fprName):
* assembler/MacroAssemblerARM.cpp:
* assembler/MacroAssemblerARMv7.cpp:

Source/WTF:

If CPU(ARM_NEON) is not enabled, we'll conservatively assume only VFP2 support is
available. Hence, only the first 16 FPDoubleRegisterIDs are available.

For reference, see:
NEON registers: http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.dui0473c/CJACABEJ.html
VFP2 and VFP3 registers: http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.dui0473c/CIHDIBDG.html
NEON to VFP register mapping: http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.dui0473c/CJAIJHFC.html

This is mostly for GTK toolchains which may target older ARM CPUs which only have
VFP2 support.

* wtf/Platform.h:

git-svn-id: https://svn.webkit.org/repository/webkit/trunk@220871 268f45cc-cd09-0410-ab3c-d52691b4dbfc

Source/JavaScriptCore/ChangeLog
Source/JavaScriptCore/assembler/ARMAssembler.h
Source/JavaScriptCore/assembler/ARMv7Assembler.h
Source/JavaScriptCore/assembler/MacroAssemblerARM.cpp
Source/JavaScriptCore/assembler/MacroAssemblerARMv7.cpp
Source/WTF/ChangeLog
Source/WTF/wtf/Platform.h

index 1219f3a..6ee82a6 100644 (file)
@@ -1,3 +1,24 @@
+2017-08-17  Mark Lam  <mark.lam@apple.com>
+
+        Only use 16 VFP registers if !CPU(ARM_NEON).
+        https://bugs.webkit.org/show_bug.cgi?id=175514
+
+        Reviewed by JF Bastien.
+
+        Deleted q16-q31 FPQuadRegisterID enums in ARMv7Assembler.h.  The NEON spec
+        says that there are only 16 128-bit NEON registers.  This change is merely to
+        correct the code documentation of these registers.  The FPQuadRegisterID are
+        currently unused.
+
+        * assembler/ARMAssembler.h:
+        (JSC::ARMAssembler::lastFPRegister):
+        (JSC::ARMAssembler::fprName):
+        * assembler/ARMv7Assembler.h:
+        (JSC::ARMv7Assembler::lastFPRegister):
+        (JSC::ARMv7Assembler::fprName):
+        * assembler/MacroAssemblerARM.cpp:
+        * assembler/MacroAssemblerARMv7.cpp:
+
 2017-08-17  Andreas Kling  <akling@apple.com>
 
         Disable CSS regions at compile time
index b389760..9e60716 100644 (file)
@@ -87,6 +87,7 @@ namespace JSC {
             d13,
             d14,
             d15,
+#if CPU(ARM_NEON) || CPU(ARM_VFP_V3_D32)
             d16,
             d17,
             d18,
@@ -103,6 +104,7 @@ namespace JSC {
             d29,
             d30,
             d31,
+#endif // CPU(ARM_NEON) || CPU(ARM_VFP_V3_D32)
 
             // Pseudonyms for some of the registers.
             SD0 = d7, /* Same as thumb assembler. */
@@ -134,7 +136,11 @@ namespace JSC {
         static constexpr unsigned numberOfSPRegisters() { return lastSPRegister() - firstSPRegister() + 1; }
 
         static constexpr FPRegisterID firstFPRegister() { return ARMRegisters::d0; }
+#if CPU(ARM_NEON) || CPU(ARM_VFP_V3_D32)
         static constexpr FPRegisterID lastFPRegister() { return ARMRegisters::d31; }
+#else
+        static constexpr FPRegisterID lastFPRegister() { return ARMRegisters::d15; }
+#endif
         static constexpr unsigned numberOfFPRegisters() { return lastFPRegister() - firstFPRegister() + 1; }
 
         static const char* gprName(RegisterID id)
@@ -166,10 +172,12 @@ namespace JSC {
                 "d4", "d5", "d6", "d7",
                 "d8", "d9", "d10", "d11",
                 "d12", "d13", "d14", "d15",
+#if CPU(ARM_NEON) || CPU(ARM_VFP_V3_D32)
                 "d16", "d17", "d18", "d19",
                 "d20", "d21", "d22", "d23",
                 "d24", "d25", "d26", "d27",
                 "d28", "d29", "d30", "d31"
+#endif // CPU(ARM_NEON) || CPU(ARM_VFP_V3_D32)
             };
             return nameForRegister[id];
         }
index 6fb1a57..080afc0 100644 (file)
@@ -123,6 +123,7 @@ namespace ARMRegisters {
         d13,
         d14,
         d15,
+#if CPU(ARM_NEON) || CPU(ARM_VFP_V3_D32)
         d16,
         d17,
         d18,
@@ -139,8 +140,10 @@ namespace ARMRegisters {
         d29,
         d30,
         d31,
+#endif // CPU(ARM_NEON) || CPU(ARM_VFP_V3_D32)
     } FPDoubleRegisterID;
 
+#if CPU(ARM_NEON)
     typedef enum {
         q0,
         q1,
@@ -158,23 +161,8 @@ namespace ARMRegisters {
         q13,
         q14,
         q15,
-        q16,
-        q17,
-        q18,
-        q19,
-        q20,
-        q21,
-        q22,
-        q23,
-        q24,
-        q25,
-        q26,
-        q27,
-        q28,
-        q29,
-        q30,
-        q31,
     } FPQuadRegisterID;
+#endif // CPU(ARM_NEON)
 
     inline FPSingleRegisterID asSingle(FPDoubleRegisterID reg)
     {
@@ -433,7 +421,9 @@ public:
     typedef ARMRegisters::RegisterID RegisterID;
     typedef ARMRegisters::FPSingleRegisterID FPSingleRegisterID;
     typedef ARMRegisters::FPDoubleRegisterID FPDoubleRegisterID;
+#if CPU(ARM_NEON)
     typedef ARMRegisters::FPQuadRegisterID FPQuadRegisterID;
+#endif
     typedef ARMRegisters::SPRegisterID SPRegisterID;
     typedef FPDoubleRegisterID FPRegisterID;
     
@@ -446,7 +436,11 @@ public:
     static constexpr unsigned numberOfSPRegisters() { return lastSPRegister() - firstSPRegister() + 1; }
 
     static constexpr FPRegisterID firstFPRegister() { return ARMRegisters::d0; }
+#if CPU(ARM_NEON) || CPU(ARM_VFP_V3_D32)
     static constexpr FPRegisterID lastFPRegister() { return ARMRegisters::d31; }
+#else
+    static constexpr FPRegisterID lastFPRegister() { return ARMRegisters::d15; }
+#endif
     static constexpr unsigned numberOfFPRegisters() { return lastFPRegister() - firstFPRegister() + 1; }
 
     static const char* gprName(RegisterID id)
@@ -478,10 +472,12 @@ public:
             "d4", "d5", "d6", "d7",
             "d8", "d9", "d10", "d11",
             "d12", "d13", "d14", "d15",
+#if CPU(ARM_NEON) || CPU(ARM_VFP_V3_D32)
             "d16", "d17", "d18", "d19",
             "d20", "d21", "d22", "d23",
             "d24", "d25", "d26", "d27",
             "d28", "d29", "d30", "d31"
+#endif // CPU(ARM_NEON) || CPU(ARM_VFP_V3_D32)
         };
         return nameForRegister[id];
     }
index 3596658..c5cc51b 100644 (file)
@@ -153,6 +153,8 @@ extern "C" void ctiMasmProbeTrampoline();
 #define PROBE_CPU_D13_OFFSET (PROBE_FIRST_FPREG_OFFSET + (13 * FPREG_SIZE))
 #define PROBE_CPU_D14_OFFSET (PROBE_FIRST_FPREG_OFFSET + (14 * FPREG_SIZE))
 #define PROBE_CPU_D15_OFFSET (PROBE_FIRST_FPREG_OFFSET + (15 * FPREG_SIZE))
+
+#if CPU(ARM_NEON) || CPU(ARM_VFP_V3_D32)
 #define PROBE_CPU_D16_OFFSET (PROBE_FIRST_FPREG_OFFSET + (16 * FPREG_SIZE))
 #define PROBE_CPU_D17_OFFSET (PROBE_FIRST_FPREG_OFFSET + (17 * FPREG_SIZE))
 #define PROBE_CPU_D18_OFFSET (PROBE_FIRST_FPREG_OFFSET + (18 * FPREG_SIZE))
@@ -171,6 +173,9 @@ extern "C" void ctiMasmProbeTrampoline();
 #define PROBE_CPU_D31_OFFSET (PROBE_FIRST_FPREG_OFFSET + (31 * FPREG_SIZE))
 
 #define PROBE_SIZE (PROBE_FIRST_FPREG_OFFSET + (32 * FPREG_SIZE))
+#else
+#define PROBE_SIZE (PROBE_FIRST_FPREG_OFFSET + (16 * FPREG_SIZE))
+#endif // CPU(ARM_NEON) || CPU(ARM_VFP_V3_D32)
 
 #define OUT_SIZE GPREG_SIZE
 
@@ -222,6 +227,8 @@ COMPILE_ASSERT(PROBE_OFFSETOF(cpu.fprs[ARMRegisters::d12]) == PROBE_CPU_D12_OFFS
 COMPILE_ASSERT(PROBE_OFFSETOF(cpu.fprs[ARMRegisters::d13]) == PROBE_CPU_D13_OFFSET, ProbeContext_cpu_d13_offset_matches_ctiMasmProbeTrampoline);
 COMPILE_ASSERT(PROBE_OFFSETOF(cpu.fprs[ARMRegisters::d14]) == PROBE_CPU_D14_OFFSET, ProbeContext_cpu_d14_offset_matches_ctiMasmProbeTrampoline);
 COMPILE_ASSERT(PROBE_OFFSETOF(cpu.fprs[ARMRegisters::d15]) == PROBE_CPU_D15_OFFSET, ProbeContext_cpu_d15_offset_matches_ctiMasmProbeTrampoline);
+
+#if CPU(ARM_NEON) || CPU(ARM_VFP_V3_D32)
 COMPILE_ASSERT(PROBE_OFFSETOF(cpu.fprs[ARMRegisters::d16]) == PROBE_CPU_D16_OFFSET, ProbeContext_cpu_d16_offset_matches_ctiMasmProbeTrampoline);
 COMPILE_ASSERT(PROBE_OFFSETOF(cpu.fprs[ARMRegisters::d17]) == PROBE_CPU_D17_OFFSET, ProbeContext_cpu_d17_offset_matches_ctiMasmProbeTrampoline);
 COMPILE_ASSERT(PROBE_OFFSETOF(cpu.fprs[ARMRegisters::d18]) == PROBE_CPU_D18_OFFSET, ProbeContext_cpu_d18_offset_matches_ctiMasmProbeTrampoline);
@@ -238,6 +245,7 @@ COMPILE_ASSERT(PROBE_OFFSETOF(cpu.fprs[ARMRegisters::d28]) == PROBE_CPU_D28_OFFS
 COMPILE_ASSERT(PROBE_OFFSETOF(cpu.fprs[ARMRegisters::d29]) == PROBE_CPU_D29_OFFSET, ProbeContext_cpu_d29_offset_matches_ctiMasmProbeTrampoline);
 COMPILE_ASSERT(PROBE_OFFSETOF(cpu.fprs[ARMRegisters::d30]) == PROBE_CPU_D30_OFFSET, ProbeContext_cpu_d30_offset_matches_ctiMasmProbeTrampoline);
 COMPILE_ASSERT(PROBE_OFFSETOF(cpu.fprs[ARMRegisters::d31]) == PROBE_CPU_D31_OFFSET, ProbeContext_cpu_d31_offset_matches_ctiMasmProbeTrampoline);
+#endif // CPU(ARM_NEON) || CPU(ARM_VFP_V3_D32)
 
 COMPILE_ASSERT(sizeof(ProbeContext) == PROBE_SIZE, ProbeContext_size_matches_ctiMasmProbeTrampoline);
 #undef PROBE_OFFSETOF
@@ -291,8 +299,9 @@ asm (
 
     "add       ip, sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_D0_OFFSET) "\n"
     "vstmia.64 ip!, { d0-d15 }" "\n"
+#if CPU(ARM_NEON) || CPU(ARM_VFP_V3_D32)
     "vstmia.64 ip!, { d16-d31 }" "\n"
-
+#endif
     "mov       fp, sp" "\n" // Save the ProbeContext*.
 
     // Initialize ProbeContext::initializeStackFunction to zero.
@@ -349,9 +358,15 @@ asm (
     // To enable probes to modify register state, we copy all registers
     // out of the ProbeContext before returning.
 
+#if CPU(ARM_NEON) || CPU(ARM_VFP_V3_D32)
     "add       ip, sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_D31_OFFSET + FPREG_SIZE) "\n"
     "vldmdb.64 ip!, { d16-d31 }" "\n"
     "vldmdb.64 ip!, { d0-d15 }" "\n"
+#else
+    "add       ip, sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_D15_OFFSET + FPREG_SIZE) "\n"
+    "vldmdb.64 ip!, { d0-d15 }" "\n"
+#endif
+
     "add       ip, sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_R11_OFFSET + GPREG_SIZE) "\n"
     "ldmdb     ip, { r0-r11 }" "\n"
     "ldr       ip, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_FPSCR_OFFSET) "]" "\n"
index 61fc490..dc83566 100644 (file)
@@ -89,6 +89,8 @@ extern "C" void ctiMasmProbeTrampoline();
 #define PROBE_CPU_D13_OFFSET (PROBE_FIRST_FPREG_OFFSET + (13 * FPREG_SIZE))
 #define PROBE_CPU_D14_OFFSET (PROBE_FIRST_FPREG_OFFSET + (14 * FPREG_SIZE))
 #define PROBE_CPU_D15_OFFSET (PROBE_FIRST_FPREG_OFFSET + (15 * FPREG_SIZE))
+
+#if CPU(ARM_NEON) || CPU(ARM_VFP_V3_D32)
 #define PROBE_CPU_D16_OFFSET (PROBE_FIRST_FPREG_OFFSET + (16 * FPREG_SIZE))
 #define PROBE_CPU_D17_OFFSET (PROBE_FIRST_FPREG_OFFSET + (17 * FPREG_SIZE))
 #define PROBE_CPU_D18_OFFSET (PROBE_FIRST_FPREG_OFFSET + (18 * FPREG_SIZE))
@@ -107,6 +109,9 @@ extern "C" void ctiMasmProbeTrampoline();
 #define PROBE_CPU_D31_OFFSET (PROBE_FIRST_FPREG_OFFSET + (31 * FPREG_SIZE))
 
 #define PROBE_SIZE (PROBE_FIRST_FPREG_OFFSET + (32 * FPREG_SIZE))
+#else
+#define PROBE_SIZE (PROBE_FIRST_FPREG_OFFSET + (16 * FPREG_SIZE))
+#endif // CPU(ARM_NEON) || CPU(ARM_VFP_V3_D32)
 
 #define OUT_SIZE GPREG_SIZE
 
@@ -159,6 +164,7 @@ COMPILE_ASSERT(PROBE_OFFSETOF(cpu.fprs[ARMRegisters::d13]) == PROBE_CPU_D13_OFFS
 COMPILE_ASSERT(PROBE_OFFSETOF(cpu.fprs[ARMRegisters::d14]) == PROBE_CPU_D14_OFFSET, ProbeContext_cpu_d14_offset_matches_ctiMasmProbeTrampoline);
 COMPILE_ASSERT(PROBE_OFFSETOF(cpu.fprs[ARMRegisters::d15]) == PROBE_CPU_D15_OFFSET, ProbeContext_cpu_d15_offset_matches_ctiMasmProbeTrampoline);
 
+#if CPU(ARM_NEON) || CPU(ARM_VFP_V3_D32)
 COMPILE_ASSERT(PROBE_OFFSETOF(cpu.fprs[ARMRegisters::d16]) == PROBE_CPU_D16_OFFSET, ProbeContext_cpu_d16_offset_matches_ctiMasmProbeTrampoline);
 COMPILE_ASSERT(PROBE_OFFSETOF(cpu.fprs[ARMRegisters::d17]) == PROBE_CPU_D17_OFFSET, ProbeContext_cpu_d17_offset_matches_ctiMasmProbeTrampoline);
 COMPILE_ASSERT(PROBE_OFFSETOF(cpu.fprs[ARMRegisters::d18]) == PROBE_CPU_D18_OFFSET, ProbeContext_cpu_d18_offset_matches_ctiMasmProbeTrampoline);
@@ -175,10 +181,11 @@ COMPILE_ASSERT(PROBE_OFFSETOF(cpu.fprs[ARMRegisters::d28]) == PROBE_CPU_D28_OFFS
 COMPILE_ASSERT(PROBE_OFFSETOF(cpu.fprs[ARMRegisters::d29]) == PROBE_CPU_D29_OFFSET, ProbeContext_cpu_d29_offset_matches_ctiMasmProbeTrampoline);
 COMPILE_ASSERT(PROBE_OFFSETOF(cpu.fprs[ARMRegisters::d30]) == PROBE_CPU_D30_OFFSET, ProbeContext_cpu_d30_offset_matches_ctiMasmProbeTrampoline);
 COMPILE_ASSERT(PROBE_OFFSETOF(cpu.fprs[ARMRegisters::d31]) == PROBE_CPU_D31_OFFSET, ProbeContext_cpu_d31_offset_matches_ctiMasmProbeTrampoline);
+#endif // CPU(ARM_NEON) || CPU(ARM_VFP_V3_D32)
 
 COMPILE_ASSERT(sizeof(ProbeContext) == PROBE_SIZE, ProbeContext_size_matches_ctiMasmProbeTrampoline);
 #undef PROBE_OFFSETOF
-    
+
 asm (
     ".text" "\n"
     ".align 2" "\n"
@@ -230,8 +237,9 @@ asm (
 
     "add       ip, sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_D0_OFFSET) "\n"
     "vstmia.64 ip!, { d0-d15 }" "\n"
+#if CPU(ARM_NEON) || CPU(ARM_VFP_V3_D32)
     "vstmia.64 ip!, { d16-d31 }" "\n"
-
+#endif
     "mov       fp, sp" "\n" // Save the ProbeContext*.
 
     // Initialize ProbeContext::initializeStackFunction to zero.
@@ -289,9 +297,14 @@ asm (
     // To enable probes to modify register state, we copy all registers
     // out of the ProbeContext before returning.
 
+#if CPU(ARM_NEON) || CPU(ARM_VFP_V3_D32)
     "add       ip, sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_D31_OFFSET + FPREG_SIZE) "\n"
     "vldmdb.64 ip!, { d16-d31 }" "\n"
     "vldmdb.64 ip!, { d0-d15 }" "\n"
+#else
+    "add       ip, sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_D15_OFFSET + FPREG_SIZE) "\n"
+    "vldmdb.64 ip!, { d0-d15 }" "\n"
+#endif
 
     "add       ip, sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_R11_OFFSET + GPREG_SIZE) "\n"
     "ldmdb     ip, { r0-r11 }" "\n"
index e53be4b..041b7fe 100644 (file)
@@ -1,3 +1,23 @@
+2017-08-17  Mark Lam  <mark.lam@apple.com>
+
+        Only use 16 VFP registers if !CPU(ARM_NEON).
+        https://bugs.webkit.org/show_bug.cgi?id=175514
+
+        Reviewed by JF Bastien.
+
+        If CPU(ARM_NEON) is not enabled, we'll conservatively assume only VFP2 support is
+        available. Hence, only the first 16 FPDoubleRegisterIDs are available.
+
+        For reference, see:
+        NEON registers: http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.dui0473c/CJACABEJ.html
+        VFP2 and VFP3 registers: http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.dui0473c/CIHDIBDG.html
+        NEON to VFP register mapping: http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.dui0473c/CJAIJHFC.html
+
+        This is mostly for GTK toolchains which may target older ARM CPUs which only have
+        VFP2 support.
+
+        * wtf/Platform.h:
+
 2017-08-16  Mark Lam  <mark.lam@apple.com>
 
         Add back the ability to disable MASM_PROBE from the build.
index c5dfeed..9262ff1 100644 (file)
 #define WTF_CPU_ARM_VFP 1
 #endif
 
+/* If CPU(ARM_NEON) is not enabled, we'll conservatively assume only VFP2 or VFPv3D16
+   support is available. Hence, only the first 16 64-bit floating point registers
+   are available. See:
+   NEON registers: http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.dui0473c/CJACABEJ.html
+   VFP2 and VFP3 registers: http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.dui0473c/CIHDIBDG.html
+   NEON to VFP register mapping: http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.dui0473c/CJAIJHFC.html
+*/
+#if CPU(ARM_NEON)
+#define WTF_CPU_ARM_VFP_V3_D32 1
+#else
+#define WTF_CPU_ARM_VFP_V2 1
+#endif
+
 #if defined(__ARM_ARCH_7K__)
 #define WTF_CPU_APPLE_ARMV7K 1
 #endif