EXTERN getHostCallReturnValueWithExecState : near
PUBLIC getHostCallReturnValue
+PUBLIC ctiMasmProbeTrampoline
_TEXT SEGMENT
ret
getHostCallReturnValue ENDP
+; The following constants must match the probe layout used by the x86_64 code in MacroAssemblerX86Common.cpp.
+PTR_SIZE EQU 8
+
+PROBE_PROBE_FUNCTION_OFFSET EQU (0 * PTR_SIZE)
+PROBE_ARG_OFFSET EQU (1 * PTR_SIZE)
+PROBE_INIT_STACK_FUNCTION_OFFSET EQU (2 * PTR_SIZE)
+PROBE_INIT_STACK_ARG_OFFSET EQU (3 * PTR_SIZE)
+
+PROBE_FIRST_GPR_OFFSET EQU (4 * PTR_SIZE)
+PROBE_CPU_EAX_OFFSET EQU (PROBE_FIRST_GPR_OFFSET + (0 * PTR_SIZE))
+PROBE_CPU_ECX_OFFSET EQU (PROBE_FIRST_GPR_OFFSET + (1 * PTR_SIZE))
+PROBE_CPU_EDX_OFFSET EQU (PROBE_FIRST_GPR_OFFSET + (2 * PTR_SIZE))
+PROBE_CPU_EBX_OFFSET EQU (PROBE_FIRST_GPR_OFFSET + (3 * PTR_SIZE))
+PROBE_CPU_ESP_OFFSET EQU (PROBE_FIRST_GPR_OFFSET + (4 * PTR_SIZE))
+PROBE_CPU_EBP_OFFSET EQU (PROBE_FIRST_GPR_OFFSET + (5 * PTR_SIZE))
+PROBE_CPU_ESI_OFFSET EQU (PROBE_FIRST_GPR_OFFSET + (6 * PTR_SIZE))
+PROBE_CPU_EDI_OFFSET EQU (PROBE_FIRST_GPR_OFFSET + (7 * PTR_SIZE))
+
+PROBE_CPU_R8_OFFSET EQU (PROBE_FIRST_GPR_OFFSET + (8 * PTR_SIZE))
+PROBE_CPU_R9_OFFSET EQU (PROBE_FIRST_GPR_OFFSET + (9 * PTR_SIZE))
+PROBE_CPU_R10_OFFSET EQU (PROBE_FIRST_GPR_OFFSET + (10 * PTR_SIZE))
+PROBE_CPU_R11_OFFSET EQU (PROBE_FIRST_GPR_OFFSET + (11 * PTR_SIZE))
+PROBE_CPU_R12_OFFSET EQU (PROBE_FIRST_GPR_OFFSET + (12 * PTR_SIZE))
+PROBE_CPU_R13_OFFSET EQU (PROBE_FIRST_GPR_OFFSET + (13 * PTR_SIZE))
+PROBE_CPU_R14_OFFSET EQU (PROBE_FIRST_GPR_OFFSET + (14 * PTR_SIZE))
+PROBE_CPU_R15_OFFSET EQU (PROBE_FIRST_GPR_OFFSET + (15 * PTR_SIZE))
+PROBE_FIRST_SPR_OFFSET EQU (PROBE_FIRST_GPR_OFFSET + (16 * PTR_SIZE))
+
+PROBE_CPU_EIP_OFFSET EQU (PROBE_FIRST_SPR_OFFSET + (0 * PTR_SIZE))
+PROBE_CPU_EFLAGS_OFFSET EQU (PROBE_FIRST_SPR_OFFSET + (1 * PTR_SIZE))
+PROBE_FIRST_XMM_OFFSET EQU (PROBE_FIRST_SPR_OFFSET + (2 * PTR_SIZE))
+
+XMM_SIZE EQU 8
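+; Only the low 64 bits of each XMM register are captured (stored and reloaded with movq below).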
+PROBE_CPU_XMM0_OFFSET EQU (PROBE_FIRST_XMM_OFFSET + (0 * XMM_SIZE))
+PROBE_CPU_XMM1_OFFSET EQU (PROBE_FIRST_XMM_OFFSET + (1 * XMM_SIZE))
+PROBE_CPU_XMM2_OFFSET EQU (PROBE_FIRST_XMM_OFFSET + (2 * XMM_SIZE))
+PROBE_CPU_XMM3_OFFSET EQU (PROBE_FIRST_XMM_OFFSET + (3 * XMM_SIZE))
+PROBE_CPU_XMM4_OFFSET EQU (PROBE_FIRST_XMM_OFFSET + (4 * XMM_SIZE))
+PROBE_CPU_XMM5_OFFSET EQU (PROBE_FIRST_XMM_OFFSET + (5 * XMM_SIZE))
+PROBE_CPU_XMM6_OFFSET EQU (PROBE_FIRST_XMM_OFFSET + (6 * XMM_SIZE))
+PROBE_CPU_XMM7_OFFSET EQU (PROBE_FIRST_XMM_OFFSET + (7 * XMM_SIZE))
+
+PROBE_CPU_XMM8_OFFSET EQU (PROBE_FIRST_XMM_OFFSET + (8 * XMM_SIZE))
+PROBE_CPU_XMM9_OFFSET EQU (PROBE_FIRST_XMM_OFFSET + (9 * XMM_SIZE))
+PROBE_CPU_XMM10_OFFSET EQU (PROBE_FIRST_XMM_OFFSET + (10 * XMM_SIZE))
+PROBE_CPU_XMM11_OFFSET EQU (PROBE_FIRST_XMM_OFFSET + (11 * XMM_SIZE))
+PROBE_CPU_XMM12_OFFSET EQU (PROBE_FIRST_XMM_OFFSET + (12 * XMM_SIZE))
+PROBE_CPU_XMM13_OFFSET EQU (PROBE_FIRST_XMM_OFFSET + (13 * XMM_SIZE))
+PROBE_CPU_XMM14_OFFSET EQU (PROBE_FIRST_XMM_OFFSET + (14 * XMM_SIZE))
+PROBE_CPU_XMM15_OFFSET EQU (PROBE_FIRST_XMM_OFFSET + (15 * XMM_SIZE))
+PROBE_SIZE EQU (PROBE_CPU_XMM15_OFFSET + XMM_SIZE)
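+; With PTR_SIZE = 8 and XMM_SIZE = 8, these evaluate to:
+;   PROBE_FIRST_GPR_OFFSET = 32, PROBE_FIRST_SPR_OFFSET = 160,
+;   PROBE_FIRST_XMM_OFFSET = 176, and PROBE_SIZE = 304 bytes.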
+
+PROBE_EXECUTOR_OFFSET EQU PROBE_SIZE ; Stash the executeProbe function pointer at the end of the ProbeContext.
+
+OUT_SIZE EQU (5 * PTR_SIZE)
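+; OUT_SIZE reserves guard space below the ProbeContext so that the 5-word restore
+; area (rflags, rax, rcx, rbp, rip) written near the end of the trampoline cannot
+; overlap the ProbeContext once the safety check below passes.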
+
+ctiMasmProbeTrampoline PROC
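+ ; Capture the full register state into a ProbeContext (Probe::State) on the stack,
+ ; call the probe via Probe::executeProbe, then restore the (possibly modified)
+ ; register state before returning.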
+ pushfq
+
+ ; MacroAssemblerX86Common::probe() has already generated code to store some values.
+ ; Together with the rflags pushed above, the top of stack now looks like this:
+ ; rsp[0 * ptrSize]: rflags
+ ; rsp[1 * ptrSize]: return address / saved rip
+ ; rsp[2 * ptrSize]: saved rbx
+ ; rsp[3 * ptrSize]: saved rdx
+ ; rsp[4 * ptrSize]: saved rcx
+ ; rsp[5 * ptrSize]: saved rax
+ ;
+ ; Incoming registers contain:
+ ; rcx: Probe::executeProbe
+ ; rdx: probe function
+ ; rbx: probe arg
+ ; rax: scratch (was ctiMasmProbeTrampoline)
+
+ mov rax, rsp
+ sub rsp, PROBE_SIZE + OUT_SIZE
+
+ ; The X86_64 ABI specifies that the worst case stack alignment requirement is 32 bytes.
+ and rsp, not 01fh
+ ; Since rsp now points to the ProbeContext, the context sits above the stack pointer
+ ; and is protected from being clobbered by interrupts while we initialize it.
+
+ mov [PROBE_CPU_EBP_OFFSET + rsp], rbp
+ mov rbp, rsp ; Save the ProbeContext*.
+
+ mov [PROBE_EXECUTOR_OFFSET + rbp], rcx
+ mov [PROBE_PROBE_FUNCTION_OFFSET + rbp], rdx
+ mov [PROBE_ARG_OFFSET + rbp], rbx
+ mov [PROBE_CPU_ESI_OFFSET + rbp], rsi
+ mov [PROBE_CPU_EDI_OFFSET + rbp], rdi
+
+ mov rcx, [0 * PTR_SIZE + rax]
+ mov [PROBE_CPU_EFLAGS_OFFSET + rbp], rcx
+ mov rcx, [1 * PTR_SIZE + rax]
+ mov [PROBE_CPU_EIP_OFFSET + rbp], rcx
+ mov rcx, [2 * PTR_SIZE + rax]
+ mov [PROBE_CPU_EBX_OFFSET + rbp], rcx
+ mov rcx, [3 * PTR_SIZE + rax]
+ mov [PROBE_CPU_EDX_OFFSET + rbp], rcx
+ mov rcx, [4 * PTR_SIZE + rax]
+ mov [PROBE_CPU_ECX_OFFSET + rbp], rcx
+ mov rcx, [5 * PTR_SIZE + rax]
+ mov [PROBE_CPU_EAX_OFFSET + rbp], rcx
+
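+ ; rax still holds the trampoline's entry rsp. The stack pointer the caller had
+ ; before the probe sequence is 6 words above that (rflags, return address, rbx,
+ ; rdx, rcx, rax were pushed), so record rax + 6 * PTR_SIZE as the saved rsp.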
+ mov rcx, rax
+ add rcx, 6 * PTR_SIZE
+ mov [PROBE_CPU_ESP_OFFSET + rbp], rcx
+
+ mov [PROBE_CPU_R8_OFFSET + rbp], r8
+ mov [PROBE_CPU_R9_OFFSET + rbp], r9
+ mov [PROBE_CPU_R10_OFFSET + rbp], r10
+ mov [PROBE_CPU_R11_OFFSET + rbp], r11
+ mov [PROBE_CPU_R12_OFFSET + rbp], r12
+ mov [PROBE_CPU_R13_OFFSET + rbp], r13
+ mov [PROBE_CPU_R14_OFFSET + rbp], r14
+ mov [PROBE_CPU_R15_OFFSET + rbp], r15
+
+ movq qword ptr [PROBE_CPU_XMM0_OFFSET + rbp], xmm0
+ movq qword ptr [PROBE_CPU_XMM1_OFFSET + rbp], xmm1
+ movq qword ptr [PROBE_CPU_XMM2_OFFSET + rbp], xmm2
+ movq qword ptr [PROBE_CPU_XMM3_OFFSET + rbp], xmm3
+ movq qword ptr [PROBE_CPU_XMM4_OFFSET + rbp], xmm4
+ movq qword ptr [PROBE_CPU_XMM5_OFFSET + rbp], xmm5
+ movq qword ptr [PROBE_CPU_XMM6_OFFSET + rbp], xmm6
+ movq qword ptr [PROBE_CPU_XMM7_OFFSET + rbp], xmm7
+ movq qword ptr [PROBE_CPU_XMM8_OFFSET + rbp], xmm8
+ movq qword ptr [PROBE_CPU_XMM9_OFFSET + rbp], xmm9
+ movq qword ptr [PROBE_CPU_XMM10_OFFSET + rbp], xmm10
+ movq qword ptr [PROBE_CPU_XMM11_OFFSET + rbp], xmm11
+ movq qword ptr [PROBE_CPU_XMM12_OFFSET + rbp], xmm12
+ movq qword ptr [PROBE_CPU_XMM13_OFFSET + rbp], xmm13
+ movq qword ptr [PROBE_CPU_XMM14_OFFSET + rbp], xmm14
+ movq qword ptr [PROBE_CPU_XMM15_OFFSET + rbp], xmm15
+
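+ ; Call Probe::executeProbe with the Probe::State* argument. Under the Microsoft x64
+ ; calling convention the first argument is passed in rcx and the caller must
+ ; reserve 32 bytes of shadow space.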
+ mov rcx, rbp ; the Probe::State* arg.
+ sub rsp, 32 ; shadow space
+ call qword ptr[PROBE_EXECUTOR_OFFSET + rbp]
+ add rsp, 32
+
+ ; Make sure the ProbeContext is entirely below the result stack pointer so
+ ; that register values are still preserved when we call the
+ ; initializeStackFunction.
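+ ; rax = rbp + PROBE_SIZE + OUT_SIZE is the first byte past the ProbeContext plus
+ ; its guard space; rdx is the rsp value the probe wants restored. If rdx >= rax,
+ ; the restore area below rdx cannot overlap the ProbeContext, so it can be used in place.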
+ mov rcx, PROBE_SIZE + OUT_SIZE
+ mov rax, rbp
+ mov rdx, [PROBE_CPU_ESP_OFFSET + rbp]
+ add rax, rcx
+ cmp rdx, rax
+ jge ctiMasmProbeTrampolineProbeContextIsSafe
+
+ ; Allocate a safe place on the stack below the result stack pointer to stash the ProbeContext.
+ sub rdx, rcx
+ and rdx, not 01fh ; Keep the stack pointer 32-byte aligned.
+ xor rax, rax
+ mov rsp, rdx
+
+ mov rcx, PROBE_SIZE
+
+ ; Copy the ProbeContext to the safe place.
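+ ; rax is the copy cursor (zeroed above), rcx holds PROBE_SIZE (the byte count),
+ ; and rdx is the scratch word for each 8-byte copy.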
+ ctiMasmProbeTrampolineCopyLoop:
+ mov rdx, [rbp + rax]
+ mov [rsp + rax], rdx
+ add rax, PTR_SIZE
+ cmp rcx, rax
+ jg ctiMasmProbeTrampolineCopyLoop
+
+ mov rbp, rsp ; rbp now points to the relocated copy of the ProbeContext.
+
+ ; Call initializeStackFunction if present.
+ ctiMasmProbeTrampolineProbeContextIsSafe:
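+ ; Zero rcx and add the function pointer so that ZF is set when
+ ; initializeStackFunction is null; the je then skips the call.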
+ xor rcx, rcx
+ add rcx, [PROBE_INIT_STACK_FUNCTION_OFFSET + rbp]
+ je ctiMasmProbeTrampolineRestoreRegisters
+
+ mov rdx, rcx
+ mov rcx, rbp ; the Probe::State* arg.
+ sub rsp, 32 ; shadow space
+ call rdx
+ add rsp, 32
+
+ ctiMasmProbeTrampolineRestoreRegisters:
+
+ ; To enable probes to modify register state, we copy all registers
+ ; out of the ProbeContext before returning.
+
+ mov rdx, [PROBE_CPU_EDX_OFFSET + rbp]
+ mov rbx, [PROBE_CPU_EBX_OFFSET + rbp]
+ mov rsi, [PROBE_CPU_ESI_OFFSET + rbp]
+ mov rdi, [PROBE_CPU_EDI_OFFSET + rbp]
+
+ mov r8, [PROBE_CPU_R8_OFFSET + rbp]
+ mov r9, [PROBE_CPU_R9_OFFSET + rbp]
+ mov r10, [PROBE_CPU_R10_OFFSET + rbp]
+ mov r11, [PROBE_CPU_R11_OFFSET + rbp]
+ mov r12, [PROBE_CPU_R12_OFFSET + rbp]
+ mov r13, [PROBE_CPU_R13_OFFSET + rbp]
+ mov r14, [PROBE_CPU_R14_OFFSET + rbp]
+ mov r15, [PROBE_CPU_R15_OFFSET + rbp]
+
+ movq xmm0, qword ptr[PROBE_CPU_XMM0_OFFSET + rbp]
+ movq xmm1, qword ptr[PROBE_CPU_XMM1_OFFSET + rbp]
+ movq xmm2, qword ptr[PROBE_CPU_XMM2_OFFSET + rbp]
+ movq xmm3, qword ptr[PROBE_CPU_XMM3_OFFSET + rbp]
+ movq xmm4, qword ptr[PROBE_CPU_XMM4_OFFSET + rbp]
+ movq xmm5, qword ptr[PROBE_CPU_XMM5_OFFSET + rbp]
+ movq xmm6, qword ptr[PROBE_CPU_XMM6_OFFSET + rbp]
+ movq xmm7, qword ptr[PROBE_CPU_XMM7_OFFSET + rbp]
+ movq xmm8, qword ptr[PROBE_CPU_XMM8_OFFSET + rbp]
+ movq xmm9, qword ptr[PROBE_CPU_XMM9_OFFSET + rbp]
+ movq xmm10, qword ptr[PROBE_CPU_XMM10_OFFSET + rbp]
+ movq xmm11, qword ptr[PROBE_CPU_XMM11_OFFSET + rbp]
+ movq xmm12, qword ptr[PROBE_CPU_XMM12_OFFSET + rbp]
+ movq xmm13, qword ptr[PROBE_CPU_XMM13_OFFSET + rbp]
+ movq xmm14, qword ptr[PROBE_CPU_XMM14_OFFSET + rbp]
+ movq xmm15, qword ptr[PROBE_CPU_XMM15_OFFSET + rbp]
+
+ ; There are 6 more registers left to restore:
+ ; rax, rcx, rbp, rsp, rip, and rflags.
+
+ ; The restoration sequence at the end of this trampoline works by popping
+ ; 5 words off the stack into rflags, rax, rcx, rbp, and rip. These 5 words need
+ ; to be pushed on top of the final rsp value so that just by popping the 5 words,
+ ; we'll get the rsp that the probe wants to set. Let's call this area (for storing
+ ; these 5 words) the restore area.
+ mov rcx, [PROBE_CPU_ESP_OFFSET + rbp]
+ sub rcx, 5 * PTR_SIZE
+
+ ; rcx now points to the restore area.
+
+ ; Copy remaining restore values from the ProbeContext to the restore area.
+ ; Note: We already ensured above that the ProbeContext is in a safe location before
+ ; calling the initializeStackFunction. The initializeStackFunction is not allowed to
+ ; change the stack pointer again.
+ mov rax, [PROBE_CPU_EFLAGS_OFFSET + rbp]
+ mov [0 * PTR_SIZE + rcx], rax
+ mov rax, [PROBE_CPU_EAX_OFFSET + rbp]
+ mov [1 * PTR_SIZE + rcx], rax
+ mov rax, [PROBE_CPU_ECX_OFFSET + rbp]
+ mov [2 * PTR_SIZE + rcx], rax
+ mov rax, [PROBE_CPU_EBP_OFFSET + rbp]
+ mov [3 * PTR_SIZE + rcx], rax
+ mov rax, [PROBE_CPU_EIP_OFFSET + rbp]
+ mov [4 * PTR_SIZE + rcx], rax
+ mov rsp, rcx
+
+ ; Do the remaining restoration by popping off the restore area.
+ popfq
+ pop rax
+ pop rcx
+ pop rbp
+ ret
+ctiMasmProbeTrampoline ENDP
+
_TEXT ENDS
END