diff --git a/internal/engine/wazevo/backend/isa/amd64/abi_entry_preamble.go b/internal/engine/wazevo/backend/isa/amd64/abi_entry_preamble.go
index 474c5bd8..882d06c0 100644
--- a/internal/engine/wazevo/backend/isa/amd64/abi_entry_preamble.go
+++ b/internal/engine/wazevo/backend/isa/amd64/abi_entry_preamble.go
@@ -74,6 +74,10 @@ func (m *machine) compileEntryPreamble(sig *ssa.Signature) *instruction {
 		}
 	}
 
+	// Zero out RBP so that the unwind/stack growth code can correctly detect the end of the stack.
+	zerosRbp := m.allocateInstr().asAluRmiR(aluRmiROpcodeXor, newOperandReg(rbpVReg), rbpVReg, true)
+	cur = linkInstr(cur, zerosRbp)
+
 	// Now ready to call the real function. Note that at this point the stack pointer is already set to the
 	// Go-allocated stack, which is aligned to 16 bytes.
 	call := m.allocateInstr().asCallIndirect(newOperandReg(functionExecutable), &abi)
diff --git a/internal/engine/wazevo/backend/isa/amd64/abi_entry_preamble_test.go b/internal/engine/wazevo/backend/isa/amd64/abi_entry_preamble_test.go
index 8a8f5c35..2dea21ee 100644
--- a/internal/engine/wazevo/backend/isa/amd64/abi_entry_preamble_test.go
+++ b/internal/engine/wazevo/backend/isa/amd64/abi_entry_preamble_test.go
@@ -24,6 +24,7 @@ func TestMachineCompileEntryPreamble(t *testing.T) {
 	mov.q %rbp, 16(%rax)
 	mov.q %rsp, 24(%rax)
 	movq %r13, %rsp
+	xor %rbp, %rbp
 	callq *%r14
 	movq 16(%rdx), %rbp
 	movq 24(%rdx), %rsp
@@ -47,6 +48,7 @@ func TestMachineCompileEntryPreamble(t *testing.T) {
 	movsd 24(%r12), %xmm1
 	movdqu 32(%r12), %xmm2
 	movq 48(%r12), %rsi
+	xor %rbp, %rbp
 	callq *%r14
 	movq 16(%rdx), %rbp
 	movq 24(%rdx), %rsp
@@ -65,6 +67,7 @@ func TestMachineCompileEntryPreamble(t *testing.T) {
 	mov.q %rbp, 16(%rax)
 	mov.q %rsp, 24(%rax)
 	movq %r13, %rsp
+	xor %rbp, %rbp
 	callq *%r14
 	mov.l %rax, (%r12)
 	movdqu %xmm0, 8(%r12)
@@ -94,6 +97,7 @@ func TestMachineCompileEntryPreamble(t *testing.T) {
 	movsd 24(%r12), %xmm1
 	movdqu 32(%r12), %xmm2
 	movq 48(%r12), %rsi
+	xor %rbp, %rbp
 	callq *%r14
 	mov.l %rax, (%r12)
 	movdqu %xmm0, 8(%r12)
@@ -150,6 +154,7 @@ func TestMachineCompileEntryPreamble(t *testing.T) {
 	movdqu %xmm15, 40(%rsp)
 	movq 192(%r12), %r15
 	mov.q %r15, 56(%rsp)
+	xor %rbp, %rbp
 	callq *%r14
 	movq 16(%rdx), %rbp
 	movq 24(%rdx), %rsp
@@ -173,6 +178,7 @@ func TestMachineCompileEntryPreamble(t *testing.T) {
 	mov.q %rsp, 24(%rax)
 	movq %r13, %rsp
 	sub $64, %rsp
+	xor %rbp, %rbp
 	callq *%r14
 	mov.q %rax, (%r12)
 	mov.q %rbx, 8(%r12)
@@ -260,6 +266,7 @@ func TestMachineCompileEntryPreamble(t *testing.T) {
 	movdqu %xmm15, 40(%rsp)
 	movq 192(%r12), %r15
 	mov.q %r15, 56(%rsp)
+	xor %rbp, %rbp
 	callq *%r14
 	mov.q %rax, (%r12)
 	mov.q %rbx, 8(%r12)
diff --git a/internal/engine/wazevo/backend/isa/amd64/stack.go b/internal/engine/wazevo/backend/isa/amd64/stack.go
index 9a70ac9d..d09c6d7d 100644
--- a/internal/engine/wazevo/backend/isa/amd64/stack.go
+++ b/internal/engine/wazevo/backend/isa/amd64/stack.go
@@ -85,7 +85,7 @@ func GoCallStackView(stackPointerBeforeGoCall *uint64) []uint64 {
 	return view
 }
 
-func AdjustStackAfterGrown(oldRsp, rsp, rbp, top uintptr) {
+func AdjustStackAfterGrown(oldRsp, oldTop, rsp, rbp, top uintptr) {
 	diff := uint64(rsp - oldRsp)
 
 	newBuf := stackView(rbp, top)
@@ -120,7 +120,16 @@ func AdjustStackAfterGrown(oldRsp, rsp, rbp, top uintptr) {
 			// End of stack.
 			break
 		}
+		if i64 := int64(callerRBP); i64 < int64(oldRsp) || i64 >= int64(oldTop) {
+			panic("BUG: callerRBP is out of range")
+		}
+		if int(callerRBP) < 0 {
+			panic("BUG: callerRBP is negative")
+		}
 		adjustedCallerRBP := callerRBP + diff
+		if int(adjustedCallerRBP) < 0 {
+			panic("BUG: adjustedCallerRBP is negative")
+		}
 		binary.LittleEndian.PutUint64(newBuf[i:], adjustedCallerRBP)
 		i = adjustedCallerRBP - uint64(rbp)
 	}
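The relocation loop in AdjustStackAfterGrown can be hard to follow inside a diff, so here is a minimal standalone sketch of the same technique under illustrative names (relocateFramePointers, newBuf, and the package name are not part of wazero): walk the chain of saved frame pointers in the copied stack, rebase each absolute address by the distance the stack moved, and stop at the zero sentinel that the `xor %rbp, %rbp` in the entry preamble guarantees.

package sketch

import "encoding/binary"

// relocateFramePointers is a simplified model of AdjustStackAfterGrown:
// newBuf is a copy of the old stack, rbp is the base address of the new
// stack, and diff is how far the stack moved (newRsp - oldRsp).
func relocateFramePointers(newBuf []byte, rbp, diff uint64) {
	var i uint64 // offset of the current saved-RBP slot within newBuf
	for i+8 <= uint64(len(newBuf)) {
		callerRBP := binary.LittleEndian.Uint64(newBuf[i:])
		if callerRBP == 0 {
			// End of stack: the sentinel installed by the entry preamble.
			break
		}
		adjusted := callerRBP + diff
		binary.LittleEndian.PutUint64(newBuf[i:], adjusted)
		// The next saved RBP lives where the adjusted caller RBP points,
		// expressed as an offset from the new stack base.
		i = adjusted - rbp
	}
}

This also shows why the new [oldRsp, oldTop) bounds checks are cheap insurance: a corrupt saved RBP would otherwise send the walk, and its writes, outside the stack buffer.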
diff --git a/internal/engine/wazevo/backend/isa/amd64/stack_test.go b/internal/engine/wazevo/backend/isa/amd64/stack_test.go
index 25035029..460454e8 100644
--- a/internal/engine/wazevo/backend/isa/amd64/stack_test.go
+++ b/internal/engine/wazevo/backend/isa/amd64/stack_test.go
@@ -51,6 +51,7 @@ func TestAdjustStackAfterGrown(t *testing.T) {
 
 	oldStack := allocSlice(512)
 	oldRsp := uintptr(unsafe.Pointer(&oldStack[0]))
+	oldTop := uintptr(unsafe.Pointer(&oldStack[len(oldStack)-1]))
 	rbpIndex := uintptr(32)
 	binary.LittleEndian.PutUint64(oldStack[rbpIndex:], addressOf(&oldStack[16+rbpIndex]))
 	binary.LittleEndian.PutUint64(oldStack[rbpIndex+16:], addressOf(&oldStack[32+rbpIndex]))
@@ -62,7 +63,7 @@ func TestAdjustStackAfterGrown(t *testing.T) {
 
 	// Copy the old stack to the new stack, which then still contains the old pointers to the old stack elements.
 	copy(newStack, oldStack)
-	AdjustStackAfterGrown(oldRsp, rsp, rbp, uintptr(addressOf(&newStack[len(newStack)-1])))
+	AdjustStackAfterGrown(oldRsp, oldTop, rsp, rbp, uintptr(addressOf(&newStack[len(newStack)-1])))
 	require.Equal(t, addressOf(&newStack[rbpIndex+16]), binary.LittleEndian.Uint64(newStack[rbpIndex:]))
 	require.Equal(t, addressOf(&newStack[rbpIndex+32]), binary.LittleEndian.Uint64(newStack[rbpIndex+16:]))
 	require.Equal(t, addressOf(&newStack[rbpIndex+160]), binary.LittleEndian.Uint64(newStack[rbpIndex+32:]))
diff --git a/internal/engine/wazevo/call_engine.go b/internal/engine/wazevo/call_engine.go
index 8aabd4a4..22c24ea1 100644
--- a/internal/engine/wazevo/call_engine.go
+++ b/internal/engine/wazevo/call_engine.go
@@ -5,6 +5,7 @@ import (
 	"encoding/binary"
 	"fmt"
 	"reflect"
+	"runtime"
 	"sync/atomic"
 	"unsafe"
 
@@ -252,6 +253,8 @@ func (c *callEngine) callWithStack(ctx context.Context, paramResultStack []uint6
 			return nil
 		case wazevoapi.ExitCodeGrowStack:
 			oldsp := uintptr(unsafe.Pointer(c.execCtx.stackPointerBeforeGoCall))
+			oldTop := c.stackTop
+			oldStack := c.stack
 			var newsp, newfp uintptr
 			if wazevoapi.StackGuardCheckEnabled {
 				newsp, newfp, err = c.growStackWithGuarded()
@@ -261,7 +264,9 @@ func (c *callEngine) callWithStack(ctx context.Context, paramResultStack []uint6
 			if err != nil {
 				return err
 			}
-			adjustStackAfterGrown(oldsp, newsp, newfp, c.stackTop)
+			adjustStackAfterGrown(oldsp, oldTop, newsp, newfp, c.stackTop)
+			// Old stack must be alive until the new stack is adjusted.
+			runtime.KeepAlive(oldStack)
 			c.execCtx.exitCode = wazevoapi.ExitCodeOK
 			afterGoFunctionCallEntrypoint(c.execCtx.goCallReturnAddress, c.execCtxPtr, newsp, newfp)
 		case wazevoapi.ExitCodeGrowMemory:
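The runtime.KeepAlive call above deserves a note. Once a pointer is converted to uintptr, the garbage collector no longer sees a reference, so without it the old stack could be reclaimed while AdjustStackAfterGrown is still validating saved frame pointers against the [oldsp, oldTop) range. A condensed sketch of the pattern, with hypothetical names (grow, adjust):

package sketch

import (
	"runtime"
	"unsafe"
)

func grow(oldStack []uint64) []uint64 {
	// uintptr values are invisible to the GC: from here on, nothing ties
	// the lifetime of oldStack to these addresses.
	oldsp := uintptr(unsafe.Pointer(&oldStack[0]))
	oldTop := uintptr(unsafe.Pointer(&oldStack[len(oldStack)-1]))

	newStack := make([]uint64, 2*len(oldStack))
	copy(newStack, oldStack)
	adjust(newStack, oldsp, oldTop)

	// Keep the old stack (and thus the meaning of oldsp/oldTop) alive
	// until the adjustment above has completed.
	runtime.KeepAlive(oldStack)
	return newStack
}

func adjust(newStack []uint64, oldsp, oldTop uintptr) {
	// Relocate saved frame pointers, panicking if any falls outside
	// [oldsp, oldTop) -- see AdjustStackAfterGrown above.
}

Capturing oldStack := c.stack before the growth matters for the same reason: once growStack replaces c.stack, the local copy is the only remaining reference to the old buffer.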
diff --git a/internal/engine/wazevo/isa.go b/internal/engine/wazevo/isa.go
index 5f427f2c..f4bc7091 100644
--- a/internal/engine/wazevo/isa.go
+++ b/internal/engine/wazevo/isa.go
@@ -47,14 +47,14 @@ func goCallStackView(stackPointerBeforeGoCall *uint64) []uint64 {
 
 // adjustStackAfterGrown adjusts the stack after it has been grown.
 // More precisely, the absolute addresses (frame pointers) in the stack must be adjusted.
-func adjustStackAfterGrown(oldsp, sp, fp, top uintptr) {
+func adjustStackAfterGrown(oldsp, oldTop, sp, fp, top uintptr) {
 	switch runtime.GOARCH {
 	case "arm64":
 		// TODO: currently, the frame pointers are not used, and the saved old sps are relative to the current stack pointer,
 		// so no adjustment is needed on arm64. However, when we make them absolute, which in my opinion is better perf-wise
 		// at the expense of a slightly costlier stack growth, we need to adjust the pushed frame pointers.
 	case "amd64":
-		amd64.AdjustStackAfterGrown(oldsp, sp, fp, top)
+		amd64.AdjustStackAfterGrown(oldsp, oldTop, sp, fp, top)
 	default:
 		panic("unsupported architecture")
 	}
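The contrast described by the arm64 TODO above can be stated in two lines; these helpers are purely illustrative assumptions, not wazero code:

package sketch

// Relative scheme (arm64 today): the caller frame is recovered by adding a
// saved offset to the current stack pointer, so a stack move changes nothing.
func callerFromOffset(sp, savedOffset uintptr) uintptr { return sp + savedOffset }

// Absolute scheme (amd64): the stored address is only valid for the old
// stack and must be rebased by the distance the stack moved after growth.
func rebaseCaller(savedAbs, diff uintptr) uintptr { return savedAbs + diff }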