wazevo(amd64): clears RBP at the entry from Go (#2081)
Signed-off-by: Takeshi Yoneda <t.y.mathetake@gmail.com>
This commit is contained in:
@@ -74,6 +74,10 @@ func (m *machine) compileEntryPreamble(sig *ssa.Signature) *instruction {
|
||||
}
|
||||
}
|
||||
|
||||
// Zero out RBP so that the unwind/stack growth code can correctly detect the end of the stack.
|
||||
zerosRbp := m.allocateInstr().asAluRmiR(aluRmiROpcodeXor, newOperandReg(rbpVReg), rbpVReg, true)
|
||||
cur = linkInstr(cur, zerosRbp)
|
||||
|
||||
// Now ready to call the real function. Note that at this point stack pointer is already set to the Go-allocated,
|
||||
// which is aligned to 16 bytes.
|
||||
call := m.allocateInstr().asCallIndirect(newOperandReg(functionExecutable), &abi)
|
||||
|
||||
@@ -24,6 +24,7 @@ func TestMachineCompileEntryPreamble(t *testing.T) {
|
||||
mov.q %rbp, 16(%rax)
|
||||
mov.q %rsp, 24(%rax)
|
||||
movq %r13, %rsp
|
||||
xor %rbp, %rbp
|
||||
callq *%r14
|
||||
movq 16(%rdx), %rbp
|
||||
movq 24(%rdx), %rsp
|
||||
@@ -47,6 +48,7 @@ func TestMachineCompileEntryPreamble(t *testing.T) {
|
||||
movsd 24(%r12), %xmm1
|
||||
movdqu 32(%r12), %xmm2
|
||||
movq 48(%r12), %rsi
|
||||
xor %rbp, %rbp
|
||||
callq *%r14
|
||||
movq 16(%rdx), %rbp
|
||||
movq 24(%rdx), %rsp
|
||||
@@ -65,6 +67,7 @@ func TestMachineCompileEntryPreamble(t *testing.T) {
|
||||
mov.q %rbp, 16(%rax)
|
||||
mov.q %rsp, 24(%rax)
|
||||
movq %r13, %rsp
|
||||
xor %rbp, %rbp
|
||||
callq *%r14
|
||||
mov.l %rax, (%r12)
|
||||
movdqu %xmm0, 8(%r12)
|
||||
@@ -94,6 +97,7 @@ func TestMachineCompileEntryPreamble(t *testing.T) {
|
||||
movsd 24(%r12), %xmm1
|
||||
movdqu 32(%r12), %xmm2
|
||||
movq 48(%r12), %rsi
|
||||
xor %rbp, %rbp
|
||||
callq *%r14
|
||||
mov.l %rax, (%r12)
|
||||
movdqu %xmm0, 8(%r12)
|
||||
@@ -150,6 +154,7 @@ func TestMachineCompileEntryPreamble(t *testing.T) {
|
||||
movdqu %xmm15, 40(%rsp)
|
||||
movq 192(%r12), %r15
|
||||
mov.q %r15, 56(%rsp)
|
||||
xor %rbp, %rbp
|
||||
callq *%r14
|
||||
movq 16(%rdx), %rbp
|
||||
movq 24(%rdx), %rsp
|
||||
@@ -173,6 +178,7 @@ func TestMachineCompileEntryPreamble(t *testing.T) {
|
||||
mov.q %rsp, 24(%rax)
|
||||
movq %r13, %rsp
|
||||
sub $64, %rsp
|
||||
xor %rbp, %rbp
|
||||
callq *%r14
|
||||
mov.q %rax, (%r12)
|
||||
mov.q %rbx, 8(%r12)
|
||||
@@ -260,6 +266,7 @@ func TestMachineCompileEntryPreamble(t *testing.T) {
|
||||
movdqu %xmm15, 40(%rsp)
|
||||
movq 192(%r12), %r15
|
||||
mov.q %r15, 56(%rsp)
|
||||
xor %rbp, %rbp
|
||||
callq *%r14
|
||||
mov.q %rax, (%r12)
|
||||
mov.q %rbx, 8(%r12)
|
||||
|
||||
@@ -85,7 +85,7 @@ func GoCallStackView(stackPointerBeforeGoCall *uint64) []uint64 {
|
||||
return view
|
||||
}
|
||||
|
||||
func AdjustStackAfterGrown(oldRsp, rsp, rbp, top uintptr) {
|
||||
func AdjustStackAfterGrown(oldRsp, oldTop, rsp, rbp, top uintptr) {
|
||||
diff := uint64(rsp - oldRsp)
|
||||
|
||||
newBuf := stackView(rbp, top)
|
||||
@@ -120,7 +120,16 @@ func AdjustStackAfterGrown(oldRsp, rsp, rbp, top uintptr) {
|
||||
// End of stack.
|
||||
break
|
||||
}
|
||||
if i64 := int64(callerRBP); i64 < int64(oldRsp) || i64 >= int64(oldTop) {
|
||||
panic("BUG: callerRBP is out of range")
|
||||
}
|
||||
if int(callerRBP) < 0 {
|
||||
panic("BUG: callerRBP is negative")
|
||||
}
|
||||
adjustedCallerRBP := callerRBP + diff
|
||||
if int(adjustedCallerRBP) < 0 {
|
||||
panic("BUG: adjustedCallerRBP is negative")
|
||||
}
|
||||
binary.LittleEndian.PutUint64(newBuf[i:], adjustedCallerRBP)
|
||||
i = adjustedCallerRBP - uint64(rbp)
|
||||
}
|
||||
|
||||
@@ -51,6 +51,7 @@ func TestAdjustStackAfterGrown(t *testing.T) {
|
||||
|
||||
oldStack := allocSlice(512)
|
||||
oldRsp := uintptr(unsafe.Pointer(&oldStack[0]))
|
||||
oldTop := uintptr(unsafe.Pointer(&oldStack[len(oldStack)-1]))
|
||||
rbpIndex := uintptr(32)
|
||||
binary.LittleEndian.PutUint64(oldStack[rbpIndex:], addressOf(&oldStack[16+rbpIndex]))
|
||||
binary.LittleEndian.PutUint64(oldStack[rbpIndex+16:], addressOf(&oldStack[32+rbpIndex]))
|
||||
@@ -62,7 +63,7 @@ func TestAdjustStackAfterGrown(t *testing.T) {
|
||||
// Copy old stack to new stack which contains the old pointers to the old stack elements.
|
||||
copy(newStack, oldStack)
|
||||
|
||||
AdjustStackAfterGrown(oldRsp, rsp, rbp, uintptr(addressOf(&newStack[len(newStack)-1])))
|
||||
AdjustStackAfterGrown(oldRsp, oldTop, rsp, rbp, uintptr(addressOf(&newStack[len(newStack)-1])))
|
||||
require.Equal(t, addressOf(&newStack[rbpIndex+16]), binary.LittleEndian.Uint64(newStack[rbpIndex:]))
|
||||
require.Equal(t, addressOf(&newStack[rbpIndex+32]), binary.LittleEndian.Uint64(newStack[rbpIndex+16:]))
|
||||
require.Equal(t, addressOf(&newStack[rbpIndex+160]), binary.LittleEndian.Uint64(newStack[rbpIndex+32:]))
|
||||
|
||||
@@ -5,6 +5,7 @@ import (
|
||||
"encoding/binary"
|
||||
"fmt"
|
||||
"reflect"
|
||||
"runtime"
|
||||
"sync/atomic"
|
||||
"unsafe"
|
||||
|
||||
@@ -252,6 +253,8 @@ func (c *callEngine) callWithStack(ctx context.Context, paramResultStack []uint6
|
||||
return nil
|
||||
case wazevoapi.ExitCodeGrowStack:
|
||||
oldsp := uintptr(unsafe.Pointer(c.execCtx.stackPointerBeforeGoCall))
|
||||
oldTop := c.stackTop
|
||||
oldStack := c.stack
|
||||
var newsp, newfp uintptr
|
||||
if wazevoapi.StackGuardCheckEnabled {
|
||||
newsp, newfp, err = c.growStackWithGuarded()
|
||||
@@ -261,7 +264,9 @@ func (c *callEngine) callWithStack(ctx context.Context, paramResultStack []uint6
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
adjustStackAfterGrown(oldsp, newsp, newfp, c.stackTop)
|
||||
adjustStackAfterGrown(oldsp, oldTop, newsp, newfp, c.stackTop)
|
||||
// Old stack must be alive until the new stack is adjusted.
|
||||
runtime.KeepAlive(oldStack)
|
||||
c.execCtx.exitCode = wazevoapi.ExitCodeOK
|
||||
afterGoFunctionCallEntrypoint(c.execCtx.goCallReturnAddress, c.execCtxPtr, newsp, newfp)
|
||||
case wazevoapi.ExitCodeGrowMemory:
|
||||
|
||||
@@ -47,14 +47,14 @@ func goCallStackView(stackPointerBeforeGoCall *uint64) []uint64 {
|
||||
|
||||
// adjustStackAfterGrown is a function to adjust the stack after it is grown.
|
||||
// More precisely, absolute addresses (frame pointers) in the stack must be adjusted.
|
||||
func adjustStackAfterGrown(oldsp, sp, fp, top uintptr) {
|
||||
func adjustStackAfterGrown(oldsp, oldTop, sp, fp, top uintptr) {
|
||||
switch runtime.GOARCH {
|
||||
case "arm64":
|
||||
// TODO: currently, the frame pointers are not used, and saved old sps are relative to the current stack pointer,
|
||||
// so no adjustment is needed on arm64. However, when we make it absolute, which in my opinion is better perf-wise
|
||||
// at the expense of slightly costly stack growth, we need to adjust the pushed frame pointers.
|
||||
case "amd64":
|
||||
amd64.AdjustStackAfterGrown(oldsp, sp, fp, top)
|
||||
amd64.AdjustStackAfterGrown(oldsp, oldTop, sp, fp, top)
|
||||
default:
|
||||
panic("unsupported architecture")
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user