From 9bc1ae6816da76dde0acdf9757d0d3183f5d80a6 Mon Sep 17 00:00:00 2001 From: Takeshi Yoneda Date: Fri, 10 Nov 2023 11:27:13 +0900 Subject: [PATCH] wazevo(arm64): places spill slots below clobbered regs (#1833) Signed-off-by: Takeshi Yoneda --- .../wazevo/backend/isa/arm64/machine.go | 2 +- .../isa/arm64/machine_pro_epi_logue.go | 160 +++++++++--------- .../isa/arm64/machine_pro_epi_logue_test.go | 8 +- .../isa/arm64/machine_regalloc_test.go | 10 +- .../wazevo/backend/isa/arm64/machine_test.go | 6 +- 5 files changed, 89 insertions(+), 97 deletions(-) diff --git a/internal/engine/wazevo/backend/isa/arm64/machine.go b/internal/engine/wazevo/backend/isa/arm64/machine.go index 046bf6be..abf3b8f0 100644 --- a/internal/engine/wazevo/backend/isa/arm64/machine.go +++ b/internal/engine/wazevo/backend/isa/arm64/machine.go @@ -485,7 +485,7 @@ func (m *machine) getVRegSpillSlotOffsetFromSP(id regalloc.VRegID, size byte) in m.spillSlots[id] = offset m.spillSlotSize += int64(size) } - return offset + m.clobberedRegSlotSize() + 16 // spill slot starts above the clobbered registers and the frame size. + return offset + 16 // spill slots start right above the 16-byte frame size slot; the clobbered registers are saved above the spill slots. } func (m *machine) clobberedRegSlotSize() int64 { diff --git a/internal/engine/wazevo/backend/isa/arm64/machine_pro_epi_logue.go b/internal/engine/wazevo/backend/isa/arm64/machine_pro_epi_logue.go index e24c48de..fd0e58a7 100644 --- a/internal/engine/wazevo/backend/isa/arm64/machine_pro_epi_logue.go +++ b/internal/engine/wazevo/backend/isa/arm64/machine_pro_epi_logue.go @@ -41,6 +41,39 @@ func (m *machine) SetupPrologue() { panic(fmt.Sprintf("BUG: spillSlotSize=%d, spillSlots=%v\n", m.spillSlotSize, m.spillSlots)) } + if regs := m.clobberedRegs; len(regs) > 0 { + // + // (high address) (high address) + // +-----------------+ +-----------------+ + // | ....... | | ....... | + // | ret Y | | ret Y | + // | ....... | | ....... | + // | ret 0 | | ret 0 | + // | arg X | | arg X | + // | ....... 
| | ....... | + // | arg 1 | | arg 1 | + // | arg 0 | | arg 0 | + // | size_of_arg_ret | | size_of_arg_ret | + // | ReturnAddress | | ReturnAddress | + // SP----> +-----------------+ ====> +-----------------+ + // (low address) | clobbered M | + // | ............ | + // | clobbered 0 | + // +-----------------+ <----- SP + // (low address) + // + _amode := addressModePreOrPostIndex(spVReg, + -16, // stack pointer must be 16-byte aligned. + true, // Decrement before store. + ) + for _, vr := range regs { + // TODO: pair stores to reduce the number of instructions. + store := m.allocateInstr() + store.asStore(operandNR(vr), _amode, regTypeToRegisterSizeInBits(vr.RegType())) + cur = linkInstr(cur, store) + } + } + if size := m.spillSlotSize; size > 0 { // Check if size is 16-byte aligned. if size&0xf != 0 { @@ -64,53 +97,17 @@ func (m *machine) SetupPrologue() { // | size_of_arg_ret | // | ReturnAddress | // +------------------+ - // | spill slot M | + // | clobbered M | + // | ............ | + // | clobbered 0 | + // | spill slot N | // | ............ | // | spill slot 2 | - // | spill slot 1 | + // | spill slot 0 | // SP----> +------------------+ // (low address) } - if regs := m.clobberedRegs; len(regs) > 0 { - // - // (high address) (high address) - // +-----------------+ +-----------------+ - // | ....... | | ....... | - // | ret Y | | ret Y | - // | ....... | | ....... | - // | ret 0 | | ret 0 | - // | arg X | | arg X | - // | ....... | | ....... | - // | arg 1 | | arg 1 | - // | arg 0 | | arg 0 | - // | size_of_arg_ret | | size_of_arg_ret | - // | ReturnAddress | | ReturnAddress | - // +-----------------+ ====> +-----------------+ - // | ........... | | ........... | - // | spill slot M | | spill slot M | - // | ............ | | ............ | - // | spill slot 2 | | spill slot 2 | - // | spill slot 1 | | spill slot 1 | - // SP----> +-----------------+ | spill slot 1 | - // (low address) | clobbered N | - // | ............ 
| - // | clobbered 0 | - // +-----------------+ <----- SP - // (low address) - // - _amode := addressModePreOrPostIndex(spVReg, - -16, // stack pointer must be 16-byte aligned. - true, // Decrement before store. - ) - for _, vr := range regs { - // TODO: pair stores to reduce the number of instructions. - store := m.allocateInstr() - store.asStore(operandNR(vr), _amode, regTypeToRegisterSizeInBits(vr.RegType())) - cur = linkInstr(cur, store) - } - } - // We push the frame size into the stack to make it possible to unwind stack: // // @@ -127,15 +124,14 @@ func (m *machine) SetupPrologue() { // | size_of_arg_ret | | size_of_arg_ret | // | ReturnAddress | | ReturnAddress | // +-----------------+ ==> +-----------------+ <----+ - // | ........... | | ........... | | - // | spill slot M | | spill slot M | | + // | clobbered M | | clobbered M | | // | ............ | | ............ | | - // | spill slot 2 | | spill slot 2 | | - // | spill slot 1 | | spill slot 1 | | frame size - // | spill slot 1 | | spill slot 1 | | - // | clobbered N | | clobbered N | | + // | clobbered 2 | | clobbered 2 | | + // | clobbered 1 | | clobbered 1 | | frame size + // | clobbered 0 | | clobbered 0 | | + // | spill slot N | | spill slot N | | // | ............ | | ............ | | - // | clobbered 0 | | clobbered 0 | <----+ + // | spill slot 0 | | spill slot 0 | <----+ // SP---> +-----------------+ | xxxxxx | ;; unused space to make it 16-byte aligned. // | frame_size | // +-----------------+ <---- SP @@ -215,6 +211,35 @@ func (m *machine) setupEpilogueAfter(cur *instruction) { // We've stored the frame size in the prologue, and now that we are about to return from this function, we won't need it anymore. cur = m.addsAddOrSubStackPointer(cur, spVReg, 16, true) + if s := m.spillSlotSize; s > 0 { + // Adjust SP to the original value: + // + // (high address) (high address) + // +-----------------+ +-----------------+ + // | ....... | | ....... | + // | ret Y | | ret Y | + // | ....... 
| | ....... | + // | ret 0 | | ret 0 | + // | arg X | | arg X | + // | ....... | | ....... | + // | arg 1 | | arg 1 | + // | arg 0 | | arg 0 | + // | xxxxx | | xxxxx | + // | ReturnAddress | | ReturnAddress | + // +-----------------+ ====> +-----------------+ + // | clobbered M | | clobbered M | + // | ............ | | ............ | + // | clobbered 1 | | clobbered 1 | + // | clobbered 0 | | clobbered 0 | + // | spill slot N | +-----------------+ <---- SP + // | ............ | + // | spill slot 0 | + // SP---> +-----------------+ + // (low address) + // + cur = m.addsAddOrSubStackPointer(cur, spVReg, s, true) + } + // First we need to restore the clobbered registers. if len(m.clobberedRegs) > 0 { // (high address) @@ -227,17 +252,13 @@ func (m *machine) setupEpilogueAfter(cur *instruction) { // | ....... | | ....... | // | arg 1 | | arg 1 | // | arg 0 | | arg 0 | + // | xxxxx | | xxxxx | // | ReturnAddress | | ReturnAddress | - // +-----------------+ ========> +-----------------+ - // | ........... | | ........... | - // | spill slot M | | spill slot M | - // | ............ | | ............ | - // | spill slot 2 | | spill slot 2 | - // | spill slot 1 | | spill slot 1 | - // | clobbered 0 | SP---> +-----------------+ + // +-----------------+ ========> +-----------------+ <---- SP + // | clobbered M | // | clobbered 1 | // | ........... | - // | clobbered N | + // | clobbered 0 | // SP---> +-----------------+ // (low address) @@ -260,33 +281,6 @@ func (m *machine) setupEpilogueAfter(cur *instruction) { } } - if s := m.spillSlotSize; s > 0 { - // Adjust SP to the original value: - // - // (high address) (high address) - // +-----------------+ +-----------------+ - // | ....... | | ....... | - // | ret Y | | ret Y | - // | ....... | | ....... | - // | ret 0 | | ret 0 | - // | arg X | | arg X | - // | ....... | | ....... 
| - // | arg 1 | | arg 1 | - // | arg 0 | | arg 0 | - // | xxxxx | | xxxxx | - // | ReturnAddress | | ReturnAddress | - // +-----------------+ ====> +-----------------+ <---- SP - // | ........... | (low address) - // | spill slot M | - // | ............ | - // | spill slot 2 | - // | spill slot 1 | - // SP---> +-----------------+ - // (low address) - // - cur = m.addsAddOrSubStackPointer(cur, spVReg, s, true) - } - // Reload the return address (lr). // // +-----------------+ +-----------------+ diff --git a/internal/engine/wazevo/backend/isa/arm64/machine_pro_epi_logue_test.go b/internal/engine/wazevo/backend/isa/arm64/machine_pro_epi_logue_test.go index 75b3944a..bc174a47 100644 --- a/internal/engine/wazevo/backend/isa/arm64/machine_pro_epi_logue_test.go +++ b/internal/engine/wazevo/backend/isa/arm64/machine_pro_epi_logue_test.go @@ -62,11 +62,11 @@ func TestMachine_SetupPrologue(t *testing.T) { clobberedRegs: []regalloc.VReg{v18VReg, v19VReg, x18VReg, x25VReg}, exp: ` stp x30, xzr, [sp, #-0x10]! - sub sp, sp, #0x140 str q18, [sp, #-0x10]! str q19, [sp, #-0x10]! str x18, [sp, #-0x10]! str x25, [sp, #-0x10]! + sub sp, sp, #0x140 orr x27, xzr, #0x180 str x27, [sp, #-0x10]! udf @@ -80,11 +80,11 @@ func TestMachine_SetupPrologue(t *testing.T) { orr x27, xzr, #0x1e0 sub sp, sp, x27 stp x30, x27, [sp, #-0x10]! - sub sp, sp, #0x140 str q18, [sp, #-0x10]! str q19, [sp, #-0x10]! str x18, [sp, #-0x10]! str x25, [sp, #-0x10]! + sub sp, sp, #0x140 orr x27, xzr, #0x180 str x27, [sp, #-0x10]! 
udf @@ -177,11 +177,11 @@ func TestMachine_SetupEpilogue(t *testing.T) { { exp: ` add sp, sp, #0x10 + add sp, sp, #0xa0 ldr x25, [sp], #0x10 ldr x18, [sp], #0x10 ldr q27, [sp], #0x10 ldr q18, [sp], #0x10 - add sp, sp, #0xa0 ldr x30, [sp], #0x10 ret `, @@ -191,11 +191,11 @@ func TestMachine_SetupEpilogue(t *testing.T) { { exp: ` add sp, sp, #0x10 + add sp, sp, #0xa0 ldr x25, [sp], #0x10 ldr x18, [sp], #0x10 ldr q27, [sp], #0x10 ldr q18, [sp], #0x10 - add sp, sp, #0xa0 ldr x30, [sp], #0x10 add sp, sp, #0x150 ret diff --git a/internal/engine/wazevo/backend/isa/arm64/machine_regalloc_test.go b/internal/engine/wazevo/backend/isa/arm64/machine_regalloc_test.go index 486cbcd3..389dc73e 100644 --- a/internal/engine/wazevo/backend/isa/arm64/machine_regalloc_test.go +++ b/internal/engine/wazevo/backend/isa/arm64/machine_regalloc_test.go @@ -62,7 +62,6 @@ func TestRegAllocFunctionImpl_ReversePostOrderBlockIterator(t *testing.T) { func TestRegAllocFunctionImpl_ReloadRegisterAfter(t *testing.T) { ctx, _, m := newSetupWithMockContext() - m.clobberedRegs = make([]regalloc.VReg, 3) // This will make the beginning of the spill slot at (3 + 1(frame size))* 16 bytes = 64. ctx.typeOf = map[regalloc.VReg]ssa.Type{x1VReg: ssa.TypeI64, v1VReg: ssa.TypeF64} i1, i2 := m.allocateNop(), m.allocateNop() @@ -86,14 +85,13 @@ func TestRegAllocFunctionImpl_ReloadRegisterAfter(t *testing.T) { m.rootInstr = i1 require.Equal(t, ` - ldr d1, [sp, #0x48] - ldr x1, [sp, #0x40] + ldr d1, [sp, #0x18] + ldr x1, [sp, #0x10] `, m.Format()) } func TestRegAllocFunctionImpl_StoreRegisterBefore(t *testing.T) { ctx, _, m := newSetupWithMockContext() - m.clobberedRegs = make([]regalloc.VReg, 3) // This will make the beginning of the spill slot at (3 + 1(frame size))* 16 bytes = 64. 
ctx.typeOf = map[regalloc.VReg]ssa.Type{x1VReg: ssa.TypeI64, v1VReg: ssa.TypeF64} i1, i2 := m.allocateNop(), m.allocateNop() @@ -117,8 +115,8 @@ func TestRegAllocFunctionImpl_StoreRegisterBefore(t *testing.T) { m.rootInstr = i1 require.Equal(t, ` - str x1, [sp, #0x40] - str d1, [sp, #0x48] + str x1, [sp, #0x10] + str d1, [sp, #0x18] `, m.Format()) } diff --git a/internal/engine/wazevo/backend/isa/arm64/machine_test.go b/internal/engine/wazevo/backend/isa/arm64/machine_test.go index 9edd9331..da4227c7 100644 --- a/internal/engine/wazevo/backend/isa/arm64/machine_test.go +++ b/internal/engine/wazevo/backend/isa/arm64/machine_test.go @@ -108,17 +108,17 @@ func TestMachine_ret0OffsetFromSP(t *testing.T) { } func TestMachine_getVRegSpillSlotOffsetFromSP(t *testing.T) { - m := &machine{clobberedRegs: make([]regalloc.VReg, 10), spillSlots: make(map[regalloc.VRegID]int64)} + m := &machine{spillSlots: make(map[regalloc.VRegID]int64)} id := regalloc.VRegID(1) offset := m.getVRegSpillSlotOffsetFromSP(id, 8) - require.Equal(t, int64(160)+16, offset) + require.Equal(t, int64(16), offset) require.Equal(t, int64(8), m.spillSlotSize) _, ok := m.spillSlots[id] require.True(t, ok) id = 100 offset = m.getVRegSpillSlotOffsetFromSP(id, 16) - require.Equal(t, int64(160)+16+8, offset) + require.Equal(t, int64(16+8), offset) require.Equal(t, int64(24), m.spillSlotSize) _, ok = m.spillSlots[id] require.True(t, ok)