This simplifies the calling convention and consolidates the call frame stack and the value stack into a single stack.

As a result, the cost of function calls decreases because we no longer need to check the boundary twice (once for the value stack and once for the call frame stack) at each function call.

The following are the benchmark results for the recursive Fibonacci function in integration_test/bench/testdata/case.go; they show that this change actually improves the performance of function calls.

[amd64]
name                               old time/op  new time/op  delta
Invocation/compiler/fib_for_5-32   109ns ± 3%   81ns ± 1%    -25.86%  (p=0.008 n=5+5)
Invocation/compiler/fib_for_10-32  556ns ± 3%   473ns ± 3%   -14.99%  (p=0.008 n=5+5)
Invocation/compiler/fib_for_20-32  61.4µs ± 2%  55.9µs ± 5%  -8.98%   (p=0.008 n=5+5)
Invocation/compiler/fib_for_30-32  7.41ms ± 3%  6.83ms ± 3%  -7.90%   (p=0.008 n=5+5)

[arm64]
name                               old time/op  new time/op  delta
Invocation/compiler/fib_for_5-10   67.7ns ± 1%  60.2ns ± 1%  -11.12%  (p=0.000 n=9+9)
Invocation/compiler/fib_for_10-10  487ns ± 1%   460ns ± 0%   -5.56%   (p=0.000 n=10+9)
Invocation/compiler/fib_for_20-10  58.0µs ± 1%  54.3µs ± 1%  -6.38%   (p=0.000 n=10+10)
Invocation/compiler/fib_for_30-10  7.12ms ± 1%  6.67ms ± 1%  -6.31%   (p=0.000 n=10+9)

Signed-off-by: Takeshi Yoneda <takeshi@tetrate.io>
220 lines
7.4 KiB
Go
220 lines
7.4 KiB
Go
package compiler
|
|
|
|
import (
|
|
"encoding/binary"
|
|
"testing"
|
|
|
|
"github.com/tetratelabs/wazero/internal/asm"
|
|
"github.com/tetratelabs/wazero/internal/asm/arm64"
|
|
"github.com/tetratelabs/wazero/internal/testing/require"
|
|
"github.com/tetratelabs/wazero/internal/wasm"
|
|
"github.com/tetratelabs/wazero/internal/wazeroir"
|
|
)
|
|
|
|
// TestArm64Compiler_V128Shuffle_ConstTable_MiddleOfFunction ensures that flushing constant table in the middle of
|
|
// function works well by intentionally setting arm64.AssemblerImpl MaxDisplacementForConstantPool = 0.
|
|
func TestArm64Compiler_V128Shuffle_ConstTable_MiddleOfFunction(t *testing.T) {
|
|
env := newCompilerEnvironment()
|
|
compiler := env.requireNewCompiler(t, newCompiler,
|
|
&wazeroir.CompilationResult{HasMemory: true, Signature: &wasm.FunctionType{}})
|
|
|
|
err := compiler.compilePreamble()
|
|
require.NoError(t, err)
|
|
|
|
lanes := [16]byte{1, 1, 1, 1, 0, 0, 0, 0, 10, 10, 10, 10, 0, 0, 0, 0}
|
|
v := [16]byte{0: 0xa, 1: 0xb, 10: 0xc}
|
|
w := [16]byte{0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}
|
|
exp := [16]byte{
|
|
0xb, 0xb, 0xb, 0xb,
|
|
0xa, 0xa, 0xa, 0xa,
|
|
0xc, 0xc, 0xc, 0xc,
|
|
0xa, 0xa, 0xa, 0xa,
|
|
}
|
|
|
|
err = compiler.compileV128Const(&wazeroir.OperationV128Const{
|
|
Lo: binary.LittleEndian.Uint64(v[:8]),
|
|
Hi: binary.LittleEndian.Uint64(v[8:]),
|
|
})
|
|
require.NoError(t, err)
|
|
|
|
err = compiler.compileV128Const(&wazeroir.OperationV128Const{
|
|
Lo: binary.LittleEndian.Uint64(w[:8]),
|
|
Hi: binary.LittleEndian.Uint64(w[8:]),
|
|
})
|
|
require.NoError(t, err)
|
|
|
|
err = compiler.compileV128Shuffle(&wazeroir.OperationV128Shuffle{Lanes: lanes})
|
|
require.NoError(t, err)
|
|
|
|
assembler := compiler.(*arm64Compiler).assembler.(*arm64.AssemblerImpl)
|
|
assembler.MaxDisplacementForConstantPool = 0 // Ensures that constant table for shuffle will be flushed immediately.
|
|
|
|
err = compiler.compileReturnFunction()
|
|
require.NoError(t, err)
|
|
|
|
// Generate and run the code under test.
|
|
code, _, err := compiler.compile()
|
|
require.NoError(t, err)
|
|
|
|
env.exec(code)
|
|
|
|
lo, hi := env.stackTopAsV128()
|
|
var actual [16]byte
|
|
binary.LittleEndian.PutUint64(actual[:8], lo)
|
|
binary.LittleEndian.PutUint64(actual[8:], hi)
|
|
require.Equal(t, exp, actual)
|
|
}
|
|
|
|
func TestArm64Compiler_V128Shuffle_combinations(t *testing.T) {
|
|
movValueRegisterToRegister := func(t *testing.T, c *arm64Compiler, src *runtimeValueLocation, dst asm.Register) {
|
|
c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.VORR, src.register, src.register, dst,
|
|
arm64.VectorArrangement16B)
|
|
c.locationStack.markRegisterUnused(src.register)
|
|
src.setRegister(dst)
|
|
// We have to set the lower 64-bits' location as well.
|
|
c.locationStack.stack[src.stackPointer-1].setRegister(dst)
|
|
c.locationStack.markRegisterUsed(dst)
|
|
}
|
|
|
|
tests := []struct {
|
|
name string
|
|
init func(t *testing.T, c *arm64Compiler)
|
|
wReg, vReg asm.Register
|
|
verifyFnc func(t *testing.T, env *compilerEnv)
|
|
expStackPointerAfterShuffle uint64
|
|
}{
|
|
{
|
|
name: "w=v1, v=v2",
|
|
wReg: arm64.RegV1,
|
|
vReg: arm64.RegV2,
|
|
init: func(t *testing.T, c *arm64Compiler) {},
|
|
verifyFnc: func(t *testing.T, env *compilerEnv) {},
|
|
expStackPointerAfterShuffle: 2,
|
|
},
|
|
{
|
|
name: "w=v2, v=v1",
|
|
wReg: arm64.RegV2,
|
|
vReg: arm64.RegV1,
|
|
init: func(t *testing.T, c *arm64Compiler) {},
|
|
verifyFnc: func(t *testing.T, env *compilerEnv) {},
|
|
expStackPointerAfterShuffle: 2,
|
|
},
|
|
{
|
|
name: "w=v29, v=v30",
|
|
wReg: arm64.RegV29, // will be moved to v30.
|
|
vReg: arm64.RegV30, // will be moved to v29.
|
|
init: func(t *testing.T, c *arm64Compiler) {},
|
|
verifyFnc: func(t *testing.T, env *compilerEnv) {},
|
|
expStackPointerAfterShuffle: 2,
|
|
},
|
|
{
|
|
name: "w=v12, v=v30",
|
|
wReg: arm64.RegV12, // will be moved to v30.
|
|
vReg: arm64.RegV30, // will be moved to v29.
|
|
init: func(t *testing.T, c *arm64Compiler) {
|
|
// Set up the previous value on the v3 register.
|
|
err := c.compileV128Const(&wazeroir.OperationV128Const{
|
|
Lo: 1234,
|
|
Hi: 5678,
|
|
})
|
|
require.NoError(t, err)
|
|
movValueRegisterToRegister(t, c, c.locationStack.peek(), arm64.RegV29)
|
|
},
|
|
verifyFnc: func(t *testing.T, env *compilerEnv) {
|
|
// Previous value on the V3 register must be saved onto the stack.
|
|
lo, hi := env.stack()[callFrameDataSizeInUint64], env.stack()[callFrameDataSizeInUint64+1]
|
|
require.Equal(t, uint64(1234), lo)
|
|
require.Equal(t, uint64(5678), hi)
|
|
},
|
|
expStackPointerAfterShuffle: 4,
|
|
},
|
|
{
|
|
name: "w=v29, v=v12",
|
|
wReg: arm64.RegV29, // will be moved to v30.
|
|
vReg: arm64.RegV12, // will be moved to v29.
|
|
init: func(t *testing.T, c *arm64Compiler) {
|
|
// Set up the previous value on the v3 register.
|
|
err := c.compileV128Const(&wazeroir.OperationV128Const{
|
|
Lo: 1234,
|
|
Hi: 5678,
|
|
})
|
|
require.NoError(t, err)
|
|
movValueRegisterToRegister(t, c, c.locationStack.peek(), arm64.RegV30)
|
|
},
|
|
verifyFnc: func(t *testing.T, env *compilerEnv) {
|
|
// Previous value on the V3 register must be saved onto the stack.
|
|
lo, hi := env.stack()[callFrameDataSizeInUint64], env.stack()[callFrameDataSizeInUint64+1]
|
|
require.Equal(t, uint64(1234), lo)
|
|
require.Equal(t, uint64(5678), hi)
|
|
},
|
|
expStackPointerAfterShuffle: 4,
|
|
},
|
|
}
|
|
|
|
lanes := [16]byte{1, 1, 1, 1, 0, 0, 0, 0, 10, 10, 10, 10, 0, 0, 0, 31}
|
|
v := [16]byte{0: 0xa, 1: 0xb, 10: 0xc}
|
|
w := [16]byte{0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 1}
|
|
exp := [16]byte{
|
|
0xb, 0xb, 0xb, 0xb,
|
|
0xa, 0xa, 0xa, 0xa,
|
|
0xc, 0xc, 0xc, 0xc,
|
|
0xa, 0xa, 0xa, 1,
|
|
}
|
|
|
|
for _, tc := range tests {
|
|
tc := tc
|
|
t.Run(tc.name, func(t *testing.T) {
|
|
env := newCompilerEnvironment()
|
|
compiler := env.requireNewCompiler(t, newCompiler,
|
|
&wazeroir.CompilationResult{HasMemory: true, Signature: &wasm.FunctionType{}})
|
|
|
|
err := compiler.compilePreamble()
|
|
require.NoError(t, err)
|
|
|
|
ac := compiler.(*arm64Compiler)
|
|
tc.init(t, ac)
|
|
|
|
err = compiler.compileV128Const(&wazeroir.OperationV128Const{
|
|
Lo: binary.LittleEndian.Uint64(v[:8]),
|
|
Hi: binary.LittleEndian.Uint64(v[8:]),
|
|
})
|
|
require.NoError(t, err)
|
|
|
|
vLocation := compiler.runtimeValueLocationStack().peek()
|
|
movValueRegisterToRegister(t, ac, vLocation, tc.vReg)
|
|
|
|
err = compiler.compileV128Const(&wazeroir.OperationV128Const{
|
|
Lo: binary.LittleEndian.Uint64(w[:8]),
|
|
Hi: binary.LittleEndian.Uint64(w[8:]),
|
|
})
|
|
require.NoError(t, err)
|
|
|
|
wLocation := compiler.runtimeValueLocationStack().peek()
|
|
movValueRegisterToRegister(t, ac, wLocation, tc.wReg)
|
|
|
|
err = compiler.compileV128Shuffle(&wazeroir.OperationV128Shuffle{Lanes: lanes})
|
|
require.NoError(t, err)
|
|
|
|
requireRuntimeLocationStackPointerEqual(t, tc.expStackPointerAfterShuffle, compiler)
|
|
require.Equal(t, 1, len(compiler.runtimeValueLocationStack().usedRegisters))
|
|
|
|
err = compiler.compileReturnFunction()
|
|
require.NoError(t, err)
|
|
|
|
// Generate and run the code under test.
|
|
code, _, err := compiler.compile()
|
|
require.NoError(t, err)
|
|
|
|
env.exec(code)
|
|
|
|
lo, hi := env.stackTopAsV128()
|
|
var actual [16]byte
|
|
binary.LittleEndian.PutUint64(actual[:8], lo)
|
|
binary.LittleEndian.PutUint64(actual[8:], hi)
|
|
require.Equal(t, exp, actual)
|
|
|
|
tc.verifyFnc(t, env)
|
|
})
|
|
}
|
|
}
|