This changes the mmap strategy used in the compiler backend.
Previously, we issued one mmap syscall per function and allocated the
executable pages each time. However, mmap can only allocate memory in
multiples of the underlying OS page size. Even if the requested
executable is smaller than the page size, the entire page is marked as
executable and won't be reused by the Go runtime. Therefore, we wasted
roughly `osPageSize - len(body)%osPageSize` bytes per function.
Even though we still need to align each function on a 16-byte boundary
when mmapping once per module, the wasted space is much smaller than before.
The following benchmark results show that this improves the overall
compilation performance, although the reported heap usage increases.
However, the increased heap usage is more than offset by the hidden wasted
memory pages, which are not measured by Go's -benchmem.
In fact, when I ran the experiments, I observed that roughly 20-30 MB were
previously wasted on arm64, which is larger than the increase in heap usage
in this result. More importantly, the increased heap usage is subject to GC
and should be negligible in a long-running program, whereas the wasted pages
persist until the CompiledModule is closed.
Beyond the compilation time itself, the results indicate that this could
improve the overall performance of the Go runtime, possibly thanks to no
longer abusing runtime.SetFinalizer, since it also improves the subsequent
interpreter benchmark results.
```
goos: darwin
goarch: arm64
pkg: github.com/tetratelabs/wazero/internal/integration_test/bench
│ old.txt │ new.txt │
│ sec/op │ sec/op vs base │
Compilation_sqlite3/compiler-10 183.4m ± 0% 175.9m ± 2% -4.10% (p=0.001 n=7)
Compilation_sqlite3/interpreter-10 61.59m ± 0% 59.57m ± 0% -3.29% (p=0.001 n=7)
geomean 106.3m 102.4m -3.69%
│ old.txt │ new.txt │
│ B/op │ B/op vs base │
Compilation_sqlite3/compiler-10 42.93Mi ± 0% 54.33Mi ± 0% +26.56% (p=0.001 n=7)
Compilation_sqlite3/interpreter-10 51.75Mi ± 0% 51.75Mi ± 0% -0.01% (p=0.001 n=7)
geomean 47.13Mi 53.02Mi +12.49%
│ old.txt │ new.txt │
│ allocs/op │ allocs/op vs base │
Compilation_sqlite3/compiler-10 26.07k ± 0% 26.06k ± 0% ~ (p=0.149 n=7)
Compilation_sqlite3/interpreter-10 13.90k ± 0% 13.90k ± 0% ~ (p=0.421 n=7)
geomean 19.03k 19.03k -0.02%
goos: linux
goarch: amd64
pkg: github.com/tetratelabs/wazero/internal/integration_test/bench
cpu: AMD Ryzen 9 3950X 16-Core Processor
│ old.txt │ new.txt │
│ sec/op │ sec/op vs base │
Compilation_sqlite3/compiler-32 384.4m ± 2% 373.0m ± 4% -2.97% (p=0.001 n=7)
Compilation_sqlite3/interpreter-32 86.09m ± 4% 65.05m ± 2% -24.44% (p=0.001 n=7)
geomean 181.9m 155.8m -14.38%
│ old.txt │ new.txt │
│ B/op │ B/op vs base │
Compilation_sqlite3/compiler-32 49.40Mi ± 0% 59.91Mi ± 0% +21.29% (p=0.001 n=7)
Compilation_sqlite3/interpreter-32 51.77Mi ± 0% 51.76Mi ± 0% -0.02% (p=0.001 n=7)
geomean 50.57Mi 55.69Mi +10.12%
│ old.txt │ new.txt │
│ allocs/op │ allocs/op vs base │
Compilation_sqlite3/compiler-32 28.70k ± 0% 28.70k ± 0% ~ (p=0.925 n=7)
Compilation_sqlite3/interpreter-32 14.00k ± 0% 14.00k ± 0% -0.04% (p=0.010 n=7)
geomean 20.05k 20.04k -0.02%
```
resolves #1060
Signed-off-by: Takeshi Yoneda <takeshi@tetrate.io>
309 lines
12 KiB
Go
309 lines
12 KiB
Go
package compiler
|
|
|
|
import (
|
|
"fmt"
|
|
"math"
|
|
"os"
|
|
"runtime"
|
|
"testing"
|
|
"unsafe"
|
|
|
|
"github.com/tetratelabs/wazero/internal/platform"
|
|
"github.com/tetratelabs/wazero/internal/testing/require"
|
|
"github.com/tetratelabs/wazero/internal/wasm"
|
|
"github.com/tetratelabs/wazero/internal/wazeroir"
|
|
)
|
|
|
|
func TestMain(m *testing.M) {
|
|
if !platform.CompilerSupported() {
|
|
os.Exit(0)
|
|
}
|
|
os.Exit(m.Run())
|
|
}
|
|
|
|
// Ensures that the offset consts do not drift when we manipulate the target
|
|
// structs.
|
|
//
|
|
// Note: This is a package initializer as many tests could fail if these
|
|
// constants are misaligned, hiding the root cause.
|
|
func init() {
|
|
var me moduleEngine
|
|
requireEqual := func(expected, actual int, name string) {
|
|
if expected != actual {
|
|
panic(fmt.Sprintf("%s: expected %d, but was %d", name, expected, actual))
|
|
}
|
|
}
|
|
requireEqual(int(unsafe.Offsetof(me.functions)), moduleEngineFunctionsOffset, "moduleEngineFunctionsOffset")
|
|
|
|
var ce callEngine
|
|
// Offsets for callEngine.moduleContext.
|
|
requireEqual(int(unsafe.Offsetof(ce.fn)), callEngineModuleContextFnOffset, "callEngineModuleContextFnOffset")
|
|
requireEqual(int(unsafe.Offsetof(ce.moduleInstance)), callEngineModuleContextModuleInstanceOffset, "callEngineModuleContextModuleInstanceOffset")
|
|
requireEqual(int(unsafe.Offsetof(ce.globalElement0Address)), callEngineModuleContextGlobalElement0AddressOffset, "callEngineModuleContextGlobalElement0AddressOffset")
|
|
requireEqual(int(unsafe.Offsetof(ce.memoryElement0Address)), callEngineModuleContextMemoryElement0AddressOffset, "callEngineModuleContextMemoryElement0AddressOffset")
|
|
requireEqual(int(unsafe.Offsetof(ce.memorySliceLen)), callEngineModuleContextMemorySliceLenOffset, "callEngineModuleContextMemorySliceLenOffset")
|
|
requireEqual(int(unsafe.Offsetof(ce.memoryInstance)), callEngineModuleContextMemoryInstanceOffset, "callEngineModuleContextMemoryInstanceOffset")
|
|
requireEqual(int(unsafe.Offsetof(ce.tablesElement0Address)), callEngineModuleContextTablesElement0AddressOffset, "callEngineModuleContextTablesElement0AddressOffset")
|
|
requireEqual(int(unsafe.Offsetof(ce.functionsElement0Address)), callEngineModuleContextFunctionsElement0AddressOffset, "callEngineModuleContextFunctionsElement0AddressOffset")
|
|
requireEqual(int(unsafe.Offsetof(ce.typeIDsElement0Address)), callEngineModuleContextTypeIDsElement0AddressOffset, "callEngineModuleContextTypeIDsElement0AddressOffset")
|
|
requireEqual(int(unsafe.Offsetof(ce.dataInstancesElement0Address)), callEngineModuleContextDataInstancesElement0AddressOffset, "callEngineModuleContextDataInstancesElement0AddressOffset")
|
|
requireEqual(int(unsafe.Offsetof(ce.elementInstancesElement0Address)), callEngineModuleContextElementInstancesElement0AddressOffset, "callEngineModuleContextElementInstancesElement0AddressOffset")
|
|
|
|
// Offsets for callEngine.stackContext
|
|
requireEqual(int(unsafe.Offsetof(ce.stackPointer)), callEngineStackContextStackPointerOffset, "callEngineStackContextStackPointerOffset")
|
|
requireEqual(int(unsafe.Offsetof(ce.stackBasePointerInBytes)), callEngineStackContextStackBasePointerInBytesOffset, "callEngineStackContextStackBasePointerInBytesOffset")
|
|
requireEqual(int(unsafe.Offsetof(ce.stackElement0Address)), callEngineStackContextStackElement0AddressOffset, "callEngineStackContextStackElement0AddressOffset")
|
|
requireEqual(int(unsafe.Offsetof(ce.stackLenInBytes)), callEngineStackContextStackLenInBytesOffset, "callEngineStackContextStackLenInBytesOffset")
|
|
|
|
// Offsets for callEngine.exitContext.
|
|
requireEqual(int(unsafe.Offsetof(ce.statusCode)), callEngineExitContextNativeCallStatusCodeOffset, "callEngineExitContextNativeCallStatusCodeOffset")
|
|
requireEqual(int(unsafe.Offsetof(ce.builtinFunctionCallIndex)), callEngineExitContextBuiltinFunctionCallIndexOffset, "callEngineExitContextBuiltinFunctionCallIndexOffset")
|
|
requireEqual(int(unsafe.Offsetof(ce.returnAddress)), callEngineExitContextReturnAddressOffset, "callEngineExitContextReturnAddressOffset")
|
|
requireEqual(int(unsafe.Offsetof(ce.callerModuleInstance)), callEngineExitContextCallerModuleInstanceOffset, "callEngineExitContextCallerModuleInstanceOffset")
|
|
|
|
// Size and offsets for callFrame.
|
|
var frame callFrame
|
|
requireEqual(int(unsafe.Sizeof(frame))/8, callFrameDataSizeInUint64, "callFrameDataSize")
|
|
|
|
// Offsets for code.
|
|
var f function
|
|
requireEqual(int(unsafe.Offsetof(f.codeInitialAddress)), functionCodeInitialAddressOffset, "functionCodeInitialAddressOffset")
|
|
requireEqual(int(unsafe.Offsetof(f.moduleInstance)), functionModuleInstanceOffset, "functionModuleInstanceOffset")
|
|
requireEqual(int(unsafe.Offsetof(f.typeID)), functionTypeIDOffset, "functionTypeIDOffset")
|
|
requireEqual(int(unsafe.Sizeof(f)), functionSize, "functionModuleInstanceOffset")
|
|
|
|
// Offsets for wasm.ModuleInstance.
|
|
var moduleInstance wasm.ModuleInstance
|
|
requireEqual(int(unsafe.Offsetof(moduleInstance.Globals)), moduleInstanceGlobalsOffset, "moduleInstanceGlobalsOffset")
|
|
requireEqual(int(unsafe.Offsetof(moduleInstance.MemoryInstance)), moduleInstanceMemoryOffset, "moduleInstanceMemoryOffset")
|
|
requireEqual(int(unsafe.Offsetof(moduleInstance.Tables)), moduleInstanceTablesOffset, "moduleInstanceTablesOffset")
|
|
requireEqual(int(unsafe.Offsetof(moduleInstance.Engine)), moduleInstanceEngineOffset, "moduleInstanceEngineOffset")
|
|
requireEqual(int(unsafe.Offsetof(moduleInstance.TypeIDs)), moduleInstanceTypeIDsOffset, "moduleInstanceTypeIDsOffset")
|
|
requireEqual(int(unsafe.Offsetof(moduleInstance.DataInstances)), moduleInstanceDataInstancesOffset, "moduleInstanceDataInstancesOffset")
|
|
requireEqual(int(unsafe.Offsetof(moduleInstance.ElementInstances)), moduleInstanceElementInstancesOffset, "moduleInstanceElementInstancesOffset")
|
|
|
|
// Offsets for wasm.Table.
|
|
var tableInstance wasm.TableInstance
|
|
requireEqual(int(unsafe.Offsetof(tableInstance.References)), tableInstanceTableOffset, "tableInstanceTableOffset")
|
|
// We add "+8" to get the length of Tables[0].Table
|
|
// since the slice header is laid out as {Data uintptr, Len int64, Cap int64} on memory.
|
|
requireEqual(int(unsafe.Offsetof(tableInstance.References)+8), tableInstanceTableLenOffset, "tableInstanceTableLenOffset")
|
|
|
|
// Offsets for wasm.Memory
|
|
var memoryInstance wasm.MemoryInstance
|
|
requireEqual(int(unsafe.Offsetof(memoryInstance.Buffer)), memoryInstanceBufferOffset, "memoryInstanceBufferOffset")
|
|
// "+8" because the slice header is laid out as {Data uintptr, Len int64, Cap int64} on memory.
|
|
requireEqual(int(unsafe.Offsetof(memoryInstance.Buffer)+8), memoryInstanceBufferLenOffset, "memoryInstanceBufferLenOffset")
|
|
|
|
// Offsets for wasm.GlobalInstance
|
|
var globalInstance wasm.GlobalInstance
|
|
requireEqual(int(unsafe.Offsetof(globalInstance.Val)), globalInstanceValueOffset, "globalInstanceValueOffset")
|
|
|
|
var dataInstance wasm.DataInstance
|
|
requireEqual(int(unsafe.Sizeof(dataInstance)), dataInstanceStructSize, "dataInstanceStructSize")
|
|
|
|
var elementInstance wasm.ElementInstance
|
|
requireEqual(int(unsafe.Sizeof(elementInstance)), elementInstanceStructSize, "elementInstanceStructSize")
|
|
|
|
var pointer uintptr
|
|
requireEqual(int(unsafe.Sizeof(pointer)), 1<<pointerSizeLog2, "pointerSizeLog2")
|
|
}
|
|
|
|
type compilerEnv struct {
|
|
me *moduleEngine
|
|
ce *callEngine
|
|
moduleInstance *wasm.ModuleInstance
|
|
}
|
|
|
|
func (j *compilerEnv) stackTopAsUint32() uint32 {
|
|
return uint32(j.stack()[j.ce.stackContext.stackPointer-1])
|
|
}
|
|
|
|
func (j *compilerEnv) stackTopAsInt32() int32 {
|
|
return int32(j.stack()[j.ce.stackContext.stackPointer-1])
|
|
}
|
|
|
|
func (j *compilerEnv) stackTopAsUint64() uint64 {
|
|
return j.stack()[j.ce.stackContext.stackPointer-1]
|
|
}
|
|
|
|
func (j *compilerEnv) stackTopAsInt64() int64 {
|
|
return int64(j.stack()[j.ce.stackContext.stackPointer-1])
|
|
}
|
|
|
|
func (j *compilerEnv) stackTopAsFloat32() float32 {
|
|
return math.Float32frombits(uint32(j.stack()[j.ce.stackContext.stackPointer-1]))
|
|
}
|
|
|
|
func (j *compilerEnv) stackTopAsFloat64() float64 {
|
|
return math.Float64frombits(j.stack()[j.ce.stackContext.stackPointer-1])
|
|
}
|
|
|
|
func (j *compilerEnv) stackTopAsV128() (lo uint64, hi uint64) {
|
|
st := j.stack()
|
|
return st[j.ce.stackContext.stackPointer-2], st[j.ce.stackContext.stackPointer-1]
|
|
}
|
|
|
|
func (j *compilerEnv) memory() []byte {
|
|
return j.moduleInstance.MemoryInstance.Buffer
|
|
}
|
|
|
|
func (j *compilerEnv) stack() []uint64 {
|
|
return j.ce.stack
|
|
}
|
|
|
|
func (j *compilerEnv) compilerStatus() nativeCallStatusCode {
|
|
return j.ce.exitContext.statusCode
|
|
}
|
|
|
|
func (j *compilerEnv) builtinFunctionCallAddress() wasm.Index {
|
|
return j.ce.exitContext.builtinFunctionCallIndex
|
|
}
|
|
|
|
// stackPointer returns the stack pointer minus the call frame.
|
|
func (j *compilerEnv) stackPointer() uint64 {
|
|
return j.ce.stackContext.stackPointer - callFrameDataSizeInUint64
|
|
}
|
|
|
|
func (j *compilerEnv) stackBasePointer() uint64 {
|
|
return j.ce.stackContext.stackBasePointerInBytes >> 3
|
|
}
|
|
|
|
func (j *compilerEnv) setStackPointer(sp uint64) {
|
|
j.ce.stackContext.stackPointer = sp
|
|
}
|
|
|
|
func (j *compilerEnv) addGlobals(g ...*wasm.GlobalInstance) {
|
|
j.moduleInstance.Globals = append(j.moduleInstance.Globals, g...)
|
|
}
|
|
|
|
func (j *compilerEnv) globals() []*wasm.GlobalInstance {
|
|
return j.moduleInstance.Globals
|
|
}
|
|
|
|
func (j *compilerEnv) addTable(table *wasm.TableInstance) {
|
|
j.moduleInstance.Tables = append(j.moduleInstance.Tables, table)
|
|
}
|
|
|
|
func (j *compilerEnv) setStackBasePointer(sp uint64) {
|
|
j.ce.stackContext.stackBasePointerInBytes = sp << 3
|
|
}
|
|
|
|
func (j *compilerEnv) module() *wasm.ModuleInstance {
|
|
return j.moduleInstance
|
|
}
|
|
|
|
func (j *compilerEnv) moduleEngine() *moduleEngine {
|
|
return j.me
|
|
}
|
|
|
|
func (j *compilerEnv) callEngine() *callEngine {
|
|
return j.ce
|
|
}
|
|
|
|
func (j *compilerEnv) exec(machineCode []byte) {
|
|
executable := requireExecutable(machineCode)
|
|
f := &function{
|
|
parent: &compiledFunction{parent: &compiledModule{executable: executable}},
|
|
codeInitialAddress: uintptr(unsafe.Pointer(&executable[0])),
|
|
moduleInstance: j.moduleInstance,
|
|
}
|
|
j.ce.initialFn = f
|
|
j.ce.fn = f
|
|
|
|
nativecall(
|
|
uintptr(unsafe.Pointer(&executable[0])),
|
|
uintptr(unsafe.Pointer(j.ce)),
|
|
j.moduleInstance,
|
|
)
|
|
}
|
|
|
|
func (j *compilerEnv) requireNewCompiler(t *testing.T, functionType *wasm.FunctionType, fn func() compiler, ir *wazeroir.CompilationResult) compilerImpl {
|
|
requireSupportedOSArch(t)
|
|
|
|
if ir == nil {
|
|
ir = &wazeroir.CompilationResult{
|
|
LabelCallers: map[wazeroir.Label]uint32{},
|
|
}
|
|
}
|
|
|
|
c := fn()
|
|
c.Init(functionType, ir, false)
|
|
|
|
ret, ok := c.(compilerImpl)
|
|
require.True(t, ok)
|
|
return ret
|
|
}
|
|
|
|
// compilerImpl is the interface used for architecture-independent unit tests in this pkg.
|
|
// This is currently implemented by amd64 and arm64.
|
|
type compilerImpl interface {
|
|
compiler
|
|
compileExitFromNativeCode(nativeCallStatusCode)
|
|
compileMaybeGrowStack() error
|
|
compileReturnFunction() error
|
|
assignStackPointerCeil(uint64)
|
|
setStackPointerCeil(uint64)
|
|
compileReleaseRegisterToStack(loc *runtimeValueLocation)
|
|
setRuntimeValueLocationStack(*runtimeValueLocationStack)
|
|
compileEnsureOnRegister(loc *runtimeValueLocation) error
|
|
compileModuleContextInitialization() error
|
|
}
|
|
|
|
// defaultMemoryPageNumInTest is the number of wasm memory pages that
// newCompilerEnvironment allocates for the test module's memory buffer.
const defaultMemoryPageNumInTest = 1
|
|
|
|
func newCompilerEnvironment() *compilerEnv {
|
|
me := &moduleEngine{}
|
|
return &compilerEnv{
|
|
me: me,
|
|
moduleInstance: &wasm.ModuleInstance{
|
|
MemoryInstance: &wasm.MemoryInstance{Buffer: make([]byte, wasm.MemoryPageSize*defaultMemoryPageNumInTest)},
|
|
Tables: []*wasm.TableInstance{},
|
|
Globals: []*wasm.GlobalInstance{},
|
|
Engine: me,
|
|
},
|
|
ce: me.newCallEngine(initialStackSize, &function{parent: &compiledFunction{parent: &compiledModule{}}}),
|
|
}
|
|
}
|
|
|
|
// requireRuntimeLocationStackPointerEqual ensures that the compiler's runtimeValueLocationStack has
|
|
// the expected stack pointer value relative to the call frame.
|
|
func requireRuntimeLocationStackPointerEqual(t *testing.T, expSP uint64, c compiler) {
|
|
require.Equal(t, expSP, c.runtimeValueLocationStack().sp-callFrameDataSizeInUint64)
|
|
}
|
|
|
|
// TestCompileI32WrapFromI64 is the regression test for https://github.com/tetratelabs/wazero/issues/1008
|
|
func TestCompileI32WrapFromI64(t *testing.T) {
|
|
c := newCompiler()
|
|
c.Init(&wasm.FunctionType{}, nil, false)
|
|
|
|
// Push the original i64 value.
|
|
loc := c.runtimeValueLocationStack().pushRuntimeValueLocationOnStack()
|
|
loc.valueType = runtimeValueTypeI64
|
|
// Wrap it as the i32, and this should result in having runtimeValueTypeI32 on top of the stack.
|
|
err := c.compileI32WrapFromI64()
|
|
require.NoError(t, err)
|
|
require.Equal(t, runtimeValueTypeI32, loc.valueType)
|
|
}
|
|
|
|
func operationPtr(operation wazeroir.UnionOperation) *wazeroir.UnionOperation {
|
|
return &operation
|
|
}
|
|
|
|
func requireExecutable(original []byte) (executable []byte) {
|
|
executable, err := platform.MmapCodeSegment(len(original))
|
|
if err != nil {
|
|
panic(err)
|
|
}
|
|
copy(executable, original)
|
|
|
|
if runtime.GOARCH == "arm64" {
|
|
err = platform.MprotectRX(executable)
|
|
if err != nil {
|
|
panic(err)
|
|
}
|
|
}
|
|
|
|
return executable
|
|
}
|