From 0ca7ee134018a735dc6b3e0f5c97bf3b132ca6d2 Mon Sep 17 00:00:00 2001 From: Takeshi Yoneda Date: Mon, 18 Apr 2022 20:07:28 +0900 Subject: [PATCH] Use same CompiledCode for import replacement (#478) This commit allows CompiledCode to be re-used regardless of the existence of import replacement configs for instantiation. In order to achieve this, we introduce ModuleID, which is sha256 checksum calculated on source bytes, as a key for module compilation cache. Previously, we used*wasm.Module as keys for caches which differ before/after import replacement. Signed-off-by: Takeshi Yoneda takeshi@tetrate.io --- builder.go | 4 +- builder_test.go | 3 + config.go | 26 +------- .../integration_test/spectest/spec_test.go | 5 ++ internal/testing/enginetest/enginetest.go | 9 +++ internal/wasm/host.go | 4 ++ internal/wasm/interpreter/interpreter.go | 10 +-- internal/wasm/interpreter/interpreter_test.go | 8 ++- internal/wasm/jit/engine.go | 10 +-- internal/wasm/jit/engine_test.go | 7 +- internal/wasm/module.go | 12 ++++ wasm.go | 14 +--- wasm_test.go | 65 +++++-------------- 13 files changed, 78 insertions(+), 99 deletions(-) diff --git a/builder.go b/builder.go index c52a3833..c2213ae1 100644 --- a/builder.go +++ b/builder.go @@ -256,9 +256,7 @@ func (b *moduleBuilder) Build() (*CompiledCode, error) { return nil, err } - ret := &CompiledCode{module: module} - ret.addCacheEntry(module, b.r.store.Engine) - return &CompiledCode{module: module}, nil + return &CompiledCode{module: module, compiledEngine: b.r.store.Engine}, nil } // Instantiate implements ModuleBuilder.Instantiate diff --git a/builder_test.go b/builder_test.go index 279e9869..4a18bd7d 100644 --- a/builder_test.go +++ b/builder_test.go @@ -346,8 +346,11 @@ func TestNewModuleBuilder_Build(t *testing.T) { b := tc.input(NewRuntime()).(*moduleBuilder) m, err := b.Build() require.NoError(t, err) + requireHostModuleEquals(t, tc.expected, m.module) + require.Equal(t, b.r.store.Engine, m.compiledEngine) + // Built module must be instantiable by Engine. _, err = b.r.InstantiateModule(m) require.NoError(t, err) diff --git a/config.go b/config.go index bf17564f..7d73a47a 100644 --- a/config.go +++ b/config.go @@ -148,35 +148,15 @@ func (c *RuntimeConfig) WithFeatureMultiValue(enabled bool) *RuntimeConfig { // See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#semantic-phases%E2%91%A0 type CompiledCode struct { module *wasm.Module - // cachedEngines maps wasm.Engine to []*wasm.Module which originate from this .module. - // This is necessary to track which engine caches *Module where latter might be different - // from .module via import replacement config (ModuleConfig.WithImport). - cachedEngines map[wasm.Engine]map[*wasm.Module]struct{} + // compiledEngine holds an engine on which `module` is compiled. + compiledEngine wasm.Engine } // Close releases all the allocated resources for this CompiledCode. // // Note: it is safe to call Close while having outstanding calls from Modules instantiated from this *CompiledCode. func (c *CompiledCode) Close() { - for engine, modules := range c.cachedEngines { - for module := range modules { - engine.DeleteCompiledModule(module) - } - } -} - -func (c *CompiledCode) addCacheEntry(module *wasm.Module, engine wasm.Engine) { - if c.cachedEngines == nil { - c.cachedEngines = map[wasm.Engine]map[*wasm.Module]struct{}{} - } - - cache, ok := c.cachedEngines[engine] - if !ok { - cache = map[*wasm.Module]struct{}{} - c.cachedEngines[engine] = cache - } - - cache[module] = struct{}{} + c.compiledEngine.DeleteCompiledModule(c.module) } // ModuleConfig configures resources needed by functions that have low-level interactions with the host operating system. diff --git a/internal/integration_test/spectest/spec_test.go b/internal/integration_test/spectest/spec_test.go index 20d77f55..88934e1c 100644 --- a/internal/integration_test/spectest/spec_test.go +++ b/internal/integration_test/spectest/spec_test.go @@ -326,6 +326,7 @@ func runTest(t *testing.T, newEngine func(wasm.Features) wasm.Engine) { mod, err := binary.DecodeModule(buf, enabledFeatures, wasm.MemoryMaxPages) require.NoError(t, err, msg) require.NoError(t, mod.Validate(enabledFeatures)) + mod.AssignModuleID(buf) moduleName := c.Name if moduleName == "" { // When "(module ...) directive doesn't have name. @@ -337,8 +338,10 @@ func runTest(t *testing.T, newEngine func(wasm.Features) wasm.Engine) { moduleName = c.Filename } } + err = store.Engine.CompileModule(mod) require.NoError(t, err, msg) + moduleName = strings.TrimPrefix(moduleName, "$") _, err = store.Instantiate(context.Background(), mod, moduleName, nil) lastInstantiatedModuleName = moduleName @@ -463,6 +466,8 @@ func requireInstantiationError(t *testing.T, store *wasm.Store, buf []byte, msg return } + mod.AssignModuleID(buf) + err = store.Engine.CompileModule(mod) if err != nil { return diff --git a/internal/testing/enginetest/enginetest.go b/internal/testing/enginetest/enginetest.go index 0174b40e..dea748a5 100644 --- a/internal/testing/enginetest/enginetest.go +++ b/internal/testing/enginetest/enginetest.go @@ -115,6 +115,7 @@ func RunTestEngine_NewModuleEngine_InitTable(t *testing.T, et EngineTester) { TypeSection: []*wasm.FunctionType{}, FunctionSection: []uint32{}, CodeSection: []*wasm.Code{}, + ID: wasm.ModuleID{0}, } err := e.CompileModule(m) require.NoError(t, err) @@ -135,6 +136,7 @@ func RunTestEngine_NewModuleEngine_InitTable(t *testing.T, et EngineTester) { CodeSection: []*wasm.Code{ {Body: []byte{wasm.OpcodeEnd}}, {Body: []byte{wasm.OpcodeEnd}}, {Body: []byte{wasm.OpcodeEnd}}, {Body: []byte{wasm.OpcodeEnd}}, }, + ID: wasm.ModuleID{1}, } err := e.CompileModule(m) @@ -165,6 +167,7 @@ func RunTestEngine_NewModuleEngine_InitTable(t *testing.T, et EngineTester) { CodeSection: []*wasm.Code{ {Body: []byte{wasm.OpcodeEnd}}, {Body: []byte{wasm.OpcodeEnd}}, {Body: []byte{wasm.OpcodeEnd}}, {Body: []byte{wasm.OpcodeEnd}}, }, + ID: wasm.ModuleID{2}, } err := e.CompileModule(importedModule) @@ -189,6 +192,7 @@ func RunTestEngine_NewModuleEngine_InitTable(t *testing.T, et EngineTester) { TypeSection: []*wasm.FunctionType{}, FunctionSection: []uint32{}, CodeSection: []*wasm.Code{}, + ID: wasm.ModuleID{3}, } err = e.CompileModule(importingModule) require.NoError(t, err) @@ -210,6 +214,7 @@ func RunTestEngine_NewModuleEngine_InitTable(t *testing.T, et EngineTester) { CodeSection: []*wasm.Code{ {Body: []byte{wasm.OpcodeEnd}}, {Body: []byte{wasm.OpcodeEnd}}, {Body: []byte{wasm.OpcodeEnd}}, {Body: []byte{wasm.OpcodeEnd}}, }, + ID: wasm.ModuleID{4}, } err := e.CompileModule(importedModule) @@ -233,6 +238,7 @@ func RunTestEngine_NewModuleEngine_InitTable(t *testing.T, et EngineTester) { CodeSection: []*wasm.Code{ {Body: []byte{wasm.OpcodeEnd}}, {Body: []byte{wasm.OpcodeEnd}}, {Body: []byte{wasm.OpcodeEnd}}, {Body: []byte{wasm.OpcodeEnd}}, }, + ID: wasm.ModuleID{5}, } err = e.CompileModule(importingModule) @@ -506,6 +512,7 @@ func setupCallTests(t *testing.T, e wasm.Engine) (*wasm.ModuleInstance, *wasm.Mo HostFunctionSection: []*reflect.Value{&hostFnVal}, TypeSection: []*wasm.FunctionType{ft}, FunctionSection: []wasm.Index{0}, + ID: wasm.ModuleID{0}, } err := e.CompileModule(hostFnModule) @@ -526,6 +533,7 @@ func setupCallTests(t *testing.T, e wasm.Engine) (*wasm.ModuleInstance, *wasm.Mo {Body: []byte{wasm.OpcodeLocalGet, 0, wasm.OpcodeCall, byte(0), // Calling imported host function ^. wasm.OpcodeEnd}}, }, + ID: wasm.ModuleID{1}, } err = e.CompileModule(importedModule) @@ -550,6 +558,7 @@ func setupCallTests(t *testing.T, e wasm.Engine) (*wasm.ModuleInstance, *wasm.Mo {Body: []byte{wasm.OpcodeLocalGet, 0, wasm.OpcodeCall, 0 /* only one imported function */, wasm.OpcodeEnd}}, }, ImportSection: []*wasm.Import{{}}, + ID: wasm.ModuleID{2}, } err = e.CompileModule(importingModule) require.NoError(t, err) diff --git a/internal/wasm/host.go b/internal/wasm/host.go index c330c163..0899460a 100644 --- a/internal/wasm/host.go +++ b/internal/wasm/host.go @@ -64,6 +64,10 @@ func NewHostModule( return } } + + // Assins the ModuleID by calculating sha256 on inputs as host modules do not have `source` to hash. + m.AssignModuleID([]byte(fmt.Sprintf("%s:%v:%v:%v:%v", + moduleName, nameToGoFunc, nameToMemory, nameToGlobal, enabledFeatures))) return } diff --git a/internal/wasm/interpreter/interpreter.go b/internal/wasm/interpreter/interpreter.go index 4c144895..33b2436a 100644 --- a/internal/wasm/interpreter/interpreter.go +++ b/internal/wasm/interpreter/interpreter.go @@ -22,14 +22,14 @@ var callStackCeiling = buildoptions.CallStackCeiling // engine is an interpreter implementation of wasm.Engine type engine struct { enabledFeatures wasm.Features - codes map[*wasm.Module][]*code // guarded by mutex. + codes map[wasm.ModuleID][]*code // guarded by mutex. mux sync.RWMutex } func NewEngine(enabledFeatures wasm.Features) wasm.Engine { return &engine{ enabledFeatures: enabledFeatures, - codes: map[*wasm.Module][]*code{}, + codes: map[wasm.ModuleID][]*code{}, } } @@ -41,19 +41,19 @@ func (e *engine) DeleteCompiledModule(m *wasm.Module) { func (e *engine) deleteCodes(module *wasm.Module) { e.mux.Lock() defer e.mux.Unlock() - delete(e.codes, module) + delete(e.codes, module.ID) } func (e *engine) addCodes(module *wasm.Module, fs []*code) { e.mux.Lock() defer e.mux.Unlock() - e.codes[module] = fs + e.codes[module.ID] = fs } func (e *engine) getCodes(module *wasm.Module) (fs []*code, ok bool) { e.mux.RLock() defer e.mux.RUnlock() - fs, ok = e.codes[module] + fs, ok = e.codes[module.ID] return } diff --git a/internal/wasm/interpreter/interpreter_test.go b/internal/wasm/interpreter/interpreter_test.go index b47c3571..a9afc49d 100644 --- a/internal/wasm/interpreter/interpreter_test.go +++ b/internal/wasm/interpreter/interpreter_test.go @@ -217,13 +217,14 @@ func TestInterpreter_Compile(t *testing.T) { {Body: []byte{wasm.OpcodeEnd}}, {Body: []byte{wasm.OpcodeCall}}, // Call instruction without immediate for call target index is invalid and should fail to compile. }, + ID: wasm.ModuleID{}, } err := e.CompileModule(errModule) require.EqualError(t, err, "failed to lower func[2/3] to wazeroir: handling instruction: apply stack failed for call: reading immediates: EOF") // On the compilation failure, all the compiled functions including succeeded ones must be released. - _, ok := e.codes[errModule] + _, ok := e.codes[errModule.ID] require.False(t, ok) }) t.Run("ok", func(t *testing.T) { @@ -238,15 +239,16 @@ func TestInterpreter_Compile(t *testing.T) { {Body: []byte{wasm.OpcodeEnd}}, {Body: []byte{wasm.OpcodeEnd}}, }, + ID: wasm.ModuleID{}, } err := e.CompileModule(okModule) require.NoError(t, err) - compiled, ok := e.codes[okModule] + compiled, ok := e.codes[okModule.ID] require.True(t, ok) require.Equal(t, len(okModule.FunctionSection), len(compiled)) - _, ok = e.codes[okModule] + _, ok = e.codes[okModule.ID] require.True(t, ok) }) } diff --git a/internal/wasm/jit/engine.go b/internal/wasm/jit/engine.go index b0aace76..4cb93a4b 100644 --- a/internal/wasm/jit/engine.go +++ b/internal/wasm/jit/engine.go @@ -19,7 +19,7 @@ type ( // engine is an JIT implementation of wasm.Engine engine struct { enabledFeatures wasm.Features - codes map[*wasm.Module][]*code // guarded by mutex. + codes map[wasm.ModuleID][]*code // guarded by mutex. mux sync.RWMutex // setFinalizer defaults to runtime.SetFinalizer, but overridable for tests. setFinalizer func(obj interface{}, finalizer interface{}) @@ -475,19 +475,19 @@ func (e *engine) NewModuleEngine(name string, module *wasm.Module, importedFunct func (e *engine) deleteCodes(module *wasm.Module) { e.mux.Lock() defer e.mux.Unlock() - delete(e.codes, module) + delete(e.codes, module.ID) } func (e *engine) addCodes(module *wasm.Module, fs []*code) { e.mux.Lock() defer e.mux.Unlock() - e.codes[module] = fs + e.codes[module.ID] = fs } func (e *engine) getCodes(module *wasm.Module) (fs []*code, ok bool) { e.mux.RLock() defer e.mux.RUnlock() - fs, ok = e.codes[module] + fs, ok = e.codes[module.ID] return } @@ -568,7 +568,7 @@ func NewEngine(enabledFeatures wasm.Features) wasm.Engine { func newEngine(enabledFeatures wasm.Features) *engine { return &engine{ enabledFeatures: enabledFeatures, - codes: map[*wasm.Module][]*code{}, + codes: map[wasm.ModuleID][]*code{}, setFinalizer: runtime.SetFinalizer, } } diff --git a/internal/wasm/jit/engine_test.go b/internal/wasm/jit/engine_test.go index b036247e..959f2800 100644 --- a/internal/wasm/jit/engine_test.go +++ b/internal/wasm/jit/engine_test.go @@ -173,6 +173,7 @@ func TestJIT_CompileModule(t *testing.T) { {Body: []byte{wasm.OpcodeEnd}}, {Body: []byte{wasm.OpcodeEnd}}, }, + ID: wasm.ModuleID{}, } err := e.CompileModule(okModule) @@ -182,7 +183,7 @@ func TestJIT_CompileModule(t *testing.T) { err = e.CompileModule(okModule) require.NoError(t, err) - compiled, ok := e.codes[okModule] + compiled, ok := e.codes[okModule.ID] require.True(t, ok) require.Equal(t, len(okModule.FunctionSection), len(compiled)) @@ -201,6 +202,7 @@ func TestJIT_CompileModule(t *testing.T) { {Body: []byte{wasm.OpcodeEnd}}, {Body: []byte{wasm.OpcodeCall}}, // Call instruction without immediate for call target index is invalid and should fail to compile. }, + ID: wasm.ModuleID{}, } e := et.NewEngine(wasm.Features20191205).(*engine) @@ -208,7 +210,7 @@ func TestJIT_CompileModule(t *testing.T) { require.EqualError(t, err, "failed to lower func[2/3] to wazeroir: handling instruction: apply stack failed for call: reading immediates: EOF") // On the compilation failure, the compiled functions must not be cached. - _, ok := e.codes[errModule] + _, ok := e.codes[errModule.ID] require.False(t, ok) }) } @@ -306,6 +308,7 @@ func TestJIT_SliceAllocatedOnHeap(t *testing.T) { {Type: wasm.ExternTypeFunc, Index: 1, Name: valueStackCorruption}, {Type: wasm.ExternTypeFunc, Index: 2, Name: callStackCorruption}, }, + ID: wasm.ModuleID{1}, } err = store.Engine.CompileModule(m) diff --git a/internal/wasm/module.go b/internal/wasm/module.go index 5509d1e7..f350d61c 100644 --- a/internal/wasm/module.go +++ b/internal/wasm/module.go @@ -2,6 +2,7 @@ package wasm import ( "bytes" + "crypto/sha256" "errors" "fmt" "reflect" @@ -161,8 +162,14 @@ type Module struct { // preservation ensures a consistent initialization result. // See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#table-instances%E2%91%A0 validatedElementSegments []*validatedElementSegment + + // ID is the sha256 value of the source code (text/binary) and is used for caching. + ID ModuleID } +// ModuleID represents sha256 hash value uniquely assigned to Module. +type ModuleID = [sha256.Size]byte + // The wazero specific limitation described at RATIONALE.md. // TL;DR; We multiply by 8 (to get offsets in bytes) and the multiplication result must be less than 32bit max const ( @@ -170,6 +177,11 @@ const ( MaximumFunctionIndex = uint32(1 << 27) ) +// AssignModuleID calculates a sha256 checksum on `source` and set Module.ID to the result. +func (m *Module) AssignModuleID(source []byte) { + m.ID = sha256.Sum256(source) +} + // TypeOfFunction returns the wasm.SectionIDType index for the given function namespace index or nil. // Note: The function index namespace is preceded by imported functions. // TODO: Returning nil should be impossible when decode results are validated. Validate decode before back-filling tests. diff --git a/wasm.go b/wasm.go index 6a02e6a3..58818aaa 100644 --- a/wasm.go +++ b/wasm.go @@ -164,13 +164,13 @@ func (r *runtime) CompileModule(source []byte) (*CompiledCode, error) { return nil, err } + internal.AssignModuleID(source) + if err = r.store.Engine.CompileModule(internal); err != nil { return nil, err } - ret := &CompiledCode{module: internal} - ret.addCacheEntry(internal, r.store.Engine) - return ret, nil + return &CompiledCode{module: internal, compiledEngine: r.store.Engine}, nil } // InstantiateModuleFromCode implements Runtime.InstantiateModuleFromCode @@ -213,14 +213,6 @@ func (r *runtime) InstantiateModuleWithConfig(code *CompiledCode, config *Module } module := config.replaceImports(code.module) - if module != code.module { - // If replacing imports had an effect, the module changed, so we have to recompile it. - // TODO: maybe we should move replaceImports configs into CompileModule. - if err = r.store.Engine.CompileModule(module); err != nil { - return nil, err - } - code.addCacheEntry(module, r.store.Engine) - } mod, err = r.store.Instantiate(r.ctx, module, name, sysCtx) if err != nil { diff --git a/wasm_test.go b/wasm_test.go index 65c4c844..125ac204 100644 --- a/wasm_test.go +++ b/wasm_test.go @@ -5,7 +5,6 @@ import ( _ "embed" "fmt" "math" - "strconv" "testing" "github.com/tetratelabs/wazero/api" @@ -61,6 +60,7 @@ func TestRuntime_DecodeModule(t *testing.T) { if tc.expectedName != "" { require.Equal(t, tc.expectedName, code.module.NameSection.ModuleName) } + require.Equal(t, r.(*runtime).store.Engine, code.compiledEngine) }) } } @@ -428,57 +428,26 @@ func requireImportAndExportFunction(t *testing.T, r Runtime, hostFn func(ctx api )), mod.Close } -func TestCompiledCode_addCacheEntry(t *testing.T) { - c := &CompiledCode{} - - m1, e1 := &wasm.Module{}, &mockEngine{name: "1"} - for i := 0; i < 5; i++ { - c.addCacheEntry(m1, e1) - } - - require.NotNil(t, c.cachedEngines[e1]) - require.NotNil(t, c.cachedEngines[e1][m1]) - require.Equal(t, 1, len(c.cachedEngines[e1])) - - m2, e2 := &wasm.Module{}, &mockEngine{name: "2"} - for i := 0; i < 5; i++ { - c.addCacheEntry(m2, e2) - } - - require.NotNil(t, c.cachedEngines[e1]) - require.NotNil(t, c.cachedEngines[e2]) - require.NotNil(t, c.cachedEngines[e1][m1]) - require.NotNil(t, c.cachedEngines[e2][m2]) - require.Equal(t, 1, len(c.cachedEngines[e1])) - require.Equal(t, 1, len(c.cachedEngines[e2])) -} - func TestCompiledCode_Close(t *testing.T) { - e1, e2 := &mockEngine{name: "1", cachedModules: map[*wasm.Module]struct{}{}}, - &mockEngine{name: "2", cachedModules: map[*wasm.Module]struct{}{}} + e := &mockEngine{name: "1", cachedModules: map[*wasm.Module]struct{}{}} - c := &CompiledCode{} - for _, e := range []wasm.Engine{e1, e2} { - for i := 0; i < 10; i++ { - m := &wasm.Module{} - _, _ = e.NewModuleEngine(strconv.Itoa(i), m, nil, nil, nil, nil) - c.addCacheEntry(m, e) - } + var cs []*CompiledCode + for i := 0; i < 10; i++ { + m := &wasm.Module{} + err := e.CompileModule(m) + require.NoError(t, err) + cs = append(cs, &CompiledCode{module: m, compiledEngine: e}) } // Before Close. - require.Equal(t, 10, len(e1.cachedModules)) - require.Equal(t, 10, len(e2.cachedModules)) - require.Equal(t, 2, len(c.cachedEngines)) - for _, modules := range c.cachedEngines { - require.Equal(t, 10, len(modules)) + require.Equal(t, 10, len(e.cachedModules)) + + for _, c := range cs { + c.Close() } - c.Close() - // After Close. - require.Zero(t, len(e1.cachedModules)) - require.Zero(t, len(e2.cachedModules)) + require.Zero(t, len(e.cachedModules)) } type mockEngine struct { @@ -487,8 +456,7 @@ type mockEngine struct { } // NewModuleEngine implements the same method as documented on wasm.Engine. -func (e *mockEngine) NewModuleEngine(_ string, module *wasm.Module, _, _ []*wasm.FunctionInstance, _ *wasm.TableInstance, _ map[wasm.Index]wasm.Index) (wasm.ModuleEngine, error) { - e.cachedModules[module] = struct{}{} +func (e *mockEngine) NewModuleEngine(_ string, _ *wasm.Module, _, _ []*wasm.FunctionInstance, _ *wasm.TableInstance, _ map[wasm.Index]wasm.Index) (wasm.ModuleEngine, error) { return nil, nil } @@ -497,4 +465,7 @@ func (e *mockEngine) DeleteCompiledModule(module *wasm.Module) { delete(e.cachedModules, module) } -func (e *mockEngine) CompileModule(module *wasm.Module) error { return nil } +func (e *mockEngine) CompileModule(module *wasm.Module) error { + e.cachedModules[module] = struct{}{} + return nil +}