Files
wazero/internal/wasm/binary/names.go
Crypt Keeper 2664b1eb62 Simplifies API per feedback (#427)
During #425, @neilalexander gave constructive feedback that the API is
both moving fast, and not good enough yet. This attempts to reduce the
incidental complexity at the cost of a little conflation.

### odd presence of `wasm` and `wasi` packages -> `api` package

We had public API packages in wasm and wasi, which helped us avoid
leaking too many internals as public. That these had names that look
like there should be implementations in them caused unnecessary
confusion. This squashes both into one package "api" which has no
package collision with anything.

We've long struggled with the poorly specified and non-uniformly
implemented WASI specification. Trying to bring visibility to its
constraints knowing they are routinely invalid taints our API for no
good reason. This removes all `WASI` commands in favor of a default that
invokes the function `_start` if it exists. In doing so, there's only one
path to start a module.

Moreover, this puts all wasi code in a top-level package "wasi" as it
isn't re-imported by any internal types.

### Reuse of Module for pre and post instantiation to `Binary` -> `Module`

Module is defined by WebAssembly in many phases, from decoded to
instantiated. However, using the same noun in multiple packages is very
confusing. At one point we tried the names "DecodedModule" and
"InstantiatedModule", but this is a fool's errand. By deviating slightly
from the spec we can make it unambiguous what a module is.

This makes the result of compilation a `Binary`, retaining `Module` for an
instantiated one. In doing so, there are no longer any name conflicts
whatsoever.

### Confusion about config -> `ModuleConfig`

Also caused by splitting wasm into wasm+wasi is configuration. This
conflates both into the same type `ModuleConfig` as it is simpler than
trying to explain a "will never be finished" api of wasi snapshot-01 in
routine use of WebAssembly. In other words, this further moves WASI out
of the foreground as it has been nothing but a burden.

```diff
--- a/README.md
+++ b/README.md
@@ -49,8 +49,8 @@ For example, here's how you can allow WebAssembly modules to read
-wm, err := r.InstantiateModule(wazero.WASISnapshotPreview1())
-defer wm.Close()
+wm, err := wasi.InstantiateSnapshotPreview1(r)
+defer wm.Close()

-sysConfig := wazero.NewSysConfig().WithFS(os.DirFS("/work/home"))
-module, err := wazero.StartWASICommandWithConfig(r, compiled, sysConfig)
+config := wazero.ModuleConfig().WithFS(os.DirFS("/work/home"))
+module, err := r.InstantiateModule(binary, config)
 defer module.Close()
 ...
```
2022-04-02 06:42:36 +08:00

229 lines
7.5 KiB
Go

package binary
import (
"bytes"
"fmt"
"io"
"github.com/tetratelabs/wazero/internal/leb128"
"github.com/tetratelabs/wazero/internal/wasm"
)
// Subsection IDs of the custom "name" section handled by this file. decodeNameSection skips any subsection
// whose ID is not listed here.
const (
// subsectionIDModuleName (0) identifies the subsection that contains only the module name.
subsectionIDModuleName = uint8(0)
// subsectionIDFunctionNames (1) identifies the subsection holding a map of function indices to function names,
// in ascending order by function index.
subsectionIDFunctionNames = uint8(1)
// subsectionIDLocalNames (2) identifies the subsection holding a map of function indices to a map of local
// indices to their names, in ascending order by function and local index.
subsectionIDLocalNames = uint8(2)
)
// decodeNameSection deserializes the data associated with the "name" key in SectionIDCustom according to the
// standard:
//
// * ModuleName decode from subsection 0
// * FunctionNames decode from subsection 1
// * LocalNames decode from subsection 2
//
// Subsections with any other ID are skipped by discarding their declared size in bytes.
//
// r is positioned at the first subsection and limit is the number of bytes of r that belong to the name section.
// Decoding stops when limit reaches zero, or when r is exhausted exactly at a subsection boundary, in which case
// whatever was decoded so far is returned without an error.
//
// An error is returned when a subsection cannot be read, or when a subsection's size field or declared size does
// not fit in the bytes remaining under limit. The latter checks prevent limit, which is unsigned, from wrapping
// around and making the loop consume data that follows the name section.
//
// See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#binary-namesec
func decodeNameSection(r *bytes.Reader, limit uint64) (result *wasm.NameSection, err error) {
	// TODO: add leb128 functions that work on []byte and offset. While using a reader allows us to reuse reader-based
	// leb128 functions, it is less efficient, causes untestable code and in some cases more complex vs plain []byte.
	result = &wasm.NameSection{}

	// subsectionID is decoded if known, and skipped if not
	var subsectionID uint8
	// subsectionSize is the length to skip when the subsectionID is unknown
	var subsectionSize uint32
	var bytesRead uint64
	for limit > 0 {
		if subsectionID, err = r.ReadByte(); err != nil {
			if err == io.EOF {
				return result, nil
			}
			// TODO: untestable as this can't fail for a reason beside EOF reading a byte from a buffer
			return nil, fmt.Errorf("failed to read a subsection ID: %w", err)
		}
		limit--

		if subsectionSize, bytesRead, err = leb128.DecodeUint32(r); err != nil {
			return nil, fmt.Errorf("failed to read the size of subsection[%d]: %w", subsectionID, err)
		}
		// The size field itself must lie inside the name section; otherwise the subtraction below would wrap.
		if bytesRead > limit {
			return nil, fmt.Errorf("size of subsection[%d] extends past the end of the name section", subsectionID)
		}
		limit -= bytesRead

		// Note: known subsections are decoded from their content; the bytes they consume are not compared against
		// subsectionSize here.
		switch subsectionID {
		case subsectionIDModuleName:
			if result.ModuleName, _, err = decodeUTF8(r, "module name"); err != nil {
				return nil, err
			}
		case subsectionIDFunctionNames:
			if result.FunctionNames, err = decodeFunctionNames(r); err != nil {
				return nil, err
			}
		case subsectionIDLocalNames:
			if result.LocalNames, err = decodeLocalNames(r); err != nil {
				return nil, err
			}
		default: // Skip other subsections.
			// Note: Not Seek because it doesn't err when given an offset past EOF. Rather, it leads to undefined state.
			if _, err = io.CopyN(io.Discard, r, int64(subsectionSize)); err != nil {
				return nil, fmt.Errorf("failed to skip subsection[%d]: %w", subsectionID, err)
			}
		}

		// This runs after decoding so that truncated input keeps reporting the more specific read errors above.
		// Without it, an oversized declaration would wrap limit around instead of ending the loop.
		if uint64(subsectionSize) > limit {
			return nil, fmt.Errorf("subsection[%d] size %d exceeds the %d bytes remaining in the name section",
				subsectionID, subsectionSize, limit)
		}
		limit -= uint64(subsectionSize)
	}
	return result, nil
}
// decodeFunctionNames decodes the content of the function names subsection from r: a function count followed by
// that many pairs of function index and UTF-8 name.
//
// It returns one *wasm.NameAssoc per pair, stored in the order read, or the first error hit while reading the
// count, an index or a name.
func decodeFunctionNames(r *bytes.Reader) (wasm.NameMap, error) {
	functionCount, err := decodeFunctionCount(r, subsectionIDFunctionNames)
	if err != nil {
		return nil, err
	}

	// functionCount comes from the binary being decoded, so don't size the allocation from it blindly: a corrupt
	// or hostile count could request billions of entries. Each entry takes at least one byte of r, so a count
	// above the bytes remaining can never be satisfied. In that case the loop below returns a read error before
	// it stores past the capped size.
	size := functionCount
	if remaining := uint64(r.Len()); uint64(size) > remaining {
		size = uint32(remaining)
	}

	result := make(wasm.NameMap, size)
	for i := uint32(0); i < functionCount; i++ {
		functionIndex, err := decodeFunctionIndex(r, subsectionIDFunctionNames)
		if err != nil {
			return nil, err
		}

		name, _, err := decodeUTF8(r, "function[%d] name", functionIndex)
		if err != nil {
			return nil, err
		}
		result[i] = &wasm.NameAssoc{Index: functionIndex, Name: name}
	}
	return result, nil
}
// decodeLocalNames decodes the content of the local names subsection from r: a function count followed by, for
// each function, its index, a local count, and that many pairs of local index and UTF-8 name.
//
// It returns one *wasm.NameMapAssoc per function, stored in the order read, or the first error hit while reading
// a count, an index or a name.
func decodeLocalNames(r *bytes.Reader) (wasm.IndirectNameMap, error) {
	functionCount, err := decodeFunctionCount(r, subsectionIDLocalNames)
	if err != nil {
		return nil, err
	}

	// Counts come from the binary being decoded, so don't size allocations from them blindly: a corrupt or
	// hostile count could request billions of entries. Each entry takes at least one byte of r, so a count above
	// the bytes remaining can never be satisfied. In that case the loops below return a read error before they
	// store past the capped size.
	functionSize := functionCount
	if remaining := uint64(r.Len()); uint64(functionSize) > remaining {
		functionSize = uint32(remaining)
	}

	result := make(wasm.IndirectNameMap, functionSize)
	for i := uint32(0); i < functionCount; i++ {
		functionIndex, err := decodeFunctionIndex(r, subsectionIDLocalNames)
		if err != nil {
			return nil, err
		}

		localCount, _, err := leb128.DecodeUint32(r)
		if err != nil {
			return nil, fmt.Errorf("failed to read the local count for function[%d]: %w", functionIndex, err)
		}

		// Same cap as above, applied to this function's locals.
		localSize := localCount
		if remaining := uint64(r.Len()); uint64(localSize) > remaining {
			localSize = uint32(remaining)
		}

		locals := make(wasm.NameMap, localSize)
		for j := uint32(0); j < localCount; j++ {
			localIndex, _, err := leb128.DecodeUint32(r)
			if err != nil {
				return nil, fmt.Errorf("failed to read a local index of function[%d]: %w", functionIndex, err)
			}

			name, _, err := decodeUTF8(r, "function[%d] local[%d] name", functionIndex, localIndex)
			if err != nil {
				return nil, err
			}
			locals[j] = &wasm.NameAssoc{Index: localIndex, Name: name}
		}
		result[i] = &wasm.NameMapAssoc{Index: functionIndex, NameMap: locals}
	}
	return result, nil
}
// decodeFunctionIndex reads one function index from r with leb128.DecodeUint32. subsectionID is only used to
// label the error returned when the read fails.
func decodeFunctionIndex(r *bytes.Reader, subsectionID uint8) (uint32, error) {
	idx, _, readErr := leb128.DecodeUint32(r)
	if readErr == nil {
		return idx, nil
	}
	return 0, fmt.Errorf("failed to read a function index in subsection[%d]: %w", subsectionID, readErr)
}
// decodeFunctionCount reads the leading function count of a subsection from r with leb128.DecodeUint32.
// subsectionID is only used to label the error returned when the read fails.
func decodeFunctionCount(r *bytes.Reader, subsectionID uint8) (uint32, error) {
	count, _, readErr := leb128.DecodeUint32(r)
	if readErr == nil {
		return count, nil
	}
	return 0, fmt.Errorf("failed to read the function count of subsection[%d]: %w", subsectionID, readErr)
}
// encodeNameSectionData serializes the data for the "name" key in wasm.SectionIDCustom according to the
// standard. Subsections are written in this order: module name, function names, local names.
//
// Note: The result can be nil because this does not encode empty subsections
//
// See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#binary-namesec
func encodeNameSectionData(n *wasm.NameSection) (data []byte) {
	if moduleName := n.ModuleName; moduleName != "" {
		content := encodeSizePrefixed([]byte(moduleName))
		data = append(data, encodeNameSubsection(subsectionIDModuleName, content)...)
	}

	if functionNames := encodeFunctionNameData(n); len(functionNames) != 0 {
		data = append(data, encodeNameSubsection(subsectionIDFunctionNames, functionNames)...)
	}

	if localNames := encodeLocalNameData(n); len(localNames) != 0 {
		data = append(data, encodeNameSubsection(subsectionIDLocalNames, localNames)...)
	}
	return data
}
// encodeFunctionNameData encodes the content of the function name subsection, or returns nil when n has no
// function names.
// See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#binary-funcnamesec
func encodeFunctionNameData(n *wasm.NameSection) []byte {
	if names := n.FunctionNames; len(names) > 0 {
		return encodeNameMap(names)
	}
	return nil
}
// encodeNameMap encodes the number of entries in m followed by each entry as encoded by encodeNameAssoc.
func encodeNameMap(m wasm.NameMap) []byte {
	encoded := leb128.EncodeUint32(uint32(len(m)))
	for _, assoc := range m {
		entry := encodeNameAssoc(assoc)
		encoded = append(encoded, entry...)
	}
	return encoded
}
// encodeLocalNameData encodes the content of the local name subsection, or returns nil when n has no local
// names. The output is the number of functions, then for each one its index followed by its locals as encoded by
// encodeNameMap.
// See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#binary-localnamesec
func encodeLocalNameData(n *wasm.NameSection) []byte {
	localNames := n.LocalNames
	if len(localNames) == 0 {
		return nil
	}

	encoded := leb128.EncodeUint32(uint32(len(localNames)))
	for _, function := range localNames {
		encoded = append(encoded, leb128.EncodeUint32(function.Index)...)
		encoded = append(encoded, encodeNameMap(function.NameMap)...)
	}
	return encoded
}
// encodeNameSubsection returns a new buffer holding the subsection ID, then the byte length of content encoded
// with leb128.EncodeUint32, then content itself.
// See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#subsections%E2%91%A0
func encodeNameSubsection(subsectionID uint8, content []byte) []byte {
	size := leb128.EncodeUint32(uint32(len(content)))

	encoded := make([]byte, 0, 1+len(size)+len(content))
	encoded = append(encoded, subsectionID)
	encoded = append(encoded, size...)
	return append(encoded, content...)
}
// encodeNameAssoc encodes the index of na followed by its name, the name being prefixed by its size.
// See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#binary-namemap
func encodeNameAssoc(na *wasm.NameAssoc) []byte {
	index := leb128.EncodeUint32(na.Index)
	name := encodeSizePrefixed([]byte(na.Name))
	return append(index, name...)
}
// encodeSizePrefixed returns data preceded by its byte length, the length being encoded with
// leb128.EncodeUint32.
func encodeSizePrefixed(data []byte) []byte {
	prefixed := leb128.EncodeUint32(uint32(len(data)))
	prefixed = append(prefixed, data...)
	return prefixed
}