wasi: optimizes args/environ parsing (#885)

While most compilers will only read args/environ once, tools like WAGI
make heavy use of the environment, possibly dozens of long variables. This
optimizes both args and environ for this reason and also to set up for
optimizing other functions.

Here are the notable changes:
* eagerly coerce to byte slices instead of strings
* re-use null terminated length for writing values
* avoid loops that call mem.WriteXXX internally

Signed-off-by: Adrian Cole <adrian@tetrate.io>
This commit is contained in:
Crypt Keeper
2022-12-05 11:07:51 +08:00
committed by GitHub
parent dca3bfa683
commit 6f30a42828
9 changed files with 115 additions and 89 deletions

View File

@@ -462,9 +462,9 @@ type moduleConfig struct {
nanotime *sys.Nanotime nanotime *sys.Nanotime
nanotimeResolution sys.ClockResolution nanotimeResolution sys.ClockResolution
nanosleep *sys.Nanosleep nanosleep *sys.Nanosleep
args []string args [][]byte
// environ is pair-indexed to retain order similar to os.Environ. // environ is pair-indexed to retain order similar to os.Environ.
environ []string environ [][]byte
// environKeys allow overwriting of existing values. // environKeys allow overwriting of existing values.
environKeys map[string]int environKeys map[string]int
// fs is the file system to open files with // fs is the file system to open files with
@@ -492,19 +492,30 @@ func (c *moduleConfig) clone() *moduleConfig {
// WithArgs implements ModuleConfig.WithArgs // WithArgs implements ModuleConfig.WithArgs
func (c *moduleConfig) WithArgs(args ...string) ModuleConfig { func (c *moduleConfig) WithArgs(args ...string) ModuleConfig {
ret := c.clone() ret := c.clone()
ret.args = args ret.args = toByteSlices(args)
return ret return ret
} }
// toByteSlices converts each input string into its own byte slice, keeping
// the original order. A nil or empty input yields a nil result, so callers
// that default to nil keep doing so.
func toByteSlices(strings []string) (result [][]byte) {
	count := len(strings)
	if count == 0 {
		return nil
	}
	result = make([][]byte, count)
	for idx := range strings {
		// Each conversion copies the string's bytes into a fresh slice.
		result[idx] = []byte(strings[idx])
	}
	return result
}
// WithEnv implements ModuleConfig.WithEnv // WithEnv implements ModuleConfig.WithEnv
func (c *moduleConfig) WithEnv(key, value string) ModuleConfig { func (c *moduleConfig) WithEnv(key, value string) ModuleConfig {
ret := c.clone() ret := c.clone()
// Check to see if this key already exists and update it. // Check to see if this key already exists and update it.
if i, ok := ret.environKeys[key]; ok { if i, ok := ret.environKeys[key]; ok {
ret.environ[i+1] = value // environ is pair-indexed, so the value is 1 after the key. ret.environ[i+1] = []byte(value) // environ is pair-indexed, so the value is 1 after the key.
} else { } else {
ret.environKeys[key] = len(ret.environ) ret.environKeys[key] = len(ret.environ)
ret.environ = append(ret.environ, key, value) ret.environ = append(ret.environ, []byte(key), []byte(value))
} }
return ret return ret
} }
@@ -602,21 +613,29 @@ func (c *moduleConfig) WithRandSource(source io.Reader) ModuleConfig {
// toSysContext creates a baseline wasm.Context configured by ModuleConfig. // toSysContext creates a baseline wasm.Context configured by ModuleConfig.
func (c *moduleConfig) toSysContext() (sysCtx *internalsys.Context, err error) { func (c *moduleConfig) toSysContext() (sysCtx *internalsys.Context, err error) {
var environ []string // Intentionally doesn't pre-allocate to reduce logic to default to nil. var environ [][]byte // Intentionally doesn't pre-allocate to reduce logic to default to nil.
// Same validation as syscall.Setenv for Linux // Same validation as syscall.Setenv for Linux
for i := 0; i < len(c.environ); i += 2 { for i := 0; i < len(c.environ); i += 2 {
key, value := c.environ[i], c.environ[i+1] key, value := c.environ[i], c.environ[i+1]
if len(key) == 0 { keyLen := len(key)
if keyLen == 0 {
err = errors.New("environ invalid: empty key") err = errors.New("environ invalid: empty key")
return return
} }
for j := 0; j < len(key); j++ { valueLen := len(value)
if key[j] == '=' { // NUL enforced in NewContext result := make([]byte, keyLen+valueLen+1)
j := 0
for ; j < keyLen; j++ {
if k := key[j]; k == '=' { // NUL enforced in NewContext
err = errors.New("environ invalid: key contains '=' character") err = errors.New("environ invalid: key contains '=' character")
return return
} else {
result[j] = k
} }
} }
environ = append(environ, key+"="+value) result[j] = '='
copy(result[j+1:], value)
environ = append(environ, result)
} }
return internalsys.NewContext( return internalsys.NewContext(

View File

@@ -639,8 +639,8 @@ func requireSysContext(
) *internalsys.Context { ) *internalsys.Context {
sysCtx, err := internalsys.NewContext( sysCtx, err := internalsys.NewContext(
max, max,
args, toByteSlices(args),
environ, toByteSlices(environ),
stdin, stdin,
stdout, stdout,
stderr, stderr,

View File

@@ -59,7 +59,7 @@ var argsGet = &wasm.HostFunc{
func argsGetFn(ctx context.Context, mod api.Module, params []uint64) Errno { func argsGetFn(ctx context.Context, mod api.Module, params []uint64) Errno {
sysCtx := mod.(*wasm.CallContext).Sys sysCtx := mod.(*wasm.CallContext).Sys
argv, argvBuf := uint32(params[0]), uint32(params[1]) argv, argvBuf := uint32(params[0]), uint32(params[1])
return writeOffsetsAndNullTerminatedValues(ctx, mod.Memory(), sysCtx.Args(), argv, argvBuf) return writeOffsetsAndNullTerminatedValues(ctx, mod.Memory(), sysCtx.Args(), argv, argvBuf, sysCtx.ArgsSize())
} }
// argsSizesGet is the WASI function named functionArgsSizesGet that reads // argsSizesGet is the WASI function named functionArgsSizesGet that reads
@@ -108,7 +108,8 @@ func argsSizesGetFn(ctx context.Context, mod api.Module, params []uint64) Errno
mem := mod.Memory() mem := mod.Memory()
resultArgc, resultArgvLen := uint32(params[0]), uint32(params[1]) resultArgc, resultArgvLen := uint32(params[0]), uint32(params[1])
// Write the Errno back to the stack // argc and argv_len offsets are not necessarily sequential, so we have to
// write them independently.
if !mem.WriteUint32Le(ctx, resultArgc, uint32(len(sysCtx.Args()))) { if !mem.WriteUint32Le(ctx, resultArgc, uint32(len(sysCtx.Args()))) {
return ErrnoFault return ErrnoFault
} }

View File

@@ -60,7 +60,7 @@ func environGetFn(ctx context.Context, mod api.Module, params []uint64) Errno {
sysCtx := mod.(*wasm.CallContext).Sys sysCtx := mod.(*wasm.CallContext).Sys
environ, environBuf := uint32(params[0]), uint32(params[1]) environ, environBuf := uint32(params[0]), uint32(params[1])
return writeOffsetsAndNullTerminatedValues(ctx, mod.Memory(), sysCtx.Environ(), environ, environBuf) return writeOffsetsAndNullTerminatedValues(ctx, mod.Memory(), sysCtx.Environ(), environ, environBuf, sysCtx.EnvironSize())
} }
// environSizesGet is the WASI function named functionEnvironSizesGet that // environSizesGet is the WASI function named functionEnvironSizesGet that
@@ -111,6 +111,8 @@ func environSizesGetFn(ctx context.Context, mod api.Module, params []uint64) Err
mem := mod.Memory() mem := mod.Memory()
resultEnvironc, resultEnvironvLen := uint32(params[0]), uint32(params[1]) resultEnvironc, resultEnvironvLen := uint32(params[0]), uint32(params[1])
// environc and environv_len offsets are not necessarily sequential, so we
// have to write them independently.
if !mem.WriteUint32Le(ctx, resultEnvironc, uint32(len(sysCtx.Environ()))) { if !mem.WriteUint32Le(ctx, resultEnvironc, uint32(len(sysCtx.Environ()))) {
return ErrnoFault return ErrnoFault
} }

View File

@@ -215,23 +215,41 @@ func exportFunctions(builder wazero.HostModuleBuilder) {
exporter.ExportHostFunc(sockShutdown) exporter.ExportHostFunc(sockShutdown)
} }
func writeOffsetsAndNullTerminatedValues(ctx context.Context, mem api.Memory, values []string, offsets, bytes uint32) Errno { // writeOffsetsAndNullTerminatedValues is used to write NUL-terminated values
// for args or environ, given a pre-defined bytesLen (which includes NUL
// terminators).
func writeOffsetsAndNullTerminatedValues(ctx context.Context, mem api.Memory, values [][]byte, offsets, bytes, bytesLen uint32) Errno {
// The caller may not place bytes directly after offsets, so we have to
// read them independently.
valuesLen := len(values)
offsetsLen := uint32(valuesLen * 4) // uint32Le
offsetsBuf, ok := mem.Read(ctx, offsets, offsetsLen)
if !ok {
return ErrnoFault
}
bytesBuf, ok := mem.Read(ctx, bytes, bytesLen)
if !ok {
return ErrnoFault
}
// Loop through the values, first writing the location of its data to
// offsetsBuf[oI], then its NUL-terminated data at bytesBuf[bI]
var oI, bI uint32
for _, value := range values { for _, value := range values {
// Write current offset and advance it. // Go can't guarantee inlining as there's no //go:inline directive.
if !mem.WriteUint32Le(ctx, offsets, bytes) { // This inlines uint32 little-endian encoding instead.
return ErrnoFault bytesOffset := bytes + bI
} offsetsBuf[oI] = byte(bytesOffset)
offsets += 4 // size of uint32 offsetsBuf[oI+1] = byte(bytesOffset >> 8)
offsetsBuf[oI+2] = byte(bytesOffset >> 16)
offsetsBuf[oI+3] = byte(bytesOffset >> 24)
oI += 4 // size of uint32 we just wrote
// Write the next value to memory with a NUL terminator // Write the next value to memory with a NUL terminator
if !mem.Write(ctx, bytes, []byte(value)) { copy(bytesBuf[bI:], value)
return ErrnoFault bI += uint32(len(value))
} bytesBuf[bI] = 0 // NUL terminator
bytes += uint32(len(value)) bI++
if !mem.WriteByte(ctx, bytes, 0) {
return ErrnoFault
}
bytes++
} }
return ErrnoSuccess return ErrnoSuccess

View File

@@ -6,60 +6,46 @@ import (
"github.com/tetratelabs/wazero" "github.com/tetratelabs/wazero"
"github.com/tetratelabs/wazero/api" "github.com/tetratelabs/wazero/api"
"github.com/tetratelabs/wazero/internal/testing/proxy" "github.com/tetratelabs/wazero/internal/testing/proxy"
"github.com/tetratelabs/wazero/internal/testing/require"
) )
var testMem = []byte{ // configArgsEnviron ensures the result data are the same between args and ENV.
0, // environBuf is after this var configArgsEnviron = wazero.NewModuleConfig().
'a', '=', 'b', 0, // null terminated "a=b", WithArgs("aa=bbbb", "cccccc=dddddddd", "eeeeeeeeee=ffffffffffff").
'b', '=', 'c', 'd', 0, // null terminated "b=cd" WithEnv("aa", "bbbb").
0, // environ is after this WithEnv("cccccc", "dddddddd").
1, 0, 0, 0, // little endian-encoded offset of "a=b" WithEnv("eeeeeeeeee", "ffffffffffff")
5, 0, 0, 0, // little endian-encoded offset of "b=cd"
0,
}
func Test_Benchmark_EnvironGet(t *testing.T) { func Benchmark_ArgsEnviron(b *testing.B) {
mod, r, log := requireProxyModule(t, wazero.NewModuleConfig().
WithEnv("a", "b").WithEnv("b", "cd"))
defer r.Close(testCtx)
// Invoke environGet and check the memory side effects.
requireErrno(t, ErrnoSuccess, mod, functionEnvironGet, uint64(11), uint64(1))
require.Equal(t, `
--> proxy.environ_get(environ=11,environ_buf=1)
==> wasi_snapshot_preview1.environ_get(environ=11,environ_buf=1)
<== ESUCCESS
<-- (0)
`, "\n"+log.String())
mem, ok := mod.Memory().Read(testCtx, 0, uint32(len(testMem)))
require.True(t, ok)
require.Equal(t, testMem, mem)
}
func Benchmark_EnvironGet(b *testing.B) {
r := wazero.NewRuntime(testCtx) r := wazero.NewRuntime(testCtx)
defer r.Close(testCtx) defer r.Close(testCtx)
mod, err := instantiateProxyModule(r, wazero.NewModuleConfig(). mod, err := instantiateProxyModule(r, configArgsEnviron)
WithEnv("a", "b").WithEnv("b", "cd"))
if err != nil { if err != nil {
b.Fatal(err) b.Fatal(err)
} }
b.Run("environGet", func(b *testing.B) { for _, n := range []string{
for i := 0; i < b.N; i++ { functionArgsGet,
results, err := mod.ExportedFunction(functionEnvironGet).Call(testCtx, uint64(0), uint64(4)) functionArgsSizesGet,
if err != nil { functionEnvironGet,
b.Fatal(err) functionEnvironSizesGet,
} {
n := n
fn := mod.ExportedFunction(n)
b.Run(n, func(b *testing.B) {
b.ReportAllocs()
for i := 0; i < b.N; i++ {
results, err := fn.Call(testCtx, uint64(0), uint64(4))
if err != nil {
b.Fatal(err)
}
errno := Errno(results[0])
if errno != 0 {
b.Fatal(ErrnoName(errno))
}
} }
errno := Errno(results[0]) })
if errno != ErrnoSuccess { }
b.Fatal(ErrnoName(errno))
}
}
})
} }
// instantiateProxyModule instantiates a guest that re-exports WASI functions. // instantiateProxyModule instantiates a guest that re-exports WASI functions.

View File

@@ -26,10 +26,10 @@ func WriteArgsAndEnviron(ctx context.Context, mod api.Module) (argc, argv uint32
argc = uint32(len(args)) argc = uint32(len(args))
offset := endOfPageZero offset := endOfPageZero
strPtr := func(val, field string, i int) (ptr uint32) { strPtr := func(val []byte, field string, i int) (ptr uint32) {
// TODO: return err and format "%s[%d], field, i" // TODO: return err and format "%s[%d], field, i"
ptr = offset ptr = offset
mustWrite(ctx, mem, field, offset, append([]byte(val), 0)) mustWrite(ctx, mem, field, offset, append(val, 0))
offset += uint32(len(val) + 1) offset += uint32(len(val) + 1)
if pad := offset % 8; pad != 0 { if pad := offset % 8; pad != 0 {
offset += 8 - pad offset += 8 - pad

View File

@@ -15,7 +15,7 @@ import (
// Context holds module-scoped system resources currently only supported by // Context holds module-scoped system resources currently only supported by
// built-in host functions. // built-in host functions.
type Context struct { type Context struct {
args, environ []string args, environ [][]byte
argsSize, environSize uint32 argsSize, environSize uint32
stdin io.Reader stdin io.Reader
stdout, stderr io.Writer stdout, stderr io.Writer
@@ -35,7 +35,7 @@ type Context struct {
// //
// Note: The count will never be more than math.MaxUint32. // Note: The count will never be more than math.MaxUint32.
// See wazero.ModuleConfig WithArgs // See wazero.ModuleConfig WithArgs
func (c *Context) Args() []string { func (c *Context) Args() [][]byte {
return c.args return c.args
} }
@@ -52,7 +52,7 @@ func (c *Context) ArgsSize() uint32 {
// //
// Note: The count will never be more than math.MaxUint32. // Note: The count will never be more than math.MaxUint32.
// See wazero.ModuleConfig WithEnv // See wazero.ModuleConfig WithEnv
func (c *Context) Environ() []string { func (c *Context) Environ() [][]byte {
return c.environ return c.environ
} }
@@ -150,7 +150,7 @@ var (
// Note: max is exposed for testing. max is only used for env/args validation. // Note: max is exposed for testing. max is only used for env/args validation.
func NewContext( func NewContext(
max uint32, max uint32,
args, environ []string, args, environ [][]byte,
stdin io.Reader, stdin io.Reader,
stdout, stderr io.Writer, stdout, stderr io.Writer,
randSource io.Reader, randSource io.Reader,
@@ -239,7 +239,7 @@ func clockResolutionInvalid(resolution sys.ClockResolution) bool {
// nullTerminatedByteCount ensures the count or Nul-terminated length of the elements doesn't exceed max, and that no // nullTerminatedByteCount ensures the count or Nul-terminated length of the elements doesn't exceed max, and that no
// element includes the nul character. // element includes the nul character.
func nullTerminatedByteCount(max uint32, elements []string) (uint32, error) { func nullTerminatedByteCount(max uint32, elements [][]byte) (uint32, error) {
count := uint32(len(elements)) count := uint32(len(elements))
if count > max { if count > max {
return 0, errors.New("exceeds maximum count") return 0, errors.New("exceeds maximum count")

View File

@@ -61,7 +61,7 @@ func TestDefaultSysContext(t *testing.T) {
func TestNewContext_Args(t *testing.T) { func TestNewContext_Args(t *testing.T) {
tests := []struct { tests := []struct {
name string name string
args []string args [][]byte
maxSize uint32 maxSize uint32
expectedSize uint32 expectedSize uint32
expectedErr string expectedErr string
@@ -69,25 +69,25 @@ func TestNewContext_Args(t *testing.T) {
{ {
name: "ok", name: "ok",
maxSize: 10, maxSize: 10,
args: []string{"a", "bc"}, args: [][]byte{[]byte("a"), []byte("bc")},
expectedSize: 5, expectedSize: 5,
}, },
{ {
name: "exceeds max count", name: "exceeds max count",
maxSize: 1, maxSize: 1,
args: []string{"a", "bc"}, args: [][]byte{[]byte("a"), []byte("bc")},
expectedErr: "args invalid: exceeds maximum count", expectedErr: "args invalid: exceeds maximum count",
}, },
{ {
name: "exceeds max size", name: "exceeds max size",
maxSize: 4, maxSize: 4,
args: []string{"a", "bc"}, args: [][]byte{[]byte("a"), []byte("bc")},
expectedErr: "args invalid: exceeds maximum size", expectedErr: "args invalid: exceeds maximum size",
}, },
{ {
name: "null character", name: "null character",
maxSize: 10, maxSize: 10,
args: []string{"a", string([]byte{'b', 0})}, args: [][]byte{[]byte("a"), {'b', 0}},
expectedErr: "args invalid: contains NUL character", expectedErr: "args invalid: contains NUL character",
}, },
} }
@@ -123,7 +123,7 @@ func TestNewContext_Args(t *testing.T) {
func TestNewContext_Environ(t *testing.T) { func TestNewContext_Environ(t *testing.T) {
tests := []struct { tests := []struct {
name string name string
environ []string environ [][]byte
maxSize uint32 maxSize uint32
expectedSize uint32 expectedSize uint32
expectedErr string expectedErr string
@@ -131,25 +131,25 @@ func TestNewContext_Environ(t *testing.T) {
{ {
name: "ok", name: "ok",
maxSize: 10, maxSize: 10,
environ: []string{"a=b", "c=de"}, environ: [][]byte{[]byte("a=b"), []byte("c=de")},
expectedSize: 9, expectedSize: 9,
}, },
{ {
name: "exceeds max count", name: "exceeds max count",
maxSize: 1, maxSize: 1,
environ: []string{"a=b", "c=de"}, environ: [][]byte{[]byte("a=b"), []byte("c=de")},
expectedErr: "environ invalid: exceeds maximum count", expectedErr: "environ invalid: exceeds maximum count",
}, },
{ {
name: "exceeds max size", name: "exceeds max size",
maxSize: 4, maxSize: 4,
environ: []string{"a=b", "c=de"}, environ: [][]byte{[]byte("a=b"), []byte("c=de")},
expectedErr: "environ invalid: exceeds maximum size", expectedErr: "environ invalid: exceeds maximum size",
}, },
{ {
name: "null character", name: "null character",
maxSize: 10, maxSize: 10,
environ: []string{"a=b", string(append([]byte("c=d"), 0))}, environ: [][]byte{[]byte("a=b"), append([]byte("c=d"), 0)},
expectedErr: "environ invalid: contains NUL character", expectedErr: "environ invalid: contains NUL character",
}, },
} }