wasi: optimizes args/environ parsing (#885)

While most compilers will only read args/environ once, tools like WAGI
make heavy use of environment, possibly dozens of long variables. This
optimizes both args and environ for this reason and also to setup for
optimizing other functions.

Here are the notable changes:
* eagerly coerce to byte slices instead of strings
* re-use null terminated length for writing values
* avoid loops that call mem.WriteXXX internally

Signed-off-by: Adrian Cole <adrian@tetrate.io>
This commit is contained in:
Crypt Keeper
2022-12-05 11:07:51 +08:00
committed by GitHub
parent dca3bfa683
commit 6f30a42828
9 changed files with 115 additions and 89 deletions

View File

@@ -462,9 +462,9 @@ type moduleConfig struct {
nanotime *sys.Nanotime
nanotimeResolution sys.ClockResolution
nanosleep *sys.Nanosleep
args []string
args [][]byte
// environ is pair-indexed to retain order similar to os.Environ.
environ []string
environ [][]byte
// environKeys allow overwriting of existing values.
environKeys map[string]int
// fs is the file system to open files with
@@ -492,19 +492,30 @@ func (c *moduleConfig) clone() *moduleConfig {
// WithArgs implements ModuleConfig.WithArgs
func (c *moduleConfig) WithArgs(args ...string) ModuleConfig {
ret := c.clone()
ret.args = args
ret.args = toByteSlices(args)
return ret
}
func toByteSlices(strings []string) (result [][]byte) {
if len(strings) == 0 {
return
}
result = make([][]byte, len(strings))
for i, a := range strings {
result[i] = []byte(a)
}
return
}
// WithEnv implements ModuleConfig.WithEnv
func (c *moduleConfig) WithEnv(key, value string) ModuleConfig {
ret := c.clone()
// Check to see if this key already exists and update it.
if i, ok := ret.environKeys[key]; ok {
ret.environ[i+1] = value // environ is pair-indexed, so the value is 1 after the key.
ret.environ[i+1] = []byte(value) // environ is pair-indexed, so the value is 1 after the key.
} else {
ret.environKeys[key] = len(ret.environ)
ret.environ = append(ret.environ, key, value)
ret.environ = append(ret.environ, []byte(key), []byte(value))
}
return ret
}
@@ -602,21 +613,29 @@ func (c *moduleConfig) WithRandSource(source io.Reader) ModuleConfig {
// toSysContext creates a baseline wasm.Context configured by ModuleConfig.
func (c *moduleConfig) toSysContext() (sysCtx *internalsys.Context, err error) {
var environ []string // Intentionally doesn't pre-allocate to reduce logic to default to nil.
var environ [][]byte // Intentionally doesn't pre-allocate to reduce logic to default to nil.
// Same validation as syscall.Setenv for Linux
for i := 0; i < len(c.environ); i += 2 {
key, value := c.environ[i], c.environ[i+1]
if len(key) == 0 {
keyLen := len(key)
if keyLen == 0 {
err = errors.New("environ invalid: empty key")
return
}
for j := 0; j < len(key); j++ {
if key[j] == '=' { // NUL enforced in NewContext
valueLen := len(value)
result := make([]byte, keyLen+valueLen+1)
j := 0
for ; j < keyLen; j++ {
if k := key[j]; k == '=' { // NUL enforced in NewContext
err = errors.New("environ invalid: key contains '=' character")
return
} else {
result[j] = k
}
}
environ = append(environ, key+"="+value)
result[j] = '='
copy(result[j+1:], value)
environ = append(environ, result)
}
return internalsys.NewContext(

View File

@@ -639,8 +639,8 @@ func requireSysContext(
) *internalsys.Context {
sysCtx, err := internalsys.NewContext(
max,
args,
environ,
toByteSlices(args),
toByteSlices(environ),
stdin,
stdout,
stderr,

View File

@@ -59,7 +59,7 @@ var argsGet = &wasm.HostFunc{
func argsGetFn(ctx context.Context, mod api.Module, params []uint64) Errno {
sysCtx := mod.(*wasm.CallContext).Sys
argv, argvBuf := uint32(params[0]), uint32(params[1])
return writeOffsetsAndNullTerminatedValues(ctx, mod.Memory(), sysCtx.Args(), argv, argvBuf)
return writeOffsetsAndNullTerminatedValues(ctx, mod.Memory(), sysCtx.Args(), argv, argvBuf, sysCtx.ArgsSize())
}
// argsSizesGet is the WASI function named functionArgsSizesGet that reads
@@ -108,7 +108,8 @@ func argsSizesGetFn(ctx context.Context, mod api.Module, params []uint64) Errno
mem := mod.Memory()
resultArgc, resultArgvLen := uint32(params[0]), uint32(params[1])
// Write the Errno back to the stack
// argc and argv_len offsets are not necessarily sequential, so we have to
// write them independently.
if !mem.WriteUint32Le(ctx, resultArgc, uint32(len(sysCtx.Args()))) {
return ErrnoFault
}

View File

@@ -60,7 +60,7 @@ func environGetFn(ctx context.Context, mod api.Module, params []uint64) Errno {
sysCtx := mod.(*wasm.CallContext).Sys
environ, environBuf := uint32(params[0]), uint32(params[1])
return writeOffsetsAndNullTerminatedValues(ctx, mod.Memory(), sysCtx.Environ(), environ, environBuf)
return writeOffsetsAndNullTerminatedValues(ctx, mod.Memory(), sysCtx.Environ(), environ, environBuf, sysCtx.EnvironSize())
}
// environSizesGet is the WASI function named functionEnvironSizesGet that
@@ -111,6 +111,8 @@ func environSizesGetFn(ctx context.Context, mod api.Module, params []uint64) Err
mem := mod.Memory()
resultEnvironc, resultEnvironvLen := uint32(params[0]), uint32(params[1])
// environc and environv_len offsets are not necessarily sequential, so we
// have to write them independently.
if !mem.WriteUint32Le(ctx, resultEnvironc, uint32(len(sysCtx.Environ()))) {
return ErrnoFault
}

View File

@@ -215,23 +215,41 @@ func exportFunctions(builder wazero.HostModuleBuilder) {
exporter.ExportHostFunc(sockShutdown)
}
func writeOffsetsAndNullTerminatedValues(ctx context.Context, mem api.Memory, values []string, offsets, bytes uint32) Errno {
// writeOffsetsAndNullTerminatedValues is used to write NUL-terminated values
// for args or environ, given a pre-defined bytesLen (which includes NUL
// terminators).
func writeOffsetsAndNullTerminatedValues(ctx context.Context, mem api.Memory, values [][]byte, offsets, bytes, bytesLen uint32) Errno {
// The caller may not place bytes directly after offsets, so we have to
// read them independently.
valuesLen := len(values)
offsetsLen := uint32(valuesLen * 4) // uint32Le
offsetsBuf, ok := mem.Read(ctx, offsets, offsetsLen)
if !ok {
return ErrnoFault
}
bytesBuf, ok := mem.Read(ctx, bytes, bytesLen)
if !ok {
return ErrnoFault
}
// Loop through the values, first writing the location of its data to
// offsetsBuf[oI], then its NUL-terminated data at bytesBuf[bI]
var oI, bI uint32
for _, value := range values {
// Write current offset and advance it.
if !mem.WriteUint32Le(ctx, offsets, bytes) {
return ErrnoFault
}
offsets += 4 // size of uint32
// Go can't guarantee inlining as there's not //go:inline directive.
// This inlines uint32 little-endian encoding instead.
bytesOffset := bytes + bI
offsetsBuf[oI] = byte(bytesOffset)
offsetsBuf[oI+1] = byte(bytesOffset >> 8)
offsetsBuf[oI+2] = byte(bytesOffset >> 16)
offsetsBuf[oI+3] = byte(bytesOffset >> 24)
oI += 4 // size of uint32 we just wrote
// Write the next value to memory with a NUL terminator
if !mem.Write(ctx, bytes, []byte(value)) {
return ErrnoFault
}
bytes += uint32(len(value))
if !mem.WriteByte(ctx, bytes, 0) {
return ErrnoFault
}
bytes++
copy(bytesBuf[bI:], value)
bI += uint32(len(value))
bytesBuf[bI] = 0 // NUL terminator
bI++
}
return ErrnoSuccess

View File

@@ -6,60 +6,46 @@ import (
"github.com/tetratelabs/wazero"
"github.com/tetratelabs/wazero/api"
"github.com/tetratelabs/wazero/internal/testing/proxy"
"github.com/tetratelabs/wazero/internal/testing/require"
)
var testMem = []byte{
0, // environBuf is after this
'a', '=', 'b', 0, // null terminated "a=b",
'b', '=', 'c', 'd', 0, // null terminated "b=cd"
0, // environ is after this
1, 0, 0, 0, // little endian-encoded offset of "a=b"
5, 0, 0, 0, // little endian-encoded offset of "b=cd"
0,
}
// configArgsEnviron ensures the result data are the same between args and ENV.
var configArgsEnviron = wazero.NewModuleConfig().
WithArgs("aa=bbbb", "cccccc=dddddddd", "eeeeeeeeee=ffffffffffff").
WithEnv("aa", "bbbb").
WithEnv("cccccc", "dddddddd").
WithEnv("eeeeeeeeee", "ffffffffffff")
func Test_Benchmark_EnvironGet(t *testing.T) {
mod, r, log := requireProxyModule(t, wazero.NewModuleConfig().
WithEnv("a", "b").WithEnv("b", "cd"))
defer r.Close(testCtx)
// Invoke environGet and check the memory side effects.
requireErrno(t, ErrnoSuccess, mod, functionEnvironGet, uint64(11), uint64(1))
require.Equal(t, `
--> proxy.environ_get(environ=11,environ_buf=1)
==> wasi_snapshot_preview1.environ_get(environ=11,environ_buf=1)
<== ESUCCESS
<-- (0)
`, "\n"+log.String())
mem, ok := mod.Memory().Read(testCtx, 0, uint32(len(testMem)))
require.True(t, ok)
require.Equal(t, testMem, mem)
}
func Benchmark_EnvironGet(b *testing.B) {
func Benchmark_ArgsEnviron(b *testing.B) {
r := wazero.NewRuntime(testCtx)
defer r.Close(testCtx)
mod, err := instantiateProxyModule(r, wazero.NewModuleConfig().
WithEnv("a", "b").WithEnv("b", "cd"))
mod, err := instantiateProxyModule(r, configArgsEnviron)
if err != nil {
b.Fatal(err)
}
b.Run("environGet", func(b *testing.B) {
for i := 0; i < b.N; i++ {
results, err := mod.ExportedFunction(functionEnvironGet).Call(testCtx, uint64(0), uint64(4))
if err != nil {
b.Fatal(err)
for _, n := range []string{
functionArgsGet,
functionArgsSizesGet,
functionEnvironGet,
functionEnvironSizesGet,
} {
n := n
fn := mod.ExportedFunction(n)
b.Run(n, func(b *testing.B) {
b.ReportAllocs()
for i := 0; i < b.N; i++ {
results, err := fn.Call(testCtx, uint64(0), uint64(4))
if err != nil {
b.Fatal(err)
}
errno := Errno(results[0])
if errno != 0 {
b.Fatal(ErrnoName(errno))
}
}
errno := Errno(results[0])
if errno != ErrnoSuccess {
b.Fatal(ErrnoName(errno))
}
}
})
})
}
}
// instantiateProxyModule instantiates a guest that re-exports WASI functions.

View File

@@ -26,10 +26,10 @@ func WriteArgsAndEnviron(ctx context.Context, mod api.Module) (argc, argv uint32
argc = uint32(len(args))
offset := endOfPageZero
strPtr := func(val, field string, i int) (ptr uint32) {
strPtr := func(val []byte, field string, i int) (ptr uint32) {
// TODO: return err and format "%s[%d], field, i"
ptr = offset
mustWrite(ctx, mem, field, offset, append([]byte(val), 0))
mustWrite(ctx, mem, field, offset, append(val, 0))
offset += uint32(len(val) + 1)
if pad := offset % 8; pad != 0 {
offset += 8 - pad

View File

@@ -15,7 +15,7 @@ import (
// Context holds module-scoped system resources currently only supported by
// built-in host functions.
type Context struct {
args, environ []string
args, environ [][]byte
argsSize, environSize uint32
stdin io.Reader
stdout, stderr io.Writer
@@ -35,7 +35,7 @@ type Context struct {
//
// Note: The count will never be more than math.MaxUint32.
// See wazero.ModuleConfig WithArgs
func (c *Context) Args() []string {
func (c *Context) Args() [][]byte {
return c.args
}
@@ -52,7 +52,7 @@ func (c *Context) ArgsSize() uint32 {
//
// Note: The count will never be more than math.MaxUint32.
// See wazero.ModuleConfig WithEnv
func (c *Context) Environ() []string {
func (c *Context) Environ() [][]byte {
return c.environ
}
@@ -150,7 +150,7 @@ var (
// Note: max is exposed for testing. max is only used for env/args validation.
func NewContext(
max uint32,
args, environ []string,
args, environ [][]byte,
stdin io.Reader,
stdout, stderr io.Writer,
randSource io.Reader,
@@ -239,7 +239,7 @@ func clockResolutionInvalid(resolution sys.ClockResolution) bool {
// nullTerminatedByteCount ensures the count or Nul-terminated length of the elements doesn't exceed max, and that no
// element includes the nul character.
func nullTerminatedByteCount(max uint32, elements []string) (uint32, error) {
func nullTerminatedByteCount(max uint32, elements [][]byte) (uint32, error) {
count := uint32(len(elements))
if count > max {
return 0, errors.New("exceeds maximum count")

View File

@@ -61,7 +61,7 @@ func TestDefaultSysContext(t *testing.T) {
func TestNewContext_Args(t *testing.T) {
tests := []struct {
name string
args []string
args [][]byte
maxSize uint32
expectedSize uint32
expectedErr string
@@ -69,25 +69,25 @@ func TestNewContext_Args(t *testing.T) {
{
name: "ok",
maxSize: 10,
args: []string{"a", "bc"},
args: [][]byte{[]byte("a"), []byte("bc")},
expectedSize: 5,
},
{
name: "exceeds max count",
maxSize: 1,
args: []string{"a", "bc"},
args: [][]byte{[]byte("a"), []byte("bc")},
expectedErr: "args invalid: exceeds maximum count",
},
{
name: "exceeds max size",
maxSize: 4,
args: []string{"a", "bc"},
args: [][]byte{[]byte("a"), []byte("bc")},
expectedErr: "args invalid: exceeds maximum size",
},
{
name: "null character",
maxSize: 10,
args: []string{"a", string([]byte{'b', 0})},
args: [][]byte{[]byte("a"), {'b', 0}},
expectedErr: "args invalid: contains NUL character",
},
}
@@ -123,7 +123,7 @@ func TestNewContext_Args(t *testing.T) {
func TestNewContext_Environ(t *testing.T) {
tests := []struct {
name string
environ []string
environ [][]byte
maxSize uint32
expectedSize uint32
expectedErr string
@@ -131,25 +131,25 @@ func TestNewContext_Environ(t *testing.T) {
{
name: "ok",
maxSize: 10,
environ: []string{"a=b", "c=de"},
environ: [][]byte{[]byte("a=b"), []byte("c=de")},
expectedSize: 9,
},
{
name: "exceeds max count",
maxSize: 1,
environ: []string{"a=b", "c=de"},
environ: [][]byte{[]byte("a=b"), []byte("c=de")},
expectedErr: "environ invalid: exceeds maximum count",
},
{
name: "exceeds max size",
maxSize: 4,
environ: []string{"a=b", "c=de"},
environ: [][]byte{[]byte("a=b"), []byte("c=de")},
expectedErr: "environ invalid: exceeds maximum size",
},
{
name: "null character",
maxSize: 10,
environ: []string{"a=b", string(append([]byte("c=d"), 0))},
environ: [][]byte{[]byte("a=b"), append([]byte("c=d"), 0)},
expectedErr: "environ invalid: contains NUL character",
},
}