From 6f30a42828ce4eeccd5a04f0ef135efc58b7a2a3 Mon Sep 17 00:00:00 2001
From: Crypt Keeper <64215+codefromthecrypt@users.noreply.github.com>
Date: Mon, 5 Dec 2022 11:07:51 +0800
Subject: [PATCH] wasi: optimizes args/environ parsing (#885)

While most compilers will only read args/environ once, tools like WAGI
make heavy use of environment, possibly dozens of long variables. This
optimizes both args and environ for this reason and also to setup for
optimizing other functions.

Here are the notable changes:
* eagerly coerce to byte slices instead of strings
* re-use null terminated length for writing values
* avoid loops that call mem.WriteXXX internally

Signed-off-by: Adrian Cole <adrian@tetrate.io>
---
 config.go                                     | 39 +++++++---
 config_test.go                                |  4 +-
 imports/wasi_snapshot_preview1/args.go        |  5 +-
 imports/wasi_snapshot_preview1/environ.go     |  4 +-
 imports/wasi_snapshot_preview1/wasi.go        | 46 ++++++++----
 .../wasi_snapshot_preview1/wasi_bench_test.go | 72 ++++++++-----------
 internal/gojs/argsenv.go                      |  4 +-
 internal/sys/sys.go                           | 10 +--
 internal/sys/sys_test.go                      | 20 +++---
 9 files changed, 115 insertions(+), 89 deletions(-)

diff --git a/config.go b/config.go
index 8a19eea5..5066e66b 100644
--- a/config.go
+++ b/config.go
@@ -462,9 +462,9 @@ type moduleConfig struct {
 	nanotime           *sys.Nanotime
 	nanotimeResolution sys.ClockResolution
 	nanosleep          *sys.Nanosleep
-	args               []string
+	args               [][]byte
 	// environ is pair-indexed to retain order similar to os.Environ.
-	environ []string
+	environ [][]byte
 	// environKeys allow overwriting of existing values.
 	environKeys map[string]int
 	// fs is the file system to open files with
@@ -492,19 +492,30 @@ func (c *moduleConfig) clone() *moduleConfig {
 // WithArgs implements ModuleConfig.WithArgs
 func (c *moduleConfig) WithArgs(args ...string) ModuleConfig {
 	ret := c.clone()
-	ret.args = args
+	ret.args = toByteSlices(args)
 	return ret
 }
 
+func toByteSlices(strings []string) (result [][]byte) {
+	if len(strings) == 0 {
+		return
+	}
+	result = make([][]byte, len(strings))
+	for i, a := range strings {
+		result[i] = []byte(a)
+	}
+	return
+}
+
 // WithEnv implements ModuleConfig.WithEnv
 func (c *moduleConfig) WithEnv(key, value string) ModuleConfig {
 	ret := c.clone()
 	// Check to see if this key already exists and update it.
 	if i, ok := ret.environKeys[key]; ok {
-		ret.environ[i+1] = value // environ is pair-indexed, so the value is 1 after the key.
+		ret.environ[i+1] = []byte(value) // environ is pair-indexed, so the value is 1 after the key.
 	} else {
 		ret.environKeys[key] = len(ret.environ)
-		ret.environ = append(ret.environ, key, value)
+		ret.environ = append(ret.environ, []byte(key), []byte(value))
 	}
 	return ret
 }
@@ -602,21 +613,29 @@ func (c *moduleConfig) WithRandSource(source io.Reader) ModuleConfig {
 
 // toSysContext creates a baseline wasm.Context configured by ModuleConfig.
 func (c *moduleConfig) toSysContext() (sysCtx *internalsys.Context, err error) {
-	var environ []string // Intentionally doesn't pre-allocate to reduce logic to default to nil.
+	var environ [][]byte // Intentionally doesn't pre-allocate to reduce logic to default to nil.
 	// Same validation as syscall.Setenv for Linux
 	for i := 0; i < len(c.environ); i += 2 {
 		key, value := c.environ[i], c.environ[i+1]
-		if len(key) == 0 {
+		keyLen := len(key)
+		if keyLen == 0 {
 			err = errors.New("environ invalid: empty key")
 			return
 		}
-		for j := 0; j < len(key); j++ {
-			if key[j] == '=' { // NUL enforced in NewContext
+		valueLen := len(value)
+		result := make([]byte, keyLen+valueLen+1)
+		j := 0
+		for ; j < keyLen; j++ {
+			if k := key[j]; k == '=' { // NUL enforced in NewContext
 				err = errors.New("environ invalid: key contains '=' character")
 				return
+			} else {
+				result[j] = k
 			}
 		}
-		environ = append(environ, key+"="+value)
+		result[j] = '='
+		copy(result[j+1:], value)
+		environ = append(environ, result)
 	}
 
 	return internalsys.NewContext(
diff --git a/config_test.go b/config_test.go
index 358a5769..d0cde23d 100644
--- a/config_test.go
+++ b/config_test.go
@@ -639,8 +639,8 @@ func requireSysContext(
 ) *internalsys.Context {
 	sysCtx, err := internalsys.NewContext(
 		max,
-		args,
-		environ,
+		toByteSlices(args),
+		toByteSlices(environ),
 		stdin,
 		stdout,
 		stderr,
diff --git a/imports/wasi_snapshot_preview1/args.go b/imports/wasi_snapshot_preview1/args.go
index 441c9dfc..e549a624 100644
--- a/imports/wasi_snapshot_preview1/args.go
+++ b/imports/wasi_snapshot_preview1/args.go
@@ -59,7 +59,7 @@ var argsGet = &wasm.HostFunc{
 func argsGetFn(ctx context.Context, mod api.Module, params []uint64) Errno {
 	sysCtx := mod.(*wasm.CallContext).Sys
 	argv, argvBuf := uint32(params[0]), uint32(params[1])
-	return writeOffsetsAndNullTerminatedValues(ctx, mod.Memory(), sysCtx.Args(), argv, argvBuf)
+	return writeOffsetsAndNullTerminatedValues(ctx, mod.Memory(), sysCtx.Args(), argv, argvBuf, sysCtx.ArgsSize())
 }
 
 // argsSizesGet is the WASI function named functionArgsSizesGet that reads
@@ -108,7 +108,8 @@ func argsSizesGetFn(ctx context.Context, mod api.Module, params []uint64) Errno
 	mem := mod.Memory()
 	resultArgc, resultArgvLen := uint32(params[0]), uint32(params[1])
 
-	// Write the Errno back to the stack
+	// argc and argv_len offsets are not necessarily sequential, so we have to
+	// write them independently.
 	if !mem.WriteUint32Le(ctx, resultArgc, uint32(len(sysCtx.Args()))) {
 		return ErrnoFault
 	}
diff --git a/imports/wasi_snapshot_preview1/environ.go b/imports/wasi_snapshot_preview1/environ.go
index 5d9c0e4d..d5b75cd7 100644
--- a/imports/wasi_snapshot_preview1/environ.go
+++ b/imports/wasi_snapshot_preview1/environ.go
@@ -60,7 +60,7 @@ func environGetFn(ctx context.Context, mod api.Module, params []uint64) Errno {
 	sysCtx := mod.(*wasm.CallContext).Sys
 	environ, environBuf := uint32(params[0]), uint32(params[1])
 
-	return writeOffsetsAndNullTerminatedValues(ctx, mod.Memory(), sysCtx.Environ(), environ, environBuf)
+	return writeOffsetsAndNullTerminatedValues(ctx, mod.Memory(), sysCtx.Environ(), environ, environBuf, sysCtx.EnvironSize())
 }
 
 // environSizesGet is the WASI function named functionEnvironSizesGet that
@@ -111,6 +111,8 @@ func environSizesGetFn(ctx context.Context, mod api.Module, params []uint64) Err
 	mem := mod.Memory()
 	resultEnvironc, resultEnvironvLen := uint32(params[0]), uint32(params[1])
 
+	// environc and environv_len offsets are not necessarily sequential, so we
+	// have to write them independently.
 	if !mem.WriteUint32Le(ctx, resultEnvironc, uint32(len(sysCtx.Environ()))) {
 		return ErrnoFault
 	}
diff --git a/imports/wasi_snapshot_preview1/wasi.go b/imports/wasi_snapshot_preview1/wasi.go
index baf5a6be..9d3cd10c 100644
--- a/imports/wasi_snapshot_preview1/wasi.go
+++ b/imports/wasi_snapshot_preview1/wasi.go
@@ -215,23 +215,41 @@ func exportFunctions(builder wazero.HostModuleBuilder) {
 	exporter.ExportHostFunc(sockShutdown)
 }
 
-func writeOffsetsAndNullTerminatedValues(ctx context.Context, mem api.Memory, values []string, offsets, bytes uint32) Errno {
+// writeOffsetsAndNullTerminatedValues is used to write NUL-terminated values
+// for args or environ, given a pre-defined bytesLen (which includes NUL
+// terminators).
+func writeOffsetsAndNullTerminatedValues(ctx context.Context, mem api.Memory, values [][]byte, offsets, bytes, bytesLen uint32) Errno {
+	// The caller may not place bytes directly after offsets, so we have to
+	// read them independently.
+	valuesLen := len(values)
+	offsetsLen := uint32(valuesLen * 4) // uint32Le
+	offsetsBuf, ok := mem.Read(ctx, offsets, offsetsLen)
+	if !ok {
+		return ErrnoFault
+	}
+	bytesBuf, ok := mem.Read(ctx, bytes, bytesLen)
+	if !ok {
+		return ErrnoFault
+	}
+
+	// Loop through the values, first writing the location of its data to
+	// offsetsBuf[oI], then its NUL-terminated data at bytesBuf[bI]
+	var oI, bI uint32
 	for _, value := range values {
-		// Write current offset and advance it.
-		if !mem.WriteUint32Le(ctx, offsets, bytes) {
-			return ErrnoFault
-		}
-		offsets += 4 // size of uint32
+		// Go can't guarantee inlining as there's not //go:inline directive.
+		// This inlines uint32 little-endian encoding instead.
+		bytesOffset := bytes + bI
+		offsetsBuf[oI] = byte(bytesOffset)
+		offsetsBuf[oI+1] = byte(bytesOffset >> 8)
+		offsetsBuf[oI+2] = byte(bytesOffset >> 16)
+		offsetsBuf[oI+3] = byte(bytesOffset >> 24)
+		oI += 4 // size of uint32 we just wrote
 
 		// Write the next value to memory with a NUL terminator
-		if !mem.Write(ctx, bytes, []byte(value)) {
-			return ErrnoFault
-		}
-		bytes += uint32(len(value))
-		if !mem.WriteByte(ctx, bytes, 0) {
-			return ErrnoFault
-		}
-		bytes++
+		copy(bytesBuf[bI:], value)
+		bI += uint32(len(value))
+		bytesBuf[bI] = 0 // NUL terminator
+		bI++
 	}
 
 	return ErrnoSuccess
diff --git a/imports/wasi_snapshot_preview1/wasi_bench_test.go b/imports/wasi_snapshot_preview1/wasi_bench_test.go
index 913d6b62..cdb2adab 100644
--- a/imports/wasi_snapshot_preview1/wasi_bench_test.go
+++ b/imports/wasi_snapshot_preview1/wasi_bench_test.go
@@ -6,60 +6,46 @@ import (
 	"github.com/tetratelabs/wazero"
 	"github.com/tetratelabs/wazero/api"
 	"github.com/tetratelabs/wazero/internal/testing/proxy"
-	"github.com/tetratelabs/wazero/internal/testing/require"
 )
 
-var testMem = []byte{
-	0,                // environBuf is after this
-	'a', '=', 'b', 0, // null terminated "a=b",
-	'b', '=', 'c', 'd', 0, // null terminated "b=cd"
-	0,          // environ is after this
-	1, 0, 0, 0, // little endian-encoded offset of "a=b"
-	5, 0, 0, 0, // little endian-encoded offset of "b=cd"
-	0,
-}
+// configArgsEnviron ensures the result data are the same between args and ENV.
+var configArgsEnviron = wazero.NewModuleConfig().
+	WithArgs("aa=bbbb", "cccccc=dddddddd", "eeeeeeeeee=ffffffffffff").
+	WithEnv("aa", "bbbb").
+	WithEnv("cccccc", "dddddddd").
+	WithEnv("eeeeeeeeee", "ffffffffffff")
 
-func Test_Benchmark_EnvironGet(t *testing.T) {
-	mod, r, log := requireProxyModule(t, wazero.NewModuleConfig().
-		WithEnv("a", "b").WithEnv("b", "cd"))
-	defer r.Close(testCtx)
-
-	// Invoke environGet and check the memory side effects.
-	requireErrno(t, ErrnoSuccess, mod, functionEnvironGet, uint64(11), uint64(1))
-	require.Equal(t, `
---> proxy.environ_get(environ=11,environ_buf=1)
-	==> wasi_snapshot_preview1.environ_get(environ=11,environ_buf=1)
-	<== ESUCCESS
-<-- (0)
-`, "\n"+log.String())
-
-	mem, ok := mod.Memory().Read(testCtx, 0, uint32(len(testMem)))
-	require.True(t, ok)
-	require.Equal(t, testMem, mem)
-}
-
-func Benchmark_EnvironGet(b *testing.B) {
+func Benchmark_ArgsEnviron(b *testing.B) {
 	r := wazero.NewRuntime(testCtx)
 	defer r.Close(testCtx)
 
-	mod, err := instantiateProxyModule(r, wazero.NewModuleConfig().
-		WithEnv("a", "b").WithEnv("b", "cd"))
+	mod, err := instantiateProxyModule(r, configArgsEnviron)
 	if err != nil {
 		b.Fatal(err)
 	}
 
-	b.Run("environGet", func(b *testing.B) {
-		for i := 0; i < b.N; i++ {
-			results, err := mod.ExportedFunction(functionEnvironGet).Call(testCtx, uint64(0), uint64(4))
-			if err != nil {
-				b.Fatal(err)
+	for _, n := range []string{
+		functionArgsGet,
+		functionArgsSizesGet,
+		functionEnvironGet,
+		functionEnvironSizesGet,
+	} {
+		n := n
+		fn := mod.ExportedFunction(n)
+		b.Run(n, func(b *testing.B) {
+			b.ReportAllocs()
+			for i := 0; i < b.N; i++ {
+				results, err := fn.Call(testCtx, uint64(0), uint64(4))
+				if err != nil {
+					b.Fatal(err)
+				}
+				errno := Errno(results[0])
+				if errno != 0 {
+					b.Fatal(ErrnoName(errno))
+				}
 			}
-			errno := Errno(results[0])
-			if errno != ErrnoSuccess {
-				b.Fatal(ErrnoName(errno))
-			}
-		}
-	})
+		})
+	}
 }
 
 // instantiateProxyModule instantiates a guest that re-exports WASI functions.
diff --git a/internal/gojs/argsenv.go b/internal/gojs/argsenv.go
index 11899b4b..4289bee4 100644
--- a/internal/gojs/argsenv.go
+++ b/internal/gojs/argsenv.go
@@ -26,10 +26,10 @@ func WriteArgsAndEnviron(ctx context.Context, mod api.Module) (argc, argv uint32
 	argc = uint32(len(args))
 	offset := endOfPageZero
 
-	strPtr := func(val, field string, i int) (ptr uint32) {
+	strPtr := func(val []byte, field string, i int) (ptr uint32) {
 		// TODO: return err and format "%s[%d], field, i"
 		ptr = offset
-		mustWrite(ctx, mem, field, offset, append([]byte(val), 0))
+		mustWrite(ctx, mem, field, offset, append(val, 0))
 		offset += uint32(len(val) + 1)
 		if pad := offset % 8; pad != 0 {
 			offset += 8 - pad
diff --git a/internal/sys/sys.go b/internal/sys/sys.go
index c0d21853..2b9f4b19 100644
--- a/internal/sys/sys.go
+++ b/internal/sys/sys.go
@@ -15,7 +15,7 @@ import (
 // Context holds module-scoped system resources currently only supported by
 // built-in host functions.
 type Context struct {
-	args, environ         []string
+	args, environ         [][]byte
 	argsSize, environSize uint32
 	stdin                 io.Reader
 	stdout, stderr        io.Writer
@@ -35,7 +35,7 @@ type Context struct {
 //
 // Note: The count will never be more than math.MaxUint32.
 // See wazero.ModuleConfig WithArgs
-func (c *Context) Args() []string {
+func (c *Context) Args() [][]byte {
 	return c.args
 }
 
@@ -52,7 +52,7 @@ func (c *Context) ArgsSize() uint32 {
 //
 // Note: The count will never be more than math.MaxUint32.
 // See wazero.ModuleConfig WithEnv
-func (c *Context) Environ() []string {
+func (c *Context) Environ() [][]byte {
 	return c.environ
 }
 
@@ -150,7 +150,7 @@ var (
 // Note: max is exposed for testing. max is only used for env/args validation.
 func NewContext(
 	max uint32,
-	args, environ []string,
+	args, environ [][]byte,
 	stdin io.Reader,
 	stdout, stderr io.Writer,
 	randSource io.Reader,
@@ -239,7 +239,7 @@ func clockResolutionInvalid(resolution sys.ClockResolution) bool {
 
 // nullTerminatedByteCount ensures the count or Nul-terminated length of the elements doesn't exceed max, and that no
 // element includes the nul character.
-func nullTerminatedByteCount(max uint32, elements []string) (uint32, error) {
+func nullTerminatedByteCount(max uint32, elements [][]byte) (uint32, error) {
 	count := uint32(len(elements))
 	if count > max {
 		return 0, errors.New("exceeds maximum count")
diff --git a/internal/sys/sys_test.go b/internal/sys/sys_test.go
index 1779dd36..a22ddffb 100644
--- a/internal/sys/sys_test.go
+++ b/internal/sys/sys_test.go
@@ -61,7 +61,7 @@ func TestDefaultSysContext(t *testing.T) {
 func TestNewContext_Args(t *testing.T) {
 	tests := []struct {
 		name         string
-		args         []string
+		args         [][]byte
 		maxSize      uint32
 		expectedSize uint32
 		expectedErr  string
@@ -69,25 +69,25 @@ func TestNewContext_Args(t *testing.T) {
 		{
 			name:         "ok",
 			maxSize:      10,
-			args:         []string{"a", "bc"},
+			args:         [][]byte{[]byte("a"), []byte("bc")},
 			expectedSize: 5,
 		},
 		{
 			name:        "exceeds max count",
 			maxSize:     1,
-			args:        []string{"a", "bc"},
+			args:        [][]byte{[]byte("a"), []byte("bc")},
 			expectedErr: "args invalid: exceeds maximum count",
 		},
 		{
 			name:        "exceeds max size",
 			maxSize:     4,
-			args:        []string{"a", "bc"},
+			args:        [][]byte{[]byte("a"), []byte("bc")},
 			expectedErr: "args invalid: exceeds maximum size",
 		},
 		{
 			name:        "null character",
 			maxSize:     10,
-			args:        []string{"a", string([]byte{'b', 0})},
+			args:        [][]byte{[]byte("a"), {'b', 0}},
 			expectedErr: "args invalid: contains NUL character",
 		},
 	}
@@ -123,7 +123,7 @@ func TestNewContext_Args(t *testing.T) {
 func TestNewContext_Environ(t *testing.T) {
 	tests := []struct {
 		name         string
-		environ      []string
+		environ      [][]byte
 		maxSize      uint32
 		expectedSize uint32
 		expectedErr  string
@@ -131,25 +131,25 @@ func TestNewContext_Environ(t *testing.T) {
 		{
 			name:         "ok",
 			maxSize:      10,
-			environ:      []string{"a=b", "c=de"},
+			environ:      [][]byte{[]byte("a=b"), []byte("c=de")},
 			expectedSize: 9,
 		},
 		{
 			name:        "exceeds max count",
 			maxSize:     1,
-			environ:     []string{"a=b", "c=de"},
+			environ:     [][]byte{[]byte("a=b"), []byte("c=de")},
 			expectedErr: "environ invalid: exceeds maximum count",
 		},
 		{
 			name:        "exceeds max size",
 			maxSize:     4,
-			environ:     []string{"a=b", "c=de"},
+			environ:     [][]byte{[]byte("a=b"), []byte("c=de")},
 			expectedErr: "environ invalid: exceeds maximum size",
 		},
 		{
 			name:        "null character",
 			maxSize:     10,
-			environ:     []string{"a=b", string(append([]byte("c=d"), 0))},
+			environ:     [][]byte{[]byte("a=b"), append([]byte("c=d"), 0)},
 			expectedErr: "environ invalid: contains NUL character",
 		},
 	}