fs: extracts syscallfs.ReaderAtOffset for WASI and gojs (#1037)

This extracts a utility `syscallfs.ReaderAtOffset()` to allow WASI and
gojs to re-use the same logic to implement `syscall.Pread`.

What's different than before is that if WASI passes multiple iovecs an
emulated `ReaderAt` will seek to the read position on each call to
`Read` vs once per loop. This was a design decision to keep the call
sites compatible between files that implement ReaderAt and those that
emulate them with Seeker (e.g. avoid the need for a read-scoped closer/
defer function). The main use case for emulation is `embed.file`, whose
seek function is cheap, so there's little performance impact to this.

Signed-off-by: Adrian Cole <adrian@tetrate.io>
This commit is contained in:
Crypt Keeper
2023-01-15 17:30:45 -08:00
committed by GitHub
parent 2ce8988ab7
commit 713e187796
11 changed files with 400 additions and 74 deletions

View File

@@ -610,15 +610,19 @@ there are no strong incentives to complicate the logic.
`ReadAt` is the Go equivalent to `pread`: it does not affect, and is not
affected by, the underlying file offset. Unfortunately, `io.ReaderAt` is not
implemented by all `fs.File`, notably `embed.file`.
implemented by all `fs.File`. For example, as of Go 1.19, `embed.openFile` does
not.
The initial implementation of `fd_pread` instead used `Seek`. To avoid a
regression we fallback to `io.Seeker` when `io.ReaderAt` is not supported.
regression, we fall back to `io.Seeker` when `io.ReaderAt` is not supported.
This requires obtaining the initial file offset, seeking to the intended read
offset, and reseting the file offset the initial state. If this final seek
offset, and resetting the file offset the initial state. If this final seek
fails, the file offset is left in an undefined state. This is not thread-safe.
While seeking per read seems expensive, the common case of `embed.openFile` is
only accessing a single int64 field, which is cheap.
### Pre-opened files
WASI includes `fd_prestat_get` and `fd_prestat_dir_name` functions used to

View File

@@ -502,49 +502,24 @@ func fdReadOrPread(mod api.Module, params []uint64, isPread bool) Errno {
fsc := mod.(*wasm.CallContext).Sys.FS()
fd := uint32(params[0])
iovs := uint32(params[1])
iovsCount := uint32(params[2])
var offset int64
var resultNread uint32
if isPread {
offset = int64(params[3])
resultNread = uint32(params[4])
} else {
resultNread = uint32(params[3])
}
r, ok := fsc.LookupFile(fd)
if !ok {
return ErrnoBadf
}
read := r.File.Read
var reader io.Reader = r.File
iovs := uint32(params[1])
iovsCount := uint32(params[2])
var resultNread uint32
if isPread {
if ra, ok := r.File.(io.ReaderAt); ok {
// ReadAt is the Go equivalent to pread.
read = func(p []byte) (int, error) {
n, err := ra.ReadAt(p, offset)
offset += int64(n)
return n, err
}
} else if s, ok := r.File.(io.Seeker); ok {
// Unfortunately, it is often not supported.
// See /RATIONALE.md "fd_pread: io.Seeker fallback when io.ReaderAt is not supported"
initialOffset, err := s.Seek(0, io.SeekCurrent)
if err != nil {
return ErrnoInval
}
defer func() { _, _ = s.Seek(initialOffset, io.SeekStart) }()
if offset != initialOffset {
_, err := s.Seek(offset, io.SeekStart)
if err != nil {
return ErrnoInval
}
}
} else {
return ErrnoInval
}
offset := int64(params[3])
reader = syscallfs.ReaderAtOffset(r.File, offset)
resultNread = uint32(params[4])
} else {
resultNread = uint32(params[3])
}
var nread uint32
@@ -563,7 +538,7 @@ func fdReadOrPread(mod api.Module, params []uint64, isPread bool) Errno {
return ErrnoFault
}
n, err := read(b)
n, err := reader.Read(b)
nread += uint32(n)
shouldContinue, errno := fdRead_shouldContinueRead(uint32(n), l, err)

View File

@@ -604,18 +604,8 @@ func Test_fdPread_Errors(t *testing.T) {
memory: []byte{'?', '?', '?', '?'}, // pass result.nread validation
expectedErrno: ErrnoBadf,
expectedLog: `
==> wasi_snapshot_preview1.fd_pread(fd=42,iovs=65532,iovs_len=65532,offset=0)
==> wasi_snapshot_preview1.fd_pread(fd=42,iovs=65532,iovs_len=0,offset=0)
<== (nread=,errno=EBADF)
`,
},
{
name: "seek past file",
fd: fd,
offset: int64(len(contents) + 1),
expectedErrno: ErrnoFault,
expectedLog: `
==> wasi_snapshot_preview1.fd_pread(fd=4,iovs=65536,iovs_len=65536,offset=7)
<== (nread=,errno=EFAULT)
`,
},
{
@@ -625,7 +615,7 @@ func Test_fdPread_Errors(t *testing.T) {
memory: []byte{'?'},
expectedErrno: ErrnoFault,
expectedLog: `
==> wasi_snapshot_preview1.fd_pread(fd=4,iovs=65536,iovs_len=65535,offset=0)
==> wasi_snapshot_preview1.fd_pread(fd=4,iovs=65536,iovs_len=0,offset=0)
<== (nread=,errno=EFAULT)
`,
},
@@ -639,7 +629,7 @@ func Test_fdPread_Errors(t *testing.T) {
},
expectedErrno: ErrnoFault,
expectedLog: `
==> wasi_snapshot_preview1.fd_pread(fd=4,iovs=65532,iovs_len=65532,offset=0)
==> wasi_snapshot_preview1.fd_pread(fd=4,iovs=65532,iovs_len=1,offset=0)
<== (nread=,errno=EFAULT)
`,
},
@@ -654,7 +644,7 @@ func Test_fdPread_Errors(t *testing.T) {
},
expectedErrno: ErrnoFault,
expectedLog: `
==> wasi_snapshot_preview1.fd_pread(fd=4,iovs=65528,iovs_len=65528,offset=0)
==> wasi_snapshot_preview1.fd_pread(fd=4,iovs=65528,iovs_len=1,offset=0)
<== (nread=,errno=EFAULT)
`,
},
@@ -670,7 +660,7 @@ func Test_fdPread_Errors(t *testing.T) {
},
expectedErrno: ErrnoFault,
expectedLog: `
==> wasi_snapshot_preview1.fd_pread(fd=4,iovs=65527,iovs_len=65527,offset=0)
==> wasi_snapshot_preview1.fd_pread(fd=4,iovs=65527,iovs_len=1,offset=0)
<== (nread=,errno=EFAULT)
`,
},
@@ -687,8 +677,27 @@ func Test_fdPread_Errors(t *testing.T) {
},
expectedErrno: ErrnoFault,
expectedLog: `
==> wasi_snapshot_preview1.fd_pread(fd=4,iovs=65527,iovs_len=65527,offset=0)
==> wasi_snapshot_preview1.fd_pread(fd=4,iovs=65527,iovs_len=1,offset=0)
<== (nread=,errno=EFAULT)
`,
},
{
name: "offset negative",
fd: fd,
iovs: 1, iovsCount: 1,
resultNread: 10,
memory: []byte{
'?', // `iovs` is after this
9, 0, 0, 0, // = iovs[0].offset
1, 0, 0, 0, // = iovs[0].length
'?',
'?', '?', '?', '?',
},
offset: int64(-1),
expectedErrno: ErrnoIo,
expectedLog: `
==> wasi_snapshot_preview1.fd_pread(fd=4,iovs=65523,iovs_len=1,offset=-1)
<== (nread=,errno=EIO)
`,
},
}
@@ -703,7 +712,7 @@ func Test_fdPread_Errors(t *testing.T) {
memoryWriteOK := mod.Memory().Write(offset, tc.memory)
require.True(t, memoryWriteOK)
requireErrno(t, tc.expectedErrno, mod, FdPreadName, uint64(tc.fd), uint64(tc.iovs+offset), uint64(tc.iovsCount+offset), uint64(tc.offset), uint64(tc.resultNread+offset))
requireErrno(t, tc.expectedErrno, mod, FdPreadName, uint64(tc.fd), uint64(tc.iovs+offset), uint64(tc.iovsCount), uint64(tc.offset), uint64(tc.resultNread+offset))
require.Equal(t, tc.expectedLog, "\n"+log.String())
})
}

View File

@@ -274,17 +274,13 @@ func syscallRead(mod api.Module, fd uint32, offset interface{}, p []byte) (n uin
err = syscall.EBADF
}
var reader io.Reader = f.File
if offset != nil {
if s, ok := f.File.(io.Seeker); ok {
if _, err := s.Seek(toInt64(offset), io.SeekStart); err != nil {
return 0, err
}
} else {
return 0, syscall.ENOTSUP
}
reader = syscallfs.ReaderAtOffset(f.File, toInt64(offset))
}
if nRead, e := f.File.Read(p); e == nil || e == io.EOF {
if nRead, e := reader.Read(p); e == nil || e == io.EOF {
// fs_js.go cannot parse io.EOF so coerce it to nil.
// See https://github.com/golang/go/issues/43913
n = uint32(nRead)
@@ -309,7 +305,7 @@ func (jsfsWrite) invoke(ctx context.Context, mod api.Module, args ...interface{}
}
offset := goos.ValueToUint32(args[2])
byteCount := goos.ValueToUint32(args[3])
fOffset := args[4] // nil unless Pread
fOffset := args[4] // nil unless Pwrite
callback := args[5].(funcWrapper)
if byteCount > 0 { // empty is possible on EOF

View File

@@ -1,7 +1,9 @@
package fs
import (
"bytes"
"fmt"
"io"
"log"
"os"
"syscall"
@@ -34,12 +36,35 @@ func testAdHoc() {
}
}
// Read the full contents of the file using io.Reader
b, err := os.ReadFile("/animals.txt")
if err != nil {
log.Panicln(err)
}
fmt.Println("contents:", string(b))
// Re-open the same file to test io.ReaderAt
f, err := os.Open("/animals.txt")
if err != nil {
log.Panicln(err)
}
defer f.Close()
// Seek to an arbitrary position.
if _, err = f.Seek(4, io.SeekStart); err != nil {
log.Panicln(err)
}
b1 := make([]byte, len(b))
// We expect to revert to the original position on ReadAt zero.
if _, err = f.ReadAt(b1, 0); err != nil {
log.Panicln(err)
}
if !bytes.Equal(b, b1) {
log.Panicln("unexpected ReadAt contents: ", string(b1))
}
b, err = os.ReadFile("/empty.txt")
if err != nil {
log.Panicln(err)

View File

@@ -125,16 +125,36 @@ func TestAdapt_TestFS(t *testing.T) {
// Make a new fs.FS, noting the Go 1.17 fstest doesn't automatically filter
// "." entries in a directory. TODO: remove when we remove 1.17
fsFS := make(gofstest.MapFS, len(fstest.FS)-1)
mapFS := make(gofstest.MapFS, len(fstest.FS)-1)
for k, v := range fstest.FS {
if k != "." {
fsFS[k] = v
mapFS[k] = v
}
}
// Adapt a normal fs.FS to syscallfs.FS
testFS := Adapt("/", fsFS)
tmpDir := t.TempDir()
require.NoError(t, fstest.WriteTestFiles(tmpDir))
dirFS := os.DirFS(tmpDir)
// Adapt it back to fs.FS and run the tests
require.NoError(t, fstest.TestFS(&testFSAdapter{testFS}))
// TODO: We can't currently test embed.FS here because the source of
// fstest.FS are not real files.
tests := []struct {
name string
fs fs.FS
}{
{name: "os.DirFS", fs: dirFS},
{name: "fstest.MapFS", fs: mapFS},
}
for _, tc := range tests {
tc := tc
t.Run(tc.name, func(t *testing.T) {
// Adapt a normal fs.FS to syscallfs.FS
testFS := Adapt("/", tc.fs)
// Adapt it back to fs.FS and run the tests
require.NoError(t, fstest.TestFS(&testFSAdapter{testFS}))
})
}
}

View File

@@ -4,6 +4,7 @@ import (
"io"
"io/fs"
"os"
"syscall"
)
// FS is a writeable fs.FS bridge backed by syscall functions needed for ABI
@@ -134,8 +135,8 @@ func StatPath(fs FS, path string) (fs.FileInfo, error) {
// readFile declares all read interfaces defined on os.File used by wazero.
type readFile interface {
fs.ReadDirFile
io.ReaderAt
io.Seeker
io.ReaderAt // for pread
io.Seeker // fallback for ReaderAt for embed:fs
}
// file declares all interfaces defined on os.File used by wazero.
@@ -146,3 +147,81 @@ type file interface {
}
type syncer interface{ Sync() error }
// ReaderAtOffset gets an io.Reader from a fs.File that reads from an offset,
// yet doesn't affect the underlying position. This is used to implement
// syscall.Pread.
//
// Note: The file accessed shouldn't be used concurrently, but wasm isn't safe
// to use concurrently anyway. Hence, we don't do any locking against parallel
// reads.
func ReaderAtOffset(f fs.File, offset int64) io.Reader {
if ret, ok := f.(io.ReaderAt); ok {
return &readerAtOffset{ret, offset}
} else if ret, ok := f.(io.ReadSeeker); ok {
return &seekToOffsetReader{ret, offset}
} else {
return enosysReader{}
}
}
type enosysReader struct{}
// enosysReader implements io.Reader
func (rs enosysReader) Read([]byte) (n int, err error) {
return 0, syscall.ENOSYS
}
type readerAtOffset struct {
r io.ReaderAt
offset int64
}
// Read implements io.Reader
func (r *readerAtOffset) Read(p []byte) (int, error) {
if len(p) == 0 {
return 0, nil // less overhead on zero-length reads.
}
n, err := r.r.ReadAt(p, r.offset)
r.offset += int64(n)
return n, err
}
// seekToOffsetReader implements io.Reader that seeks to an offset and reverts
// to its initial offset after each call to Read.
//
// See /RATIONALE.md "fd_pread: io.Seeker fallback when io.ReaderAt is not supported"
type seekToOffsetReader struct {
s io.ReadSeeker
offset int64
}
// Read implements io.Reader
func (rs *seekToOffsetReader) Read(p []byte) (int, error) {
if len(p) == 0 {
return 0, nil // less overhead on zero-length reads.
}
// Determine the current position in the file, as we need to revert it.
currentOffset, err := rs.s.Seek(0, io.SeekCurrent)
if err != nil {
return 0, err
}
// Put the read position back when complete.
defer func() { _, _ = rs.s.Seek(currentOffset, io.SeekStart) }()
// If the current offset isn't in sync with this reader, move it.
if rs.offset != currentOffset {
_, err := rs.s.Seek(rs.offset, io.SeekStart)
if err != nil {
return 0, err
}
}
// Perform the read, updating the offset.
n, err := rs.s.Read(p)
rs.offset += int64(n)
return n, err
}

View File

@@ -0,0 +1,63 @@
package syscallfs
import (
"io"
"io/fs"
"os"
"testing"
)
func BenchmarkReaderAtOffset(b *testing.B) {
dirFS := os.DirFS("testdata")
embedFS, err := fs.Sub(readerAtFS, "testdata")
if err != nil {
b.Fatal(err)
}
benches := []struct {
name string
fs fs.FS
ra bool
}{
{name: "os.DirFS io.File", fs: dirFS, ra: false},
{name: "os.DirFS readerAtOffset", fs: dirFS, ra: true},
{name: "embed.FS embed.file", fs: embedFS, ra: false},
{name: "embed.FS seekToOffsetReader", fs: embedFS, ra: true},
}
buf := make([]byte, 3)
for _, bc := range benches {
bc := bc
b.Run(bc.name, func(b *testing.B) {
f, err := bc.fs.Open("wazero.txt")
if err != nil {
b.Fatal(err)
}
defer f.Close()
b.ResetTimer()
for i := 0; i < b.N; i++ {
b.StopTimer()
// Reset the read position back to the beginning of the file.
if _, err = f.(io.Seeker).Seek(0, io.SeekStart); err != nil {
b.Fatal(err)
}
// Get the reader we are benchmarking
var r io.Reader = f
if bc.ra {
r = ReaderAtOffset(f, 3)
}
b.StartTimer()
if _, err := r.Read(buf); err != nil {
b.Fatal(err)
}
b.StopTimer()
}
})
}
}

View File

@@ -1,6 +1,8 @@
package syscallfs
import (
"embed"
_ "embed"
"errors"
"io"
"io/fs"
@@ -11,6 +13,7 @@ import (
"strings"
"syscall"
"testing"
"testing/fstest"
"time"
"github.com/tetratelabs/wazero/internal/platform"
@@ -257,3 +260,154 @@ func (f *testFSAdapter) Open(name string) (fs.File, error) {
func requireErrno(t *testing.T, expected syscall.Errno, actual error) {
require.True(t, errors.Is(actual, expected), "expected %v, but was %v", expected, actual)
}
var (
//go:embed testdata/*.txt
readerAtFS embed.FS
readerAtFile = "wazero.txt"
emptyFile = "empty.txt"
)
func TestReaderAtOffset(t *testing.T) {
embedFS, err := fs.Sub(readerAtFS, "testdata")
require.NoError(t, err)
d, err := embedFS.Open(readerAtFile)
require.NoError(t, err)
defer d.Close()
bytes, err := io.ReadAll(d)
require.NoError(t, err)
mapFS := fstest.MapFS{readerAtFile: &fstest.MapFile{Data: bytes}}
// Write a file as can't open "testdata" in scratch tests because they
// can't read the original filesystem.
tmpDir := t.TempDir()
require.NoError(t, os.WriteFile(path.Join(tmpDir, readerAtFile), bytes, 0o600))
dirFS := os.DirFS(tmpDir)
tests := []struct {
name string
fs fs.FS
}{
{name: "os.DirFS", fs: dirFS},
{name: "embed.FS", fs: embedFS},
{name: "fstest.MapFS", fs: mapFS},
}
buf := make([]byte, 3)
for _, tc := range tests {
tc := tc
t.Run(tc.name, func(t *testing.T) {
f, err := tc.fs.Open(readerAtFile)
require.NoError(t, err)
defer f.Close()
var r io.Reader = f
ra := ReaderAtOffset(f, 0)
requireRead3 := func(r io.Reader, buf []byte) {
n, err := r.Read(buf)
require.NoError(t, err)
require.Equal(t, 3, n)
}
// The file should work as a reader (base case)
requireRead3(r, buf)
require.Equal(t, "waz", string(buf))
buf = buf[:]
// The readerAt impl should be able to start from zero also
requireRead3(ra, buf)
require.Equal(t, "waz", string(buf))
buf = buf[:]
// If the offset didn't change, we expect the next three chars.
requireRead3(r, buf)
require.Equal(t, "ero", string(buf))
buf = buf[:]
// If state was held between reader-at, we expect the same
requireRead3(ra, buf)
require.Equal(t, "ero", string(buf))
buf = buf[:]
// We should also be able to make another reader-at
ra = ReaderAtOffset(f, 3)
requireRead3(ra, buf)
require.Equal(t, "ero", string(buf))
})
}
}
func TestReaderAtOffset_empty(t *testing.T) {
embedFS, err := fs.Sub(readerAtFS, "testdata")
require.NoError(t, err)
d, err := embedFS.Open(readerAtFile)
require.NoError(t, err)
defer d.Close()
mapFS := fstest.MapFS{emptyFile: &fstest.MapFile{}}
// Write a file as can't open "testdata" in scratch tests because they
// can't read the original filesystem.
tmpDir := t.TempDir()
require.NoError(t, os.WriteFile(path.Join(tmpDir, emptyFile), []byte{}, 0o600))
dirFS := os.DirFS(tmpDir)
tests := []struct {
name string
fs fs.FS
}{
{name: "os.DirFS", fs: dirFS},
{name: "embed.FS", fs: embedFS},
{name: "fstest.MapFS", fs: mapFS},
}
buf := make([]byte, 3)
for _, tc := range tests {
tc := tc
t.Run(tc.name, func(t *testing.T) {
f, err := tc.fs.Open(emptyFile)
require.NoError(t, err)
defer f.Close()
var r io.Reader = f
ra := ReaderAtOffset(f, 0)
requireRead3 := func(r io.Reader, buf []byte) {
n, err := r.Read(buf)
require.Equal(t, err, io.EOF)
require.Equal(t, 0, n) // file is empty
}
// The file should work as a reader (base case)
requireRead3(r, buf)
// The readerAt impl should be able to start from zero also
requireRead3(ra, buf)
})
}
}
func TestReaderAtOffset_Unsupported(t *testing.T) {
embedFS, err := fs.Sub(readerAtFS, "testdata")
require.NoError(t, err)
f, err := embedFS.Open(emptyFile)
require.NoError(t, err)
defer f.Close()
// mask both io.ReaderAt and io.Seeker
ra := ReaderAtOffset(struct{ fs.File }{f}, 0)
buf := make([]byte, 3)
_, err = ra.Read(buf)
require.Equal(t, syscall.ENOSYS, err)
}

0
internal/syscallfs/testdata/empty.txt vendored Normal file
View File

View File

@@ -0,0 +1 @@
wazero