add internal/sys.FileTable (#975)

Signed-off-by: Achille Roussel <achille.roussel@gmail.com>
This commit is contained in:
Achille
2022-12-30 14:34:25 -08:00
committed by GitHub
parent 4495979cf0
commit 1db5433154
6 changed files with 331 additions and 71 deletions

View File

@@ -592,6 +592,20 @@ operation will err if it needs to. This helps reduce the complexity of the code
in wazero and also accommodates the scenario where the bytes read are enough to
satisfy its processor.
### File descriptor allocation strategy
File descriptor allocation currently uses a strategy similar the one implemented
by unix systems: when opening a file, the lowest unused number is picked.
The WASI standard documents that programs cannot expect that file descriptor
numbers will be allocated with a lowest-first strategy, and they should instead
assume the values will be random. Since _random_ is a very imprecise concept in
computers, we technically satisfying the implementation with the descriptor
allocation strategy we use in Wazero. We could imagine adding more _randomness_
to the descriptor selection process, however this should never be used as a
security measure to prevent applications from guessing the next file number so
there are no strong incentives to complicate the logic.
### fd_pread: io.Seeker fallback when io.ReaderAt is not supported
`ReadAt` is the Go equivalent to `pread`: it does not affect, and is not

132
internal/sys/file_table.go Normal file
View File

@@ -0,0 +1,132 @@
package sys
import "math/bits"
// FileTable is a data structure mapping 32 bit file descriptor to file objects.
//
// The data structure optimizes for memory density and lookup performance,
// trading off compute at insertion time. This is a useful compromise for the
// use cases we employ it with: files are usually read or written a lot more
// often than they are opened, each operation requires a table lookup so we are
// better off spending extra compute to insert files in the table in order to
// get cheaper lookups. Memory efficiency is also crucial to support scaling
// with programs that open thousands of files: having a high or non-linear
// memory-to-item ratio could otherwise be used as an attack vector by malicous
// applications attempting to damage performance of the host.
type FileTable struct {
masks []uint64
files []*FileEntry
}
// Len returns the number of files stored in the table.
func (t *FileTable) Len() (n int) {
// We could make this a O(1) operation if we cached the number of files in
// the table. More state usually means more problems, so until we have a
// clear need for this, the simple implementation may be a better trade off.
for _, mask := range t.masks {
n += bits.OnesCount64(mask)
}
return n
}
// Grow ensures that t has enough room for n files, potentially reallocating the
// internal buffers if their capacity was too small to hold this many files.
func (t *FileTable) Grow(n int) {
// Round up to a multiple of 64 since this is the smallest increment due to
// using 64 bits masks.
n = (n*64 + 63) / 64
if n > len(t.masks) {
masks := make([]uint64, n)
copy(masks, t.masks)
files := make([]*FileEntry, n*64)
copy(files, t.files)
t.masks = masks
t.files = files
}
}
// Insert inserts the given file to the table, returning the fd that it is
// mapped to.
//
// The method does not perform deduplication, it is possible for the same file
// to be inserted multiple times, each insertion will return a different fd.
func (t *FileTable) Insert(file *FileEntry) (fd uint32) {
offset := 0
insert:
// Note: this loop could be made a lot more efficient using vectorized
// operations: 256 bits vector registers would yield a theoretical 4x
// speed up (e.g. using AVX2).
for index, mask := range t.masks[offset:] {
if ^mask != 0 { // not full?
shift := bits.TrailingZeros64(^mask)
index += offset
fd = uint32(index)*64 + uint32(shift)
t.files[fd] = file
t.masks[index] = mask | uint64(1<<shift)
return fd
}
}
offset = len(t.masks)
n := 2 * len(t.masks)
if n == 0 {
n = 1
}
t.Grow(n)
goto insert
}
// Lookup returns the file associated with the given fd (may be nil).
func (t *FileTable) Lookup(fd uint32) (file *FileEntry, found bool) {
if i := int(fd); i >= 0 && i < len(t.files) {
index := uint(fd) / 64
shift := uint(fd) % 64
if (t.masks[index] & (1 << shift)) != 0 {
file, found = t.files[i], true
}
}
return
}
// Delete deletes the file stored at the given fd from the table.
func (t *FileTable) Delete(fd uint32) {
if index, shift := fd/64, fd%64; int(index) < len(t.masks) {
mask := t.masks[index]
if (mask & (1 << shift)) != 0 {
t.files[fd] = nil
t.masks[index] = mask & ^uint64(1<<shift)
}
}
}
// Range calls f for each file and its associated fd in the table. The function
// f might return false to interupt the iteration.
func (t *FileTable) Range(f func(uint32, *FileEntry) bool) {
for i, mask := range t.masks {
if mask == 0 {
continue
}
for j := uint32(0); j < 64; j++ {
if (mask & (1 << j)) == 0 {
continue
}
if fd := uint32(i)*64 + j; !f(fd, t.files[fd]) {
return
}
}
}
}
// Reset clears the content of the table.
func (t *FileTable) Reset() {
for i := range t.masks {
t.masks[i] = 0
}
for i := range t.files {
t.files[i] = nil
}
}

View File

@@ -0,0 +1,124 @@
package sys_test
import (
"testing"
"github.com/tetratelabs/wazero/internal/sys"
)
func TestFileTable(t *testing.T) {
table := new(sys.FileTable)
if n := table.Len(); n != 0 {
t.Errorf("new table is not empty: length=%d", n)
}
// The id field is used as a sentinel value.
v0 := &sys.FileEntry{Name: "1"}
v1 := &sys.FileEntry{Name: "2"}
v2 := &sys.FileEntry{Name: "3"}
k0 := table.Insert(v0)
k1 := table.Insert(v1)
k2 := table.Insert(v2)
for _, lookup := range []struct {
key uint32
val *sys.FileEntry
}{
{key: k0, val: v0},
{key: k1, val: v1},
{key: k2, val: v2},
} {
if v, ok := table.Lookup(lookup.key); !ok {
t.Errorf("value not found for key '%v'", lookup.key)
} else if v.Name != lookup.val.Name {
t.Errorf("wrong value returned for key '%v': want=%v got=%v", lookup.key, lookup.val.Name, v.Name)
}
}
if n := table.Len(); n != 3 {
t.Errorf("wrong table length: want=3 got=%d", n)
}
k0Found := false
k1Found := false
k2Found := false
table.Range(func(k uint32, v *sys.FileEntry) bool {
var want *sys.FileEntry
switch k {
case k0:
k0Found, want = true, v0
case k1:
k1Found, want = true, v1
case k2:
k2Found, want = true, v2
}
if v.Name != want.Name {
t.Errorf("wrong value found ranging over '%v': want=%v got=%v", k, want.Name, v.Name)
}
return true
})
for _, found := range []struct {
key uint32
ok bool
}{
{key: k0, ok: k0Found},
{key: k1, ok: k1Found},
{key: k2, ok: k2Found},
} {
if !found.ok {
t.Errorf("key not found while ranging over table: %v", found.key)
}
}
for i, deletion := range []struct {
key uint32
}{
{key: k1},
{key: k0},
{key: k2},
} {
table.Delete(deletion.key)
if _, ok := table.Lookup(deletion.key); ok {
t.Errorf("item found after deletion of '%v'", deletion.key)
}
if n, want := table.Len(), 3-(i+1); n != want {
t.Errorf("wrong table length after deletion: want=%d got=%d", want, n)
}
}
}
func BenchmarkFileTableInsert(b *testing.B) {
table := new(sys.FileTable)
entry := new(sys.FileEntry)
for i := 0; i < b.N; i++ {
table.Insert(entry)
if (i % 65536) == 0 {
table.Reset() // to avoid running out of memory
}
}
}
func BenchmarkFileTableLookup(b *testing.B) {
const sentinel = "42"
const numFiles = 65536
table := new(sys.FileTable)
files := make([]uint32, numFiles)
entry := &sys.FileEntry{Name: sentinel}
for i := range files {
files[i] = table.Insert(entry)
}
var f *sys.FileEntry
for i := 0; i < b.N; i++ {
f, _ = table.Lookup(files[i%numFiles])
}
if f.Name != sentinel {
b.Error("wrong file returned by lookup")
}
}

View File

@@ -6,10 +6,8 @@ import (
"fmt"
"io"
"io/fs"
"math"
"os"
"path"
"sync/atomic"
"syscall"
"time"
@@ -182,10 +180,7 @@ type FSContext struct {
// openedFiles is a map of file descriptor numbers (>=FdRoot) to open files
// (or directories) and defaults to empty.
// TODO: This is unguarded, so not goroutine-safe!
openedFiles map[uint32]*FileEntry
// lastFD is not meant to be read directly. Rather by nextFD.
lastFD uint32
openedFiles FileTable
}
var errNotDir = errors.New("not a directory")
@@ -196,15 +191,10 @@ var errNotDir = errors.New("not a directory")
// context can open files in that file system. Any error on opening "." is
// returned.
func NewFSContext(stdin io.Reader, stdout, stderr io.Writer, root fs.FS) (fsc *FSContext, err error) {
fsc = &FSContext{
fs: root,
openedFiles: map[uint32]*FileEntry{
FdStdin: stdinReader(stdin),
FdStdout: stdioWriter(stdout, noopStdoutStat),
FdStderr: stdioWriter(stderr, noopStderrStat),
},
lastFD: FdStderr,
}
fsc = &FSContext{fs: root}
fsc.openedFiles.Insert(stdinReader(stdin))
fsc.openedFiles.Insert(stdioWriter(stdout, noopStdoutStat))
fsc.openedFiles.Insert(stdioWriter(stderr, noopStderrStat))
if root == EmptyFS {
return fsc, nil
@@ -233,9 +223,7 @@ func NewFSContext(stdin io.Reader, stdout, stderr io.Writer, root fs.FS) (fsc *F
return
}
fsc.openedFiles[FdRoot] = &FileEntry{Name: "/", File: rootDir}
fsc.lastFD = FdRoot
fsc.openedFiles.Insert(&FileEntry{Name: "/", File: rootDir})
return fsc, nil
}
@@ -262,24 +250,13 @@ func stdioStat(f interface{}, defaultStat stdioFileInfo) fs.FileInfo {
return defaultStat
}
// nextFD gets the next file descriptor number in a goroutine safe way (monotonically) or zero if we ran out.
// TODO: openedFiles is still not goroutine safe!
// TODO: This can return zero if we ran out of file descriptors. A future change can optimize by re-using an FD pool.
func (c *FSContext) nextFD() uint32 {
if c.lastFD == math.MaxUint32 {
return 0
}
return atomic.AddUint32(&c.lastFD, 1)
}
// OpenedFile returns a file and true if it was opened or nil and false, if syscall.EBADF.
func (c *FSContext) OpenedFile(fd uint32) (*FileEntry, bool) {
f, ok := c.openedFiles[fd]
return f, ok
return c.openedFiles.Lookup(fd)
}
func (c *FSContext) StatFile(fd uint32) (fs.FileInfo, error) {
f, ok := c.openedFiles[fd]
f, ok := c.openedFiles.Lookup(fd)
if !ok {
return nil, syscall.EBADF
}
@@ -340,12 +317,7 @@ func (c *FSContext) OpenFile(name string, flags int, perm fs.FileMode) (newFD ui
return 0, err
}
newFD = c.nextFD()
if newFD == 0 { // TODO: out of file descriptors
_ = f.Close()
return 0, syscall.EBADF
}
c.openedFiles[newFD] = &FileEntry{Name: path.Base(name), File: f}
newFD = c.openedFiles.Insert(&FileEntry{Name: path.Base(name), File: f})
return newFD, nil
}
@@ -408,7 +380,7 @@ func (c *FSContext) cleanPath(name string) string {
// FdWriter returns a valid writer for the given file descriptor or nil if syscall.EBADF.
func (c *FSContext) FdWriter(fd uint32) io.Writer {
// Check to see if the file descriptor is available
if f, ok := c.openedFiles[fd]; !ok {
if f, ok := c.openedFiles.Lookup(fd); !ok {
return nil
} else if writer, ok := f.File.(io.Writer); !ok {
// Go's syscall.Write also returns EBADF if the FD is present, but not writeable
@@ -427,7 +399,7 @@ func (c *FSContext) FdReader(fd uint32) io.Reader {
return nil // directory, not a readable file.
}
if f, ok := c.openedFiles[fd]; !ok {
if f, ok := c.openedFiles.Lookup(fd); !ok {
return nil // TODO: could be a directory not a file.
} else {
return f.File
@@ -436,11 +408,11 @@ func (c *FSContext) FdReader(fd uint32) io.Reader {
// CloseFile returns true if a file was opened and closed without error, or false if syscall.EBADF.
func (c *FSContext) CloseFile(fd uint32) bool {
f, ok := c.openedFiles[fd]
f, ok := c.openedFiles.Lookup(fd)
if !ok {
return false
}
delete(c.openedFiles, fd)
c.openedFiles.Delete(fd)
if err := f.File.Close(); err != nil {
return false
@@ -451,11 +423,14 @@ func (c *FSContext) CloseFile(fd uint32) bool {
// Close implements api.Closer
func (c *FSContext) Close(context.Context) (err error) {
// Close any files opened in this context
for fd, entry := range c.openedFiles {
delete(c.openedFiles, fd)
c.openedFiles.Range(func(fd uint32, entry *FileEntry) bool {
if e := entry.File.Close(); e != nil {
err = e // This means err returned == the last non-nil error.
}
}
return true
})
// A closed FSContext cannot be reused so clear the state instead of
// using Reset.
c.openedFiles = FileTable{}
return
}

View File

@@ -62,13 +62,35 @@ func TestNewFSContext(t *testing.T) {
require.NoError(t, err)
defer fsc.Close(testCtx)
rootFile, _ := fsc.openedFiles.Lookup(FdRoot)
require.Equal(t, tc.fs, fsc.fs)
require.Equal(t, "/", fsc.openedFiles[FdRoot].Name)
rootFile := fsc.openedFiles[FdRoot].File
require.NotNil(t, rootFile)
require.Equal(t, "/", rootFile.Name)
_, osFile := rootFile.(*os.File)
_, osFile := rootFile.File.(*os.File)
require.Equal(t, tc.expectOsFile, osFile)
f0, err := fsc.OpenFile("/", 0, 0)
require.NoError(t, err)
// Verify that each call to OpenFile returns a different file
// descriptor.
f1, err := fsc.OpenFile("/", 0, 0)
require.NoError(t, err)
require.NotEqual(t, f0, f1)
// Verify that file descriptors are reused.
//
// Note that this specific behavior is not required by WASI which
// only documents that file descriptor numbers will be selected
// randomly and applications should not rely on them. We added this
// test to ensure that our implementation properly reuses descriptor
// numbers but if we were to change the reuse strategy, this test
// would likely break and need to be updated.
require.True(t, fsc.CloseFile(f0))
f2, err := fsc.OpenFile("/", 0, 0)
require.NoError(t, err)
require.Equal(t, f0, f2)
})
}
}
@@ -93,15 +115,10 @@ func TestEmptyFSContext(t *testing.T) {
testFS, err := NewFSContext(nil, nil, nil, EmptyFS)
require.NoError(t, err)
expected := &FSContext{
fs: EmptyFS,
openedFiles: map[uint32]*FileEntry{
FdStdin: noopStdin,
FdStdout: noopStdout,
FdStderr: noopStderr,
},
lastFD: FdStderr,
}
expected := &FSContext{fs: EmptyFS}
expected.openedFiles.Insert(noopStdin)
expected.openedFiles.Insert(noopStdout)
expected.openedFiles.Insert(noopStderr)
t.Run("OpenFile doesn't affect state", func(t *testing.T) {
fd, err := testFS.OpenFile("foo.txt", os.O_RDONLY, 0)
@@ -117,11 +134,7 @@ func TestEmptyFSContext(t *testing.T) {
require.NoError(t, err)
// Closes opened files
require.Equal(t, &FSContext{
fs: EmptyFS,
openedFiles: map[uint32]*FileEntry{},
lastFD: FdStderr,
}, testFS)
require.Equal(t, &FSContext{fs: EmptyFS}, testFS)
})
}
@@ -188,17 +201,17 @@ func TestContext_Close(t *testing.T) {
fsc, err := NewFSContext(nil, nil, nil, testfs.FS{"foo": &testfs.File{}})
require.NoError(t, err)
// Verify base case
require.Equal(t, 1+FdRoot, uint32(len(fsc.openedFiles)))
require.Equal(t, 1+FdRoot, uint32(fsc.openedFiles.Len()))
_, err = fsc.OpenFile("foo", os.O_RDONLY, 0)
require.NoError(t, err)
require.Equal(t, 2+FdRoot, uint32(len(fsc.openedFiles)))
require.Equal(t, 2+FdRoot, uint32(fsc.openedFiles.Len()))
// Closing should not err.
require.NoError(t, fsc.Close(testCtx))
// Verify our intended side-effect
require.Zero(t, len(fsc.openedFiles))
require.Zero(t, fsc.openedFiles.Len())
// Verify no error closing again.
require.NoError(t, fsc.Close(testCtx))
@@ -216,5 +229,5 @@ func TestContext_Close_Error(t *testing.T) {
require.EqualError(t, fsc.Close(testCtx), "error closing")
// Paths should clear even under error
require.Zero(t, len(fsc.openedFiles), "expected no opened files")
require.Zero(t, fsc.openedFiles.Len(), "expected no opened files")
}

View File

@@ -51,12 +51,14 @@ func TestDefaultSysContext(t *testing.T) {
require.Equal(t, platform.NewFakeRandSource(), sysCtx.RandSource())
expectedFS, _ := NewFSContext(nil, nil, nil, testfs.FS{})
require.Equal(t, map[uint32]*FileEntry{
FdStdin: noopStdin,
FdStdout: noopStdout,
FdStderr: noopStderr,
FdRoot: {Name: "/", File: emptyRootDir{}},
}, expectedFS.openedFiles)
expectedOpenedFiles := FileTable{}
expectedOpenedFiles.Insert(noopStdin)
expectedOpenedFiles.Insert(noopStdout)
expectedOpenedFiles.Insert(noopStderr)
expectedOpenedFiles.Insert(&FileEntry{Name: "/", File: emptyRootDir{}})
require.Equal(t, expectedOpenedFiles, expectedFS.openedFiles)
require.Equal(t, expectedFS, sysCtx.FS())
}