diff --git a/RATIONALE.md b/RATIONALE.md index 7bc05405..9580ea7e 100644 --- a/RATIONALE.md +++ b/RATIONALE.md @@ -931,10 +931,8 @@ Firstly, wasi-testsuite does not require the inode of dot-dot, possibly because the wasip2 adapter doesn't populate it (but we don't really know why). The only other reason to populate it would be to avoid wasi-libc's stat fanout -when it is missing. However, the inode for dot-dot is not cached, and is also -likely not possible to get on a pseudo file. Even if we could get it, we would -have to use stat. In other words, pre-populating this would have the same cost -as waiting for something to call stat instead. +when it is missing. However, wasi-libc explicitly doesn't fan-out to lstat on +the ".." entry on a zero ino. Fetching dot-dot's inode despite the above not only doesn't help wasi-libc, but it also hurts languages that don't use it, such as Go. These languages would @@ -945,10 +943,70 @@ In summary, there are no significant upsides in attempting to pre-fetch dot-dot's inode, and there are downsides to doing it anyway. See - * https://github.com/WebAssembly/wasi-libc/pull/345 + * https://github.com/WebAssembly/wasi-libc/blob/bd950eb128bff337153de217b11270f948d04bb4/libc-bottom-half/cloudlibc/src/libc/dirent/readdir.c#L87-L94 * https://github.com/WebAssembly/wasi-testsuite/blob/main/tests/rust/src/bin/fd_readdir.rs#L108 * https://github.com/bytecodealliance/preview2-prototyping/blob/e4c04bcfbd11c42c27c28984948d501a3e168121/crates/wasi-preview1-component-adapter/src/lib.rs#L1037 +### Why don't we require inodes to be non-zero? + +We don't require a non-zero value for `Dirent.Ino` because doing so can prevent +a real one from resolving later via `Stat_t.Ino`. + +We define `Ino` like `d_ino` in POSIX which doesn't special-case zero. It can +be zero for a few reasons: + +* The file is not a regular file or directory. +* The underlying filesystem does not support inodes. e.g. embed:fs +* A directory doesn't include inodes, but a later stat can. e.g. Windows +* The backend is based on wasi-filesystem (a.k.a wasip2), which has + `directory_entry.inode` optional, and might remove it entirely. + +There are other downsides to returning a zero inode in widely used compilers: + +* File equivalence utilities, like `os.SameFile` will not work. +* wasi-libc's `wasip1` mode will call `lstat` and attempt to retrieve a + non-zero value (unless the entry is named ".."). + +A new compiler may accidentally skip a `Dirent` with a zero `Ino` if emulating +a non-POSIX function and re-using `Dirent.Ino` for `d_fileno`. + +* Linux `getdents` doesn't define `d_fileno` must be non-zero +* BSD `getdirentries` is implementation specific. For example, OpenBSD will + return dirents with a zero `d_fileno`, but Darwin will skip them. + +The above shouldn't be a problem, even in the case of BSD, because `wasip1` is +defined more in terms of `getdents` than `getdirentries`. The bottom half of +either should treat `wasip1` (or any similar ABI such as wasix or wasip2) as a +different operating system and either use different logic that doesn't skip, or +synthesize a fake non-zero `d_fileno` when `d_ino` is zero. + +However, this has been a problem. Go's `syscall.ParseDirent` utility is shared +for all `GOOS=unix`. For simplicity, this abstracts `direntIno` with data from +`d_fileno` or `d_ino`, and drops if either are zero, even if `d_fileno` is the +only field with zero explicitly defined. This led to a change to special case +`GOOS=wasip1` as otherwise virtual files would be unconditionally skipped. + +In practice, this problem is rather unique due to so many compilers relying on +wasi-libc, which tolerates a zero inode. For example, while issues were +reported about the performance regression when wasi-libc began doing a fan-out +on zero `Dirent.Ino`, no issues were reported about dirents being dropped as a +result. + +In summary, rather than complicating implementation and forcing non-zero inodes +for a rare case, we permit zero. We instead document this topic thoroughly, so +that emerging compilers can re-use the research and reference it on conflict. +We also document that `Ino` should be non-zero, so that users implementing that +field will attempt to get it. + +See + * https://github.com/WebAssembly/wasi-filesystem/pull/81 + * https://github.com/WebAssembly/wasi-libc/blob/bd950eb128bff337153de217b11270f948d04bb4/libc-bottom-half/cloudlibc/src/libc/dirent/readdir.c#L87-L94 + * https://linux.die.net/man/3/getdents + * https://www.unix.com/man-page/osx/2/getdirentries/ + * https://man.openbsd.org/OpenBSD-5.4/getdirentries.2 + * https://github.com/golang/go/blob/go1.20/src/syscall/dirent.go#L60-L102 + * https://go-review.googlesource.com/c/go/+/507915 + ## sys.Walltime and Nanotime The `sys` package has two function types, `Walltime` and `Nanotime` for real diff --git a/imports/wasi_snapshot_preview1/fs.go b/imports/wasi_snapshot_preview1/fs.go index efb1be63..c2d755b0 100644 --- a/imports/wasi_snapshot_preview1/fs.go +++ b/imports/wasi_snapshot_preview1/fs.go @@ -1018,7 +1018,7 @@ func writeDirents(buf []byte, dirents []fsapi.Dirent, d_next uint64, direntCount } // writeDirent writes DirentSize bytes -func writeDirent(buf []byte, dNext uint64, ino uint64, dNamlen uint32, dType fs.FileMode) { +func writeDirent(buf []byte, dNext uint64, ino fsapi.Ino, dNamlen uint32, dType fs.FileMode) { le.PutUint64(buf, dNext) // d_next le.PutUint64(buf[8:], ino) // d_ino le.PutUint32(buf[16:], dNamlen) // d_namlen diff --git a/internal/fsapi/dir.go b/internal/fsapi/dir.go index 75f550cc..e70cc5c8 100644 --- a/internal/fsapi/dir.go +++ b/internal/fsapi/dir.go @@ -7,16 +7,38 @@ import ( "time" ) -// Dirent is an entry read from a directory. +// Ino is the file serial number, or zero if unknown. +// +// The inode is used for a file equivalence, like os.SameFile, so any constant +// value will interfere that. +// +// When zero is returned by File.Readdir, certain callers will fan-out to +// File.Stat to retrieve a non-zero value. Callers using this for darwin's +// definition of `getdirentries` conflate zero `d_fileno` with a deleted file +// and skip the entry. See /RATIONALE.md for more on this. +type Ino = uint64 + +// FileType is fs.FileMode masked on fs.ModeType. For example, zero is a +// regular file, fs.ModeDir is a directory and fs.ModeIrregular is unknown. +// +// Note: This is defined by Linux, not POSIX. +type FileType = fs.FileMode + +// Dirent is an entry read from a directory via File.Readdir. // // # Notes // // - This extends `dirent` defined in POSIX with some fields defined by // Linux. See https://man7.org/linux/man-pages/man3/readdir.3.html and // https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/dirent.h.html +// - This has a subset of fields defined in Stat_t. Notably, there is no +// field corresponding to Stat_t.Dev because that value will be constant +// for all files in a directory. To get the Dev value, call File.Stat on +// the directory File.Readdir was called on. type Dirent struct { - // Ino is the file serial number, or zero if not available. - Ino uint64 + // Ino is the file serial number, or zero if not available. See Ino for + // more details including impact returning a zero value. + Ino Ino // Name is the base name of the directory entry. Empty is invalid. Name string diff --git a/internal/fsapi/file.go b/internal/fsapi/file.go index ef17fa64..8f68f35f 100644 --- a/internal/fsapi/file.go +++ b/internal/fsapi/file.go @@ -55,7 +55,7 @@ type File interface { // // - Implementations should cache this result. // - This combined with Dev can implement os.SameFile. - Ino() (uint64, syscall.Errno) + Ino() (Ino, syscall.Errno) // IsDir returns true if this file is a directory or an error there was an // error retrieving this information. diff --git a/internal/fsapi/stat.go b/internal/fsapi/stat.go index bd5584c3..39f48a63 100644 --- a/internal/fsapi/stat.go +++ b/internal/fsapi/stat.go @@ -13,8 +13,9 @@ type Stat_t struct { // Dev is the device ID of device containing the file. Dev uint64 - // Ino is the file serial number. - Ino uint64 + // Ino is the file serial number, or zero if not available. See Ino for + // more details including impact returning a zero value. + Ino Ino // Uid is the user ID that owns the file, or zero if unsupported. // For example, this is unsupported on some virtual filesystems or windows. diff --git a/internal/fsapi/unimplemented.go b/internal/fsapi/unimplemented.go index d69f41fa..560c2f0f 100644 --- a/internal/fsapi/unimplemented.go +++ b/internal/fsapi/unimplemented.go @@ -97,7 +97,7 @@ func (UnimplementedFile) Dev() (uint64, syscall.Errno) { } // Ino implements File.Ino -func (UnimplementedFile) Ino() (uint64, syscall.Errno) { +func (UnimplementedFile) Ino() (Ino, syscall.Errno) { return 0, 0 } diff --git a/internal/sys/fs.go b/internal/sys/fs.go index 7d1c1056..2e0c04d3 100644 --- a/internal/sys/fs.go +++ b/internal/sys/fs.go @@ -139,7 +139,8 @@ func synthesizeDotEntries(f *FileEntry) ([]fsapi.Dirent, syscall.Errno) { } result := [2]fsapi.Dirent{} result[0] = fsapi.Dirent{Name: ".", Ino: dotIno, Type: fs.ModeDir} - // See /RATIONALE.md for why we don't attempt to get an inode for ".." + // See /RATIONALE.md for why we don't attempt to get an inode for ".." and + // why in wasi-libc this won't fan-out either. result[1] = fsapi.Dirent{Name: "..", Ino: 0, Type: fs.ModeDir} return result[:], 0 } diff --git a/internal/sys/lazy.go b/internal/sys/lazy.go index 024b855b..2fbbbd82 100644 --- a/internal/sys/lazy.go +++ b/internal/sys/lazy.go @@ -27,7 +27,7 @@ func (r *lazyDir) Dev() (uint64, syscall.Errno) { } // Ino implements the same method as documented on fsapi.File -func (r *lazyDir) Ino() (uint64, syscall.Errno) { +func (r *lazyDir) Ino() (fsapi.Ino, syscall.Errno) { if f, ok := r.file(); !ok { return 0, syscall.EBADF } else { diff --git a/internal/sysfs/file.go b/internal/sysfs/file.go index 74ef5170..bf31a610 100644 --- a/internal/sysfs/file.go +++ b/internal/sysfs/file.go @@ -124,7 +124,7 @@ type cachedStat struct { dev uint64 // dev is the same as fsapi.Stat_t Ino. - ino uint64 + ino fsapi.Ino // isDir is fsapi.Stat_t Mode masked with fs.ModeDir isDir bool @@ -132,7 +132,7 @@ type cachedStat struct { // cachedStat returns the cacheable parts of fsapi.Stat_t or an error if they // couldn't be retrieved. -func (f *fsFile) cachedStat() (dev, ino uint64, isDir bool, errno syscall.Errno) { +func (f *fsFile) cachedStat() (dev uint64, ino fsapi.Ino, isDir bool, errno syscall.Errno) { if f.cachedSt == nil { if _, errno = f.Stat(); errno != 0 { return @@ -148,7 +148,7 @@ func (f *fsFile) Dev() (uint64, syscall.Errno) { } // Ino implements the same method as documented on fsapi.File -func (f *fsFile) Ino() (uint64, syscall.Errno) { +func (f *fsFile) Ino() (fsapi.Ino, syscall.Errno) { _, ino, _, errno := f.cachedStat() return ino, errno } @@ -435,7 +435,7 @@ func readdir(f readdirFile, path string, n int) (dirents []fsapi.Dirent, errno s dirents = make([]fsapi.Dirent, 0, len(fis)) // linux/darwin won't have to fan out to lstat, but windows will. - var ino uint64 + var ino fsapi.Ino for fi := range fis { t := fis[fi] if ino, errno = inoFromFileInfo(path, t); errno != 0 { diff --git a/internal/sysfs/file_test.go b/internal/sysfs/file_test.go index 269aef8c..6fba3b6c 100644 --- a/internal/sysfs/file_test.go +++ b/internal/sysfs/file_test.go @@ -151,7 +151,7 @@ func TestFileIno(t *testing.T) { tests := []struct { name string fs fs.FS - expectedIno uint64 + expectedIno fsapi.Ino }{ {name: "os.DirFS", fs: dirFS, expectedIno: st.Ino}, {name: "embed.api.FS", fs: embedFS}, diff --git a/internal/sysfs/osfile.go b/internal/sysfs/osfile.go index 2b15928e..fa31dc36 100644 --- a/internal/sysfs/osfile.go +++ b/internal/sysfs/osfile.go @@ -44,7 +44,7 @@ type osFile struct { // cachedStat returns the cacheable parts of fsapi.Stat_t or an error if they // couldn't be retrieved. -func (f *osFile) cachedStat() (dev, ino uint64, isDir bool, errno syscall.Errno) { +func (f *osFile) cachedStat() (dev uint64, ino fsapi.Ino, isDir bool, errno syscall.Errno) { if f.cachedSt == nil { if _, errno = f.Stat(); errno != 0 { return @@ -60,7 +60,7 @@ func (f *osFile) Dev() (uint64, syscall.Errno) { } // Ino implements the same method as documented on fsapi.File -func (f *osFile) Ino() (uint64, syscall.Errno) { +func (f *osFile) Ino() (fsapi.Ino, syscall.Errno) { _, ino, _, errno := f.cachedStat() return ino, errno } diff --git a/internal/sysfs/readfs.go b/internal/sysfs/readfs.go index 6457b991..125ad316 100644 --- a/internal/sysfs/readfs.go +++ b/internal/sysfs/readfs.go @@ -76,7 +76,7 @@ func (r *readFile) Dev() (uint64, syscall.Errno) { } // Ino implements the same method as documented on fsapi.File. -func (r *readFile) Ino() (uint64, syscall.Errno) { +func (r *readFile) Ino() (fsapi.Ino, syscall.Errno) { return r.f.Ino() } diff --git a/internal/sysfs/stat_bsd.go b/internal/sysfs/stat_bsd.go index f0dc673f..481860bf 100644 --- a/internal/sysfs/stat_bsd.go +++ b/internal/sysfs/stat_bsd.go @@ -31,7 +31,7 @@ func statFile(f fs.File) (fsapi.Stat_t, syscall.Errno) { return defaultStatFile(f) } -func inoFromFileInfo(_ string, t fs.FileInfo) (ino uint64, err syscall.Errno) { +func inoFromFileInfo(_ string, t fs.FileInfo) (ino fsapi.Ino, err syscall.Errno) { if d, ok := t.Sys().(*syscall.Stat_t); ok { ino = d.Ino } diff --git a/internal/sysfs/stat_linux.go b/internal/sysfs/stat_linux.go index c1eeaabe..bd2a2d3d 100644 --- a/internal/sysfs/stat_linux.go +++ b/internal/sysfs/stat_linux.go @@ -34,7 +34,7 @@ func statFile(f fs.File) (fsapi.Stat_t, syscall.Errno) { return defaultStatFile(f) } -func inoFromFileInfo(_ string, t fs.FileInfo) (ino uint64, err syscall.Errno) { +func inoFromFileInfo(_ string, t fs.FileInfo) (ino fsapi.Ino, err syscall.Errno) { if d, ok := t.Sys().(*syscall.Stat_t); ok { ino = d.Ino } diff --git a/internal/sysfs/stat_unsupported.go b/internal/sysfs/stat_unsupported.go index 5bdd062a..8e214c90 100644 --- a/internal/sysfs/stat_unsupported.go +++ b/internal/sysfs/stat_unsupported.go @@ -33,7 +33,7 @@ func statFile(f fs.File) (fsapi.Stat_t, syscall.Errno) { return defaultStatFile(f) } -func inoFromFileInfo(_ string, t fs.FileInfo) (ino uint64, err syscall.Errno) { +func inoFromFileInfo(_ string, t fs.FileInfo) (ino fsapi.Ino, err syscall.Errno) { return } diff --git a/internal/sysfs/stat_windows.go b/internal/sysfs/stat_windows.go index 2283ec0d..71bc7308 100644 --- a/internal/sysfs/stat_windows.go +++ b/internal/sysfs/stat_windows.go @@ -72,7 +72,7 @@ func statFile(f fs.File) (fsapi.Stat_t, syscall.Errno) { } // inoFromFileInfo uses stat to get the inode information of the file. -func inoFromFileInfo(filePath string, t fs.FileInfo) (ino uint64, errno syscall.Errno) { +func inoFromFileInfo(filePath string, t fs.FileInfo) (ino fsapi.Ino, errno syscall.Errno) { if filePath == "" { // This is a fs.File backed implementation which doesn't have access to // the original file path.