diff --git a/internal/engine/compiler/impl_amd64.go b/internal/engine/compiler/impl_amd64.go index d0d46442..1012768f 100644 --- a/internal/engine/compiler/impl_amd64.go +++ b/internal/engine/compiler/impl_amd64.go @@ -8,7 +8,6 @@ import ( "bytes" "fmt" "math" - "runtime" "github.com/tetratelabs/wazero/internal/asm" "github.com/tetratelabs/wazero/internal/asm/amd64" @@ -83,8 +82,9 @@ func (c *amd64Compiler) compileNOP() asm.Node { } type amd64Compiler struct { - assembler amd64.Assembler - ir *wazeroir.CompilationResult + assembler amd64.Assembler + ir *wazeroir.CompilationResult + cpuFeatures platform.CpuFeatureFlags // locationStack holds the state of wazeroir virtual stack. // and each item is either placed in register or the actual memory stack. locationStack *runtimeValueLocationStack @@ -103,6 +103,7 @@ func newAmd64Compiler() compiler { c := &amd64Compiler{ assembler: amd64.NewAssembler(), locationStack: newRuntimeValueLocationStack(), + cpuFeatures: platform.CpuFeatures, } return c } @@ -114,6 +115,7 @@ func (c *amd64Compiler) Init(ir *wazeroir.CompilationResult, withListener bool) *c = amd64Compiler{ labels: map[string]*amd64LabelInfo{}, ir: ir, + cpuFeatures: c.cpuFeatures, withListener: withListener, currentLabel: wazeroir.EntrypointLabel, } @@ -1170,7 +1172,7 @@ func (c *amd64Compiler) compileClz(o *wazeroir.OperationClz) error { return err } - if runtime.GOOS != "darwin" && runtime.GOOS != "freebsd" { + if c.cpuFeatures.HasExtra(platform.CpuExtraFeatureABM) { if o.Type == wazeroir.UnsignedInt32 { c.assembler.CompileRegisterToRegister(amd64.LZCNTL, target.register, target.register) } else { @@ -1233,7 +1235,7 @@ func (c *amd64Compiler) compileCtz(o *wazeroir.OperationCtz) error { return err } - if runtime.GOOS != "darwin" && runtime.GOOS != "freebsd" { + if c.cpuFeatures.HasExtra(platform.CpuExtraFeatureABM) { if o.Type == wazeroir.UnsignedInt32 { c.assembler.CompileRegisterToRegister(amd64.TZCNTL, target.register, target.register) } else { diff --git 
a/internal/engine/compiler/impl_amd64_test.go b/internal/engine/compiler/impl_amd64_test.go index 6ee6ceb5..724fc754 100644 --- a/internal/engine/compiler/impl_amd64_test.go +++ b/internal/engine/compiler/impl_amd64_test.go @@ -1,11 +1,13 @@ package compiler import ( + "encoding/hex" "testing" "unsafe" "github.com/tetratelabs/wazero/internal/asm" "github.com/tetratelabs/wazero/internal/asm/amd64" + "github.com/tetratelabs/wazero/internal/platform" "github.com/tetratelabs/wazero/internal/testing/require" "github.com/tetratelabs/wazero/internal/wasm" "github.com/tetratelabs/wazero/internal/wazeroir" @@ -441,6 +443,132 @@ func TestAmd64Compiler_preventCrossedTargetdRegisters(t *testing.T) { } } +// mockCpuFlags is a test double for platform.CpuFeatureFlags backed by two fixed bitmaps. +type mockCpuFlags struct { + flags uint64 + extraFlags uint64 +} + +// Has implements the method of the same name in platform.CpuFeatureFlags by testing flag against flags. +func (f *mockCpuFlags) Has(flag uint64) bool { + return (f.flags & flag) != 0 +} + +// HasExtra implements the method of the same name in platform.CpuFeatureFlags by testing flag against extraFlags. +func (f *mockCpuFlags) HasExtra(flag uint64) bool { + return (f.extraFlags & flag) != 0 +} + +// Relates to #1111 (Clz): older AMD64 CPUs do not support the LZCNT instruction, so +// CPUID should be used to detect it instead of the OS. We simulate presence/absence of the feature +// by overriding the cpuFeatures field of the compiler and comparing the emitted machine code. +func TestAmd64Compiler_ensureClz_ABM(t *testing.T) { + tests := []struct { + name string + cpuFeatures platform.CpuFeatureFlags + expectedCode string + }{ + { + name: "with ABM", + expectedCode: "b80a000000f3480fbdc0", + cpuFeatures: &mockCpuFlags{ + flags: 0, + extraFlags: platform.CpuExtraFeatureABM, + }, + }, + { + name: "without ABM", + expectedCode: "b80a0000004883f8007507b840000000eb08480fbdc04883f03f", + cpuFeatures: &mockCpuFlags{ + flags: 0, + extraFlags: 0, // no flags, thus no ABM, i.e. 
no LZCNT + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + env := newCompilerEnvironment() + + newCompiler := func() compiler { + c := newCompiler().(*amd64Compiler) + // override auto-detected CPU features with the test case + c.cpuFeatures = tt.cpuFeatures + return c + } + + compiler := env.requireNewCompiler(t, newCompiler, nil) + + err := compiler.compileConstI32(&wazeroir.OperationConstI32{Value: 10}) + require.NoError(t, err) + + err = compiler.compileClz(&wazeroir.OperationClz{Type: wazeroir.UnsignedInt64}) + require.NoError(t, err) + + compiler.compileNOP() // pad for jump target (when no ABM) + + code, _, err := compiler.compile() + require.NoError(t, err) + + require.Equal(t, tt.expectedCode, hex.EncodeToString(code)) + }) + } +} + +// Relates to #1111 (Ctz): older AMD64 CPUs do not support the TZCNT instruction emitted for Ctz, so +// CPUID should be used to detect support instead of the OS. We simulate presence/absence of the feature +// by overriding the cpuFeatures field of the compiler. NOTE(review): TZCNT is documented under BMI1, not ABM - confirm the gating flag. +func TestAmd64Compiler_ensureCtz_ABM(t *testing.T) { + tests := []struct { + name string + cpuFeatures platform.CpuFeatureFlags + expectedCode string + }{ + { + name: "with ABM", + expectedCode: "b80a000000f3480fbcc0", + cpuFeatures: &mockCpuFlags{ + flags: 0, + extraFlags: platform.CpuExtraFeatureABM, + }, + }, + { + name: "without ABM", + expectedCode: "b80a0000004883f8007507b840000000eb05f3480fbcc0", + cpuFeatures: &mockCpuFlags{ + flags: 0, + extraFlags: 0, // no flags, thus no ABM, i.e. 
no LZCNT + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + env := newCompilerEnvironment() + + newCompiler := func() compiler { + c := newCompiler().(*amd64Compiler) + // override auto-detected CPU features with the test case + c.cpuFeatures = tt.cpuFeatures + return c + } + + compiler := env.requireNewCompiler(t, newCompiler, nil) + + err := compiler.compileConstI32(&wazeroir.OperationConstI32{Value: 10}) + require.NoError(t, err) + + err = compiler.compileCtz(&wazeroir.OperationCtz{Type: wazeroir.UnsignedInt64}) + require.NoError(t, err) + + compiler.compileNOP() // pad for jump target (when no ABM) + + code, _, err := compiler.compile() + require.NoError(t, err) + + require.Equal(t, tt.expectedCode, hex.EncodeToString(code)) + }) + } +} + // collectRegistersFromRuntimeValues returns the registers occupied by locs. func collectRegistersFromRuntimeValues(locs []*runtimeValueLocation) []asm.Register { out := make([]asm.Register, len(locs)) diff --git a/internal/platform/cpuid_amd64.go b/internal/platform/cpuid_amd64.go new file mode 100644 index 00000000..2702d316 --- /dev/null +++ b/internal/platform/cpuid_amd64.go @@ -0,0 +1,79 @@ +package platform + +const ( + // CpuFeatureSSE3 is the flag to query CpuFeatureFlags.Has for SSE3 capabilities (CPUID leaf 1, ECX bit 0). + CpuFeatureSSE3 = uint64(1) + // CpuFeatureSSE4_1 is the flag to query CpuFeatureFlags.Has for SSE4.1 capabilities (CPUID leaf 1, ECX bit 19). + CpuFeatureSSE4_1 = uint64(1) << 19 + // CpuFeatureSSE4_2 is the flag to query CpuFeatureFlags.Has for SSE4.2 capabilities (CPUID leaf 1, ECX bit 20). + CpuFeatureSSE4_2 = uint64(1) << 20 +) + +const ( + // CpuExtraFeatureABM is the flag to query CpuFeatureFlags.HasExtra for Advanced Bit Manipulation capabilities (e.g. 
LZCNT) + CpuExtraFeatureABM = uint64(1) << 5 +) + +// CpuFeatures exposes the capabilities for this CPU, queried via the Has, HasExtra methods; it is loaded at package initialization, which panics if the queried CPUID ranges are unavailable. +var CpuFeatures CpuFeatureFlags = loadCpuFeatureFlags() + +// CpuFeatureFlags exposes methods for querying CPU capabilities. +type CpuFeatureFlags interface { + // Has returns true when the specified flag (represented as uint64) is supported + Has(cpuFeature uint64) bool + // HasExtra returns true when the specified extraFlag (represented as uint64) is supported + HasExtra(cpuFeature uint64) bool +} + +// cpuFeatureFlags implements the CpuFeatureFlags interface; flags holds the standard-range bitmap and extraFlags the extended-range bitmap. +type cpuFeatureFlags struct { + flags uint64 + extraFlags uint64 +} + +// cpuid exposes the CPUID instruction to the Go layer (https://www.amd.com/system/files/TechDocs/25481.pdf) +// implemented in cpuid_amd64.s +func cpuid(arg1, arg2 uint32) (eax, ebx, ecx, edx uint32) + +// cpuidAsBitmap combines the ECX and EDX results of invoking cpuid into a uint64 bitmap (EDX in the upper 32 bits, ECX in the lower 32 bits). +func cpuidAsBitmap(arg1, arg2 uint32) uint64 { + _ /* eax */, _ /* ebx */, ecx, edx := cpuid(arg1, arg2) + return (uint64(edx) << 32) | uint64(ecx) +} + +// loadStandardRange loads flags from the standard range, and panics if id exceeds the maximum reported by cpuid(0,0). +func loadStandardRange(id uint32) uint64 { + // ensure that the id is in the valid range, returned by cpuid(0,0) + maxRange, _, _, _ := cpuid(0, 0) + if id > maxRange { + panic("cannot query standard CPU flags") + } + return cpuidAsBitmap(id, 0) +} + +// loadExtendedRange loads flags from the extended range, and panics if id exceeds the maximum reported by cpuid(0x80000000,0). +func loadExtendedRange(id uint32) uint64 { + // ensure that the id is in the valid range, returned by cpuid(0x80000000,0) + maxRange, _, _, _ := cpuid(0x80000000, 0) + if id > maxRange { + panic("cannot query extended CPU flags") + } + return cpuidAsBitmap(id, 0) +} + +func loadCpuFeatureFlags() CpuFeatureFlags { + return &cpuFeatureFlags{ + flags: loadStandardRange(1), + extraFlags: loadExtendedRange(0x80000001), + } +} + +// Has implements the same method on the CpuFeatureFlags 
interface +func (f *cpuFeatureFlags) Has(cpuFeature uint64) bool { + return (f.flags & cpuFeature) != 0 +} + +// HasExtra implements the same method on the CpuFeatureFlags interface by testing cpuFeature against extraFlags. +func (f *cpuFeatureFlags) HasExtra(cpuFeature uint64) bool { + return (f.extraFlags & cpuFeature) != 0 +} diff --git a/internal/platform/cpuid_amd64.s b/internal/platform/cpuid_amd64.s new file mode 100644 index 00000000..8d483f3a --- /dev/null +++ b/internal/platform/cpuid_amd64.s @@ -0,0 +1,14 @@ +#include "textflag.h" + +// lifted from github.com/intel-go/cpuid and src/internal/cpu/cpu_x86.s; arg1 is loaded into AX and arg2 into CX before CPUID executes, then AX, BX, CX, DX are returned in that order. +// func cpuid(arg1, arg2 uint32) (eax, ebx, ecx, edx uint32) +TEXT ·cpuid(SB), NOSPLIT, $0-24 + MOVL arg1+0(FP), AX + MOVL arg2+4(FP), CX + CPUID + MOVL AX, eax+8(FP) + MOVL BX, ebx+12(FP) + MOVL CX, ecx+16(FP) + MOVL DX, edx+20(FP) + RET + diff --git a/internal/platform/cpuid_amd64_test.go b/internal/platform/cpuid_amd64_test.go new file mode 100644 index 00000000..05e3d4a3 --- /dev/null +++ b/internal/platform/cpuid_amd64_test.go @@ -0,0 +1,18 @@ +package platform + +import ( + "testing" + + "github.com/tetratelabs/wazero/internal/testing/require" +) + +func TestAmd64CpuId_cpuHasFeature(t *testing.T) { + flags := cpuFeatureFlags{ + flags: CpuFeatureSSE3, + extraFlags: CpuExtraFeatureABM, + } + require.True(t, flags.Has(CpuFeatureSSE3)) + require.False(t, flags.Has(CpuFeatureSSE4_2)) + require.True(t, flags.HasExtra(CpuExtraFeatureABM)) + require.False(t, flags.HasExtra(1<<6)) // a bit that is not set in extraFlags +}