wazevo(amd64): lowering for popcnt, ctz, clz + cpuid refactor (#1959)
Signed-off-by: Edoardo Vacchi <evacchi@users.noreply.github.com>
This commit is contained in:
@@ -1268,7 +1268,7 @@ func (c *amd64Compiler) compileClz(o *wazeroir.UnionOperation) error {
|
||||
}
|
||||
|
||||
unsignedInt := wazeroir.UnsignedInt(o.B1)
|
||||
if c.cpuFeatures.HasExtra(platform.CpuExtraFeatureABM) {
|
||||
if c.cpuFeatures.HasExtra(platform.CpuExtraFeatureAmd64ABM) {
|
||||
if unsignedInt == wazeroir.UnsignedInt32 {
|
||||
c.assembler.CompileRegisterToRegister(amd64.LZCNTL, target.register, target.register)
|
||||
} else {
|
||||
@@ -1331,7 +1331,7 @@ func (c *amd64Compiler) compileCtz(o *wazeroir.UnionOperation) error {
|
||||
}
|
||||
|
||||
unsignedInt := wazeroir.UnsignedInt(o.B1)
|
||||
if c.cpuFeatures.HasExtra(platform.CpuExtraFeatureABM) {
|
||||
if c.cpuFeatures.HasExtra(platform.CpuExtraFeatureAmd64ABM) {
|
||||
if unsignedInt == wazeroir.UnsignedInt32 {
|
||||
c.assembler.CompileRegisterToRegister(amd64.TZCNTL, target.register, target.register)
|
||||
} else {
|
||||
|
||||
@@ -470,17 +470,17 @@ func TestAmd64Compiler_preventCrossedTargetdRegisters(t *testing.T) {
|
||||
|
||||
// mockCpuFlags implements platform.CpuFeatureFlags
|
||||
type mockCpuFlags struct {
|
||||
flags uint64
|
||||
extraFlags uint64
|
||||
flags platform.CpuFeature
|
||||
extraFlags platform.CpuFeature
|
||||
}
|
||||
|
||||
// Has implements the method of the same name in platform.CpuFeatureFlags
|
||||
func (f *mockCpuFlags) Has(flag uint64) bool {
|
||||
func (f *mockCpuFlags) Has(flag platform.CpuFeature) bool {
|
||||
return (f.flags & flag) != 0
|
||||
}
|
||||
|
||||
// HasExtra implements the method of the same name in platform.CpuFeatureFlags
|
||||
func (f *mockCpuFlags) HasExtra(flag uint64) bool {
|
||||
func (f *mockCpuFlags) HasExtra(flag platform.CpuFeature) bool {
|
||||
return (f.extraFlags & flag) != 0
|
||||
}
|
||||
|
||||
@@ -498,7 +498,7 @@ func TestAmd64Compiler_ensureClz_ABM(t *testing.T) {
|
||||
expectedCode: "b80a000000f3480fbdc0",
|
||||
cpuFeatures: &mockCpuFlags{
|
||||
flags: 0,
|
||||
extraFlags: platform.CpuExtraFeatureABM,
|
||||
extraFlags: platform.CpuExtraFeatureAmd64ABM,
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -556,7 +556,7 @@ func TestAmd64Compiler_ensureCtz_ABM(t *testing.T) {
|
||||
expectedCode: "b80a000000f3480fbcc0",
|
||||
cpuFeatures: &mockCpuFlags{
|
||||
flags: 0,
|
||||
extraFlags: platform.CpuExtraFeatureABM,
|
||||
extraFlags: platform.CpuExtraFeatureAmd64ABM,
|
||||
},
|
||||
},
|
||||
{
|
||||
|
||||
@@ -1574,6 +1574,7 @@ var defKinds = [instrMax]defKind{
|
||||
aluRmiR: defKindNone,
|
||||
shiftR: defKindNone,
|
||||
imm: defKindOp2,
|
||||
unaryRmR: defKindOp2,
|
||||
xmmUnaryRmR: defKindOp2,
|
||||
mov64MR: defKindOp2,
|
||||
movsxRmR: defKindOp2,
|
||||
@@ -1626,6 +1627,7 @@ var useKinds = [instrMax]useKind{
|
||||
aluRmiR: useKindOp1Op2Reg,
|
||||
shiftR: useKindOp1Op2Reg,
|
||||
imm: useKindNone,
|
||||
unaryRmR: useKindOp1,
|
||||
xmmUnaryRmR: useKindOp1,
|
||||
mov64MR: useKindOp1,
|
||||
movzxRmR: useKindOp1,
|
||||
|
||||
@@ -10,6 +10,7 @@ import (
|
||||
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc"
|
||||
"github.com/tetratelabs/wazero/internal/engine/wazevo/ssa"
|
||||
"github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi"
|
||||
"github.com/tetratelabs/wazero/internal/platform"
|
||||
)
|
||||
|
||||
// NewBackend returns a new backend for amd64.
|
||||
@@ -21,9 +22,10 @@ func NewBackend() backend.Machine {
|
||||
asNop,
|
||||
)
|
||||
return &machine{
|
||||
ectx: ectx,
|
||||
regAlloc: regalloc.NewAllocator(regInfo),
|
||||
spillSlots: map[regalloc.VRegID]int64{},
|
||||
ectx: ectx,
|
||||
cpuFeatures: platform.CpuFeatures,
|
||||
regAlloc: regalloc.NewAllocator(regInfo),
|
||||
spillSlots: map[regalloc.VRegID]int64{},
|
||||
}
|
||||
}
|
||||
|
||||
@@ -34,6 +36,8 @@ type (
|
||||
ectx *backend.ExecutableContextT[instruction]
|
||||
stackBoundsCheckDisabled bool
|
||||
|
||||
cpuFeatures platform.CpuFeatureFlags
|
||||
|
||||
regAlloc regalloc.Allocator
|
||||
regAllocFn *backend.RegAllocFunction[*instruction, *machine]
|
||||
regAllocStarted bool
|
||||
@@ -218,12 +222,18 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) {
|
||||
m.lowerShiftR(instr, shiftROpShiftLeft)
|
||||
case ssa.OpcodeSshr:
|
||||
m.lowerShiftR(instr, shiftROpShiftRightArithmetic)
|
||||
case ssa.OpcodeUshr:
|
||||
m.lowerShiftR(instr, shiftROpShiftRightLogical)
|
||||
case ssa.OpcodeRotl:
|
||||
m.lowerShiftR(instr, shiftROpRotateLeft)
|
||||
case ssa.OpcodeRotr:
|
||||
m.lowerShiftR(instr, shiftROpRotateRight)
|
||||
case ssa.OpcodeUshr:
|
||||
m.lowerShiftR(instr, shiftROpShiftRightLogical)
|
||||
case ssa.OpcodeClz:
|
||||
m.lowerClz(instr)
|
||||
case ssa.OpcodeCtz:
|
||||
m.lowerCtz(instr)
|
||||
case ssa.OpcodePopcnt:
|
||||
m.lowerUnaryRmR(instr, unaryRmROpcodePopcnt)
|
||||
case ssa.OpcodeUndefined:
|
||||
m.insert(m.allocateInstr().asUD2())
|
||||
case ssa.OpcodeExitWithCode:
|
||||
@@ -320,6 +330,132 @@ func (m *machine) lowerVconst(res ssa.Value, lo, hi uint64) {
|
||||
jmp.asJmp(newOperandLabel(afterLoadLabel))
|
||||
}
|
||||
|
||||
func (m *machine) lowerCtz(instr *ssa.Instruction) {
|
||||
if m.cpuFeatures.HasExtra(platform.CpuExtraFeatureAmd64ABM) {
|
||||
m.lowerUnaryRmR(instr, unaryRmROpcodeTzcnt)
|
||||
} else {
|
||||
// On processors that do not support TZCNT, the BSF instruction is
|
||||
// executed instead. The key difference between TZCNT and BSF
|
||||
// instruction is that if source operand is zero, the content of
|
||||
// destination operand is undefined.
|
||||
// https://www.felixcloutier.com/x86/tzcnt.html
|
||||
|
||||
x := instr.Arg()
|
||||
if !x.Type().IsInt() {
|
||||
panic("BUG?")
|
||||
}
|
||||
_64 := x.Type().Bits() == 64
|
||||
|
||||
xDef := m.c.ValueDefinition(x)
|
||||
rm := m.getOperand_Reg(xDef)
|
||||
rd := m.c.VRegOf(instr.Return())
|
||||
|
||||
// First, we have to check if the target is non-zero.
|
||||
test := m.allocateInstr()
|
||||
test.asCmpRmiR(false, rm, rm.r, _64)
|
||||
m.insert(test)
|
||||
|
||||
jmpNz := m.allocateInstr() // Will backpatch the operands later.
|
||||
m.insert(jmpNz)
|
||||
|
||||
// If the value is zero, we just push the const value.
|
||||
m.lowerIconst(rd, uint64(x.Type().Bits()), _64)
|
||||
|
||||
// Now jump right after the non-zero case.
|
||||
jmpAtEnd := m.allocateInstr() // Will backpatch later.
|
||||
m.insert(jmpAtEnd)
|
||||
|
||||
// jmpNz target label is set here.
|
||||
nop, nz := m.allocateBrTarget()
|
||||
jmpNz.asJmpIf(condNZ, newOperandLabel(nz))
|
||||
m.insert(nop)
|
||||
|
||||
// Emit the non-zero case.
|
||||
bsr := m.allocateInstr()
|
||||
bsr.asUnaryRmR(unaryRmROpcodeBsf, rm, rd, _64)
|
||||
m.insert(bsr)
|
||||
|
||||
// jmpAtEnd target label is set here.
|
||||
nopEnd, end := m.allocateBrTarget()
|
||||
jmpAtEnd.asJmp(newOperandLabel(end))
|
||||
m.insert(nopEnd)
|
||||
}
|
||||
}
|
||||
|
||||
func (m *machine) lowerClz(instr *ssa.Instruction) {
|
||||
if m.cpuFeatures.HasExtra(platform.CpuExtraFeatureAmd64ABM) {
|
||||
m.lowerUnaryRmR(instr, unaryRmROpcodeLzcnt)
|
||||
} else {
|
||||
// On processors that do not support LZCNT, we combine BSR (calculating
|
||||
// most significant set bit) with XOR. This logic is described in
|
||||
// "Replace Raw Assembly Code with Builtin Intrinsics" section in:
|
||||
// https://developer.apple.com/documentation/apple-silicon/addressing-architectural-differences-in-your-macos-code.
|
||||
|
||||
x := instr.Arg()
|
||||
if !x.Type().IsInt() {
|
||||
panic("BUG?")
|
||||
}
|
||||
_64 := x.Type().Bits() == 64
|
||||
|
||||
xDef := m.c.ValueDefinition(x)
|
||||
rm := m.getOperand_Reg(xDef)
|
||||
rd := m.c.VRegOf(instr.Return())
|
||||
|
||||
// First, we have to check if the rm is non-zero as BSR is undefined
|
||||
// on zero. See https://www.felixcloutier.com/x86/bsr.
|
||||
test := m.allocateInstr()
|
||||
test.asCmpRmiR(false, rm, rm.r, _64)
|
||||
m.insert(test)
|
||||
|
||||
jmpNz := m.allocateInstr() // Will backpatch later.
|
||||
m.insert(jmpNz)
|
||||
|
||||
// If the value is zero, we just push the const value.
|
||||
m.lowerIconst(rd, uint64(x.Type().Bits()), _64)
|
||||
|
||||
// Now jump right after the non-zero case.
|
||||
jmpAtEnd := m.allocateInstr() // Will backpatch later.
|
||||
m.insert(jmpAtEnd)
|
||||
|
||||
// jmpNz target label is set here.
|
||||
nop, nz := m.allocateBrTarget()
|
||||
jmpNz.asJmpIf(condNZ, newOperandLabel(nz))
|
||||
m.insert(nop)
|
||||
|
||||
// Emit the non-zero case.
|
||||
tmp := m.c.VRegOf(instr.Return())
|
||||
bsr := m.allocateInstr()
|
||||
bsr.asUnaryRmR(unaryRmROpcodeBsr, rm, tmp, _64)
|
||||
m.insert(bsr)
|
||||
|
||||
// Now we XOR the value with the bit length minus one.
|
||||
xor := m.allocateInstr()
|
||||
xor.asAluRmiR(aluRmiROpcodeXor, newOperandImm32(uint32(x.Type().Bits()-1)), tmp, _64)
|
||||
m.insert(xor)
|
||||
|
||||
// jmpAtEnd target label is set here.
|
||||
nopEnd, end := m.allocateBrTarget()
|
||||
jmpAtEnd.asJmp(newOperandLabel(end))
|
||||
m.insert(nopEnd)
|
||||
}
|
||||
}
|
||||
|
||||
func (m *machine) lowerUnaryRmR(si *ssa.Instruction, op unaryRmROpcode) {
|
||||
x := si.Arg()
|
||||
if !x.Type().IsInt() {
|
||||
panic("BUG?")
|
||||
}
|
||||
_64 := x.Type().Bits() == 64
|
||||
|
||||
xDef := m.c.ValueDefinition(x)
|
||||
rm := m.getOperand_Imm32_Reg(xDef)
|
||||
rd := m.c.VRegOf(si.Return())
|
||||
|
||||
instr := m.allocateInstr()
|
||||
instr.asUnaryRmR(op, rm, rd, _64)
|
||||
m.insert(instr)
|
||||
}
|
||||
|
||||
func (m *machine) lowerLoad(ptr ssa.Value, offset uint32, typ ssa.Type, dst regalloc.VReg) {
|
||||
mem := newOperandMem(m.lowerToAddressMode(ptr, offset))
|
||||
load := m.allocateInstr()
|
||||
|
||||
@@ -9,6 +9,7 @@ import (
|
||||
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc"
|
||||
"github.com/tetratelabs/wazero/internal/engine/wazevo/ssa"
|
||||
"github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi"
|
||||
"github.com/tetratelabs/wazero/internal/platform"
|
||||
"github.com/tetratelabs/wazero/internal/testing/require"
|
||||
)
|
||||
|
||||
@@ -270,3 +271,161 @@ L1:
|
||||
ud2
|
||||
`, m.Format())
|
||||
}
|
||||
|
||||
func Test_machine_lowerClz(t *testing.T) {
|
||||
for _, tc := range []struct {
|
||||
name string
|
||||
setup func(*mockCompiler, ssa.Builder, *machine) *backend.SSAValueDefinition
|
||||
cpuFlags platform.CpuFeatureFlags
|
||||
tpe ssa.Type
|
||||
exp string
|
||||
}{
|
||||
{
|
||||
name: "no extra flags (64)",
|
||||
cpuFlags: &mockCpuFlags{},
|
||||
tpe: ssa.TypeI64,
|
||||
exp: `
|
||||
testq %rax, %rax
|
||||
jnz L1
|
||||
movabsq $64, %rcx
|
||||
jmp L2
|
||||
L1:
|
||||
bsrq %rax, %rcx
|
||||
xor $63, %rcx
|
||||
L2:
|
||||
`,
|
||||
},
|
||||
{
|
||||
name: "ABM (64)",
|
||||
cpuFlags: &mockCpuFlags{extraFlags: platform.CpuExtraFeatureAmd64ABM},
|
||||
tpe: ssa.TypeI64,
|
||||
exp: `
|
||||
lzcntq %rax, %rcx
|
||||
`,
|
||||
},
|
||||
{
|
||||
name: "no extra flags (32)",
|
||||
cpuFlags: &mockCpuFlags{},
|
||||
tpe: ssa.TypeI32,
|
||||
exp: `
|
||||
testl %eax, %eax
|
||||
jnz L1
|
||||
movl $32, %ecx
|
||||
jmp L2
|
||||
L1:
|
||||
bsrl %eax, %ecx
|
||||
xor $31, %ecx
|
||||
L2:
|
||||
`,
|
||||
},
|
||||
{
|
||||
name: "ABM (32)",
|
||||
cpuFlags: &mockCpuFlags{extraFlags: platform.CpuExtraFeatureAmd64ABM},
|
||||
tpe: ssa.TypeI32,
|
||||
exp: `
|
||||
lzcntl %eax, %ecx
|
||||
`,
|
||||
},
|
||||
} {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
ctx, b, m := newSetupWithMockContext()
|
||||
p := b.CurrentBlock().AddParam(b, tc.tpe)
|
||||
m.cpuFeatures = tc.cpuFlags
|
||||
|
||||
ctx.definitions[p] = &backend.SSAValueDefinition{BlockParamValue: p, BlkParamVReg: raxVReg}
|
||||
ctx.vRegMap[0] = rcxVReg
|
||||
instr := &ssa.Instruction{}
|
||||
instr.AsClz(p)
|
||||
m.lowerClz(instr)
|
||||
m.ectx.FlushPendingInstructions()
|
||||
m.ectx.RootInstr = m.ectx.PerBlockHead
|
||||
require.Equal(t, tc.exp, m.Format())
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func Test_machine_lowerCtz(t *testing.T) {
|
||||
for _, tc := range []struct {
|
||||
name string
|
||||
setup func(*mockCompiler, ssa.Builder, *machine) *backend.SSAValueDefinition
|
||||
cpuFlags platform.CpuFeatureFlags
|
||||
tpe ssa.Type
|
||||
exp string
|
||||
}{
|
||||
{
|
||||
name: "no extra flags (64)",
|
||||
cpuFlags: &mockCpuFlags{},
|
||||
tpe: ssa.TypeI64,
|
||||
exp: `
|
||||
testq %rax, %rax
|
||||
jnz L1
|
||||
movabsq $64, %rcx
|
||||
jmp L2
|
||||
L1:
|
||||
bsfq %rax, %rcx
|
||||
L2:
|
||||
`,
|
||||
},
|
||||
{
|
||||
name: "ABM (64)",
|
||||
cpuFlags: &mockCpuFlags{extraFlags: platform.CpuExtraFeatureAmd64ABM},
|
||||
tpe: ssa.TypeI64,
|
||||
exp: `
|
||||
tzcntq %rax, %rcx
|
||||
`,
|
||||
},
|
||||
{
|
||||
name: "no extra flags (32)",
|
||||
cpuFlags: &mockCpuFlags{},
|
||||
tpe: ssa.TypeI32,
|
||||
exp: `
|
||||
testl %eax, %eax
|
||||
jnz L1
|
||||
movl $32, %ecx
|
||||
jmp L2
|
||||
L1:
|
||||
bsfl %eax, %ecx
|
||||
L2:
|
||||
`,
|
||||
},
|
||||
{
|
||||
name: "ABM (32)",
|
||||
cpuFlags: &mockCpuFlags{extraFlags: platform.CpuExtraFeatureAmd64ABM},
|
||||
tpe: ssa.TypeI32,
|
||||
exp: `
|
||||
tzcntl %eax, %ecx
|
||||
`,
|
||||
},
|
||||
} {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
ctx, b, m := newSetupWithMockContext()
|
||||
p := b.CurrentBlock().AddParam(b, tc.tpe)
|
||||
m.cpuFeatures = tc.cpuFlags
|
||||
|
||||
ctx.definitions[p] = &backend.SSAValueDefinition{BlockParamValue: p, BlkParamVReg: raxVReg}
|
||||
ctx.vRegMap[0] = rcxVReg
|
||||
instr := &ssa.Instruction{}
|
||||
instr.AsCtz(p)
|
||||
m.lowerCtz(instr)
|
||||
m.ectx.FlushPendingInstructions()
|
||||
m.ectx.RootInstr = m.ectx.PerBlockHead
|
||||
require.Equal(t, tc.exp, m.Format())
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// mockCpuFlags implements platform.CpuFeatureFlags
|
||||
type mockCpuFlags struct {
|
||||
flags platform.CpuFeature
|
||||
extraFlags platform.CpuFeature
|
||||
}
|
||||
|
||||
// Has implements the method of the same name in platform.CpuFeatureFlags
|
||||
func (f *mockCpuFlags) Has(flag platform.CpuFeature) bool {
|
||||
return (f.flags & flag) != 0
|
||||
}
|
||||
|
||||
// HasExtra implements the method of the same name in platform.CpuFeatureFlags
|
||||
func (f *mockCpuFlags) HasExtra(flag platform.CpuFeature) bool {
|
||||
return (f.extraFlags & flag) != 0
|
||||
}
|
||||
|
||||
@@ -122,6 +122,15 @@ func TestE2E(t *testing.T) {
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "integer bit counts", m: testcases.IntegerBitCounts.Module,
|
||||
calls: []callCase{{
|
||||
params: []uint64{10, 100},
|
||||
expResults: []uint64{
|
||||
28, 1, 2, 57, 2, 3,
|
||||
},
|
||||
}},
|
||||
},
|
||||
{
|
||||
name: "many_params_many_results",
|
||||
m: testcases.ManyParamsManyResults.Module,
|
||||
|
||||
25
internal/platform/cpuid.go
Normal file
25
internal/platform/cpuid.go
Normal file
@@ -0,0 +1,25 @@
|
||||
package platform
|
||||
|
||||
// CpuFeatureFlags exposes methods for querying CPU capabilities.
type CpuFeatureFlags interface {
	// Has returns true when the specified CpuFeature flag is supported.
	Has(cpuFeature CpuFeature) bool
	// HasExtra returns true when the specified extra CpuFeature flag is supported.
	HasExtra(cpuFeature CpuFeature) bool
}

// CpuFeature identifies a single CPU capability as a bit mask over a uint64.
// Values are meant to be passed to CpuFeatureFlags.Has or
// CpuFeatureFlags.HasExtra, depending on which constant group they belong to.
type CpuFeature uint64

// Flags to be queried with CpuFeatureFlags.Has.
//
// NOTE(review): the bit positions look like they mirror the ECX output of
// CPUID leaf 1 — confirm against the code that loads the flags.
const (
	// CpuFeatureAmd64SSE3 is the flag to query CpuFeatureFlags.Has for SSEv3 capabilities on amd64.
	CpuFeatureAmd64SSE3 CpuFeature = 1 << 0
	// CpuFeatureAmd64SSE4_1 is the flag to query CpuFeatureFlags.Has for SSEv4.1 capabilities on amd64.
	CpuFeatureAmd64SSE4_1 CpuFeature = 1 << 19
	// CpuFeatureAmd64SSE4_2 is the flag to query CpuFeatureFlags.Has for SSEv4.2 capabilities on amd64.
	CpuFeatureAmd64SSE4_2 CpuFeature = 1 << 20
)

// Flags to be queried with CpuFeatureFlags.HasExtra.
//
// NOTE(review): the bit position looks like it mirrors the ECX output of the
// extended CPUID leaf 0x80000001 — confirm against the code that loads the flags.
const (
	// CpuExtraFeatureAmd64ABM is the flag to query CpuFeatureFlags.HasExtra for Advanced Bit Manipulation capabilities (e.g. LZCNT) on amd64.
	CpuExtraFeatureAmd64ABM CpuFeature = 1 << 5
)
|
||||
@@ -1,30 +1,8 @@
|
||||
package platform
|
||||
|
||||
const (
|
||||
// CpuFeatureSSE3 is the flag to query CpuFeatureFlags.Has for SSEv3 capabilities
|
||||
CpuFeatureSSE3 = uint64(1)
|
||||
// CpuFeatureSSE4_1 is the flag to query CpuFeatureFlags.Has for SSEv4.1 capabilities
|
||||
CpuFeatureSSE4_1 = uint64(1) << 19
|
||||
// CpuFeatureSSE4_2 is the flag to query CpuFeatureFlags.Has for SSEv4.2 capabilities
|
||||
CpuFeatureSSE4_2 = uint64(1) << 20
|
||||
)
|
||||
|
||||
const (
|
||||
// CpuExtraFeatureABM is the flag to query CpuFeatureFlags.HasExtra for Advanced Bit Manipulation capabilities (e.g. LZCNT)
|
||||
CpuExtraFeatureABM = uint64(1) << 5
|
||||
)
|
||||
|
||||
// CpuFeatures exposes the capabilities for this CPU, queried via the Has, HasExtra methods
|
||||
var CpuFeatures CpuFeatureFlags = loadCpuFeatureFlags()
|
||||
|
||||
// CpuFeatureFlags exposes methods for querying CPU capabilities
|
||||
type CpuFeatureFlags interface {
|
||||
// Has returns true when the specified flag (represented as uint64) is supported
|
||||
Has(cpuFeature uint64) bool
|
||||
// HasExtra returns true when the specified extraFlag (represented as uint64) is supported
|
||||
HasExtra(cpuFeature uint64) bool
|
||||
}
|
||||
|
||||
// cpuFeatureFlags implements CpuFeatureFlags interface
|
||||
type cpuFeatureFlags struct {
|
||||
flags uint64
|
||||
@@ -69,11 +47,11 @@ func loadCpuFeatureFlags() CpuFeatureFlags {
|
||||
}
|
||||
|
||||
// Has implements the same method on the CpuFeatureFlags interface
|
||||
func (f *cpuFeatureFlags) Has(cpuFeature uint64) bool {
|
||||
return (f.flags & cpuFeature) != 0
|
||||
func (f *cpuFeatureFlags) Has(cpuFeature CpuFeature) bool {
|
||||
return (f.flags & uint64(cpuFeature)) != 0
|
||||
}
|
||||
|
||||
// HasExtra implements the same method on the CpuFeatureFlags interface
|
||||
func (f *cpuFeatureFlags) HasExtra(cpuFeature uint64) bool {
|
||||
return (f.extraFlags & cpuFeature) != 0
|
||||
func (f *cpuFeatureFlags) HasExtra(cpuFeature CpuFeature) bool {
|
||||
return (f.extraFlags & uint64(cpuFeature)) != 0
|
||||
}
|
||||
|
||||
@@ -8,11 +8,11 @@ import (
|
||||
|
||||
func TestAmd64CpuId_cpuHasFeature(t *testing.T) {
|
||||
flags := cpuFeatureFlags{
|
||||
flags: CpuFeatureSSE3,
|
||||
extraFlags: CpuExtraFeatureABM,
|
||||
flags: uint64(CpuFeatureAmd64SSE3),
|
||||
extraFlags: uint64(CpuExtraFeatureAmd64ABM),
|
||||
}
|
||||
require.True(t, flags.Has(CpuFeatureSSE3))
|
||||
require.False(t, flags.Has(CpuFeatureSSE4_2))
|
||||
require.True(t, flags.HasExtra(CpuExtraFeatureABM))
|
||||
require.True(t, flags.Has(CpuFeatureAmd64SSE3))
|
||||
require.False(t, flags.Has(CpuFeatureAmd64SSE4_2))
|
||||
require.True(t, flags.HasExtra(CpuExtraFeatureAmd64ABM))
|
||||
require.False(t, flags.HasExtra(1<<6)) // some other value
|
||||
}
|
||||
|
||||
14
internal/platform/cpuid_unsupported.go
Normal file
14
internal/platform/cpuid_unsupported.go
Normal file
@@ -0,0 +1,14 @@
|
||||
//go:build !amd64
|
||||
|
||||
package platform
|
||||
|
||||
var CpuFeatures CpuFeatureFlags = &cpuFeatureFlags{}
|
||||
|
||||
// cpuFeatureFlags implements CpuFeatureFlags for unsupported platforms
|
||||
type cpuFeatureFlags struct{}
|
||||
|
||||
// Has implements the same method on the CpuFeatureFlags interface
|
||||
func (c *cpuFeatureFlags) Has(cpuFeature CpuFeature) bool { return false }
|
||||
|
||||
// HasExtra implements the same method on the CpuFeatureFlags interface
|
||||
func (c *cpuFeatureFlags) HasExtra(cpuFeature CpuFeature) bool { return false }
|
||||
@@ -3,5 +3,5 @@ package platform
|
||||
// init verifies that the current CPU supports the required AMD64 instructions
|
||||
func init() {
|
||||
// Ensure SSE4.1 is supported.
|
||||
archRequirementsVerified = CpuFeatures.Has(CpuFeatureSSE4_1)
|
||||
archRequirementsVerified = CpuFeatures.Has(CpuFeatureAmd64SSE4_1)
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user