wazevo(amd64): lowering for popcnt, ctz, clz + cpuid refactor (#1959)
Signed-off-by: Edoardo Vacchi <evacchi@users.noreply.github.com>
This commit is contained in:
@@ -1268,7 +1268,7 @@ func (c *amd64Compiler) compileClz(o *wazeroir.UnionOperation) error {
|
|||||||
}
|
}
|
||||||
|
|
||||||
unsignedInt := wazeroir.UnsignedInt(o.B1)
|
unsignedInt := wazeroir.UnsignedInt(o.B1)
|
||||||
if c.cpuFeatures.HasExtra(platform.CpuExtraFeatureABM) {
|
if c.cpuFeatures.HasExtra(platform.CpuExtraFeatureAmd64ABM) {
|
||||||
if unsignedInt == wazeroir.UnsignedInt32 {
|
if unsignedInt == wazeroir.UnsignedInt32 {
|
||||||
c.assembler.CompileRegisterToRegister(amd64.LZCNTL, target.register, target.register)
|
c.assembler.CompileRegisterToRegister(amd64.LZCNTL, target.register, target.register)
|
||||||
} else {
|
} else {
|
||||||
@@ -1331,7 +1331,7 @@ func (c *amd64Compiler) compileCtz(o *wazeroir.UnionOperation) error {
|
|||||||
}
|
}
|
||||||
|
|
||||||
unsignedInt := wazeroir.UnsignedInt(o.B1)
|
unsignedInt := wazeroir.UnsignedInt(o.B1)
|
||||||
if c.cpuFeatures.HasExtra(platform.CpuExtraFeatureABM) {
|
if c.cpuFeatures.HasExtra(platform.CpuExtraFeatureAmd64ABM) {
|
||||||
if unsignedInt == wazeroir.UnsignedInt32 {
|
if unsignedInt == wazeroir.UnsignedInt32 {
|
||||||
c.assembler.CompileRegisterToRegister(amd64.TZCNTL, target.register, target.register)
|
c.assembler.CompileRegisterToRegister(amd64.TZCNTL, target.register, target.register)
|
||||||
} else {
|
} else {
|
||||||
|
|||||||
@@ -470,17 +470,17 @@ func TestAmd64Compiler_preventCrossedTargetdRegisters(t *testing.T) {
|
|||||||
|
|
||||||
// mockCpuFlags implements platform.CpuFeatureFlags
|
// mockCpuFlags implements platform.CpuFeatureFlags
|
||||||
type mockCpuFlags struct {
|
type mockCpuFlags struct {
|
||||||
flags uint64
|
flags platform.CpuFeature
|
||||||
extraFlags uint64
|
extraFlags platform.CpuFeature
|
||||||
}
|
}
|
||||||
|
|
||||||
// Has implements the method of the same name in platform.CpuFeatureFlags
|
// Has implements the method of the same name in platform.CpuFeatureFlags
|
||||||
func (f *mockCpuFlags) Has(flag uint64) bool {
|
func (f *mockCpuFlags) Has(flag platform.CpuFeature) bool {
|
||||||
return (f.flags & flag) != 0
|
return (f.flags & flag) != 0
|
||||||
}
|
}
|
||||||
|
|
||||||
// HasExtra implements the method of the same name in platform.CpuFeatureFlags
|
// HasExtra implements the method of the same name in platform.CpuFeatureFlags
|
||||||
func (f *mockCpuFlags) HasExtra(flag uint64) bool {
|
func (f *mockCpuFlags) HasExtra(flag platform.CpuFeature) bool {
|
||||||
return (f.extraFlags & flag) != 0
|
return (f.extraFlags & flag) != 0
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -498,7 +498,7 @@ func TestAmd64Compiler_ensureClz_ABM(t *testing.T) {
|
|||||||
expectedCode: "b80a000000f3480fbdc0",
|
expectedCode: "b80a000000f3480fbdc0",
|
||||||
cpuFeatures: &mockCpuFlags{
|
cpuFeatures: &mockCpuFlags{
|
||||||
flags: 0,
|
flags: 0,
|
||||||
extraFlags: platform.CpuExtraFeatureABM,
|
extraFlags: platform.CpuExtraFeatureAmd64ABM,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -556,7 +556,7 @@ func TestAmd64Compiler_ensureCtz_ABM(t *testing.T) {
|
|||||||
expectedCode: "b80a000000f3480fbcc0",
|
expectedCode: "b80a000000f3480fbcc0",
|
||||||
cpuFeatures: &mockCpuFlags{
|
cpuFeatures: &mockCpuFlags{
|
||||||
flags: 0,
|
flags: 0,
|
||||||
extraFlags: platform.CpuExtraFeatureABM,
|
extraFlags: platform.CpuExtraFeatureAmd64ABM,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -1574,6 +1574,7 @@ var defKinds = [instrMax]defKind{
|
|||||||
aluRmiR: defKindNone,
|
aluRmiR: defKindNone,
|
||||||
shiftR: defKindNone,
|
shiftR: defKindNone,
|
||||||
imm: defKindOp2,
|
imm: defKindOp2,
|
||||||
|
unaryRmR: defKindOp2,
|
||||||
xmmUnaryRmR: defKindOp2,
|
xmmUnaryRmR: defKindOp2,
|
||||||
mov64MR: defKindOp2,
|
mov64MR: defKindOp2,
|
||||||
movsxRmR: defKindOp2,
|
movsxRmR: defKindOp2,
|
||||||
@@ -1626,6 +1627,7 @@ var useKinds = [instrMax]useKind{
|
|||||||
aluRmiR: useKindOp1Op2Reg,
|
aluRmiR: useKindOp1Op2Reg,
|
||||||
shiftR: useKindOp1Op2Reg,
|
shiftR: useKindOp1Op2Reg,
|
||||||
imm: useKindNone,
|
imm: useKindNone,
|
||||||
|
unaryRmR: useKindOp1,
|
||||||
xmmUnaryRmR: useKindOp1,
|
xmmUnaryRmR: useKindOp1,
|
||||||
mov64MR: useKindOp1,
|
mov64MR: useKindOp1,
|
||||||
movzxRmR: useKindOp1,
|
movzxRmR: useKindOp1,
|
||||||
|
|||||||
@@ -10,6 +10,7 @@ import (
|
|||||||
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc"
|
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc"
|
||||||
"github.com/tetratelabs/wazero/internal/engine/wazevo/ssa"
|
"github.com/tetratelabs/wazero/internal/engine/wazevo/ssa"
|
||||||
"github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi"
|
"github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi"
|
||||||
|
"github.com/tetratelabs/wazero/internal/platform"
|
||||||
)
|
)
|
||||||
|
|
||||||
// NewBackend returns a new backend for amd64.
|
// NewBackend returns a new backend for amd64.
|
||||||
@@ -21,9 +22,10 @@ func NewBackend() backend.Machine {
|
|||||||
asNop,
|
asNop,
|
||||||
)
|
)
|
||||||
return &machine{
|
return &machine{
|
||||||
ectx: ectx,
|
ectx: ectx,
|
||||||
regAlloc: regalloc.NewAllocator(regInfo),
|
cpuFeatures: platform.CpuFeatures,
|
||||||
spillSlots: map[regalloc.VRegID]int64{},
|
regAlloc: regalloc.NewAllocator(regInfo),
|
||||||
|
spillSlots: map[regalloc.VRegID]int64{},
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -34,6 +36,8 @@ type (
|
|||||||
ectx *backend.ExecutableContextT[instruction]
|
ectx *backend.ExecutableContextT[instruction]
|
||||||
stackBoundsCheckDisabled bool
|
stackBoundsCheckDisabled bool
|
||||||
|
|
||||||
|
cpuFeatures platform.CpuFeatureFlags
|
||||||
|
|
||||||
regAlloc regalloc.Allocator
|
regAlloc regalloc.Allocator
|
||||||
regAllocFn *backend.RegAllocFunction[*instruction, *machine]
|
regAllocFn *backend.RegAllocFunction[*instruction, *machine]
|
||||||
regAllocStarted bool
|
regAllocStarted bool
|
||||||
@@ -218,12 +222,18 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) {
|
|||||||
m.lowerShiftR(instr, shiftROpShiftLeft)
|
m.lowerShiftR(instr, shiftROpShiftLeft)
|
||||||
case ssa.OpcodeSshr:
|
case ssa.OpcodeSshr:
|
||||||
m.lowerShiftR(instr, shiftROpShiftRightArithmetic)
|
m.lowerShiftR(instr, shiftROpShiftRightArithmetic)
|
||||||
|
case ssa.OpcodeUshr:
|
||||||
|
m.lowerShiftR(instr, shiftROpShiftRightLogical)
|
||||||
case ssa.OpcodeRotl:
|
case ssa.OpcodeRotl:
|
||||||
m.lowerShiftR(instr, shiftROpRotateLeft)
|
m.lowerShiftR(instr, shiftROpRotateLeft)
|
||||||
case ssa.OpcodeRotr:
|
case ssa.OpcodeRotr:
|
||||||
m.lowerShiftR(instr, shiftROpRotateRight)
|
m.lowerShiftR(instr, shiftROpRotateRight)
|
||||||
case ssa.OpcodeUshr:
|
case ssa.OpcodeClz:
|
||||||
m.lowerShiftR(instr, shiftROpShiftRightLogical)
|
m.lowerClz(instr)
|
||||||
|
case ssa.OpcodeCtz:
|
||||||
|
m.lowerCtz(instr)
|
||||||
|
case ssa.OpcodePopcnt:
|
||||||
|
m.lowerUnaryRmR(instr, unaryRmROpcodePopcnt)
|
||||||
case ssa.OpcodeUndefined:
|
case ssa.OpcodeUndefined:
|
||||||
m.insert(m.allocateInstr().asUD2())
|
m.insert(m.allocateInstr().asUD2())
|
||||||
case ssa.OpcodeExitWithCode:
|
case ssa.OpcodeExitWithCode:
|
||||||
@@ -320,6 +330,132 @@ func (m *machine) lowerVconst(res ssa.Value, lo, hi uint64) {
|
|||||||
jmp.asJmp(newOperandLabel(afterLoadLabel))
|
jmp.asJmp(newOperandLabel(afterLoadLabel))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (m *machine) lowerCtz(instr *ssa.Instruction) {
|
||||||
|
if m.cpuFeatures.HasExtra(platform.CpuExtraFeatureAmd64ABM) {
|
||||||
|
m.lowerUnaryRmR(instr, unaryRmROpcodeTzcnt)
|
||||||
|
} else {
|
||||||
|
// On processors that do not support TZCNT, the BSF instruction is
|
||||||
|
// executed instead. The key difference between TZCNT and BSF
|
||||||
|
// instruction is that if source operand is zero, the content of
|
||||||
|
// destination operand is undefined.
|
||||||
|
// https://www.felixcloutier.com/x86/tzcnt.html
|
||||||
|
|
||||||
|
x := instr.Arg()
|
||||||
|
if !x.Type().IsInt() {
|
||||||
|
panic("BUG?")
|
||||||
|
}
|
||||||
|
_64 := x.Type().Bits() == 64
|
||||||
|
|
||||||
|
xDef := m.c.ValueDefinition(x)
|
||||||
|
rm := m.getOperand_Reg(xDef)
|
||||||
|
rd := m.c.VRegOf(instr.Return())
|
||||||
|
|
||||||
|
// First, we have to check if the target is non-zero.
|
||||||
|
test := m.allocateInstr()
|
||||||
|
test.asCmpRmiR(false, rm, rm.r, _64)
|
||||||
|
m.insert(test)
|
||||||
|
|
||||||
|
jmpNz := m.allocateInstr() // Will backpatch the operands later.
|
||||||
|
m.insert(jmpNz)
|
||||||
|
|
||||||
|
// If the value is zero, we just push the const value.
|
||||||
|
m.lowerIconst(rd, uint64(x.Type().Bits()), _64)
|
||||||
|
|
||||||
|
// Now jump right after the non-zero case.
|
||||||
|
jmpAtEnd := m.allocateInstr() // Will backpatch later.
|
||||||
|
m.insert(jmpAtEnd)
|
||||||
|
|
||||||
|
// jmpNz target label is set here.
|
||||||
|
nop, nz := m.allocateBrTarget()
|
||||||
|
jmpNz.asJmpIf(condNZ, newOperandLabel(nz))
|
||||||
|
m.insert(nop)
|
||||||
|
|
||||||
|
// Emit the non-zero case.
|
||||||
|
bsr := m.allocateInstr()
|
||||||
|
bsr.asUnaryRmR(unaryRmROpcodeBsf, rm, rd, _64)
|
||||||
|
m.insert(bsr)
|
||||||
|
|
||||||
|
// jmpAtEnd target label is set here.
|
||||||
|
nopEnd, end := m.allocateBrTarget()
|
||||||
|
jmpAtEnd.asJmp(newOperandLabel(end))
|
||||||
|
m.insert(nopEnd)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *machine) lowerClz(instr *ssa.Instruction) {
|
||||||
|
if m.cpuFeatures.HasExtra(platform.CpuExtraFeatureAmd64ABM) {
|
||||||
|
m.lowerUnaryRmR(instr, unaryRmROpcodeLzcnt)
|
||||||
|
} else {
|
||||||
|
// On processors that do not support LZCNT, we combine BSR (calculating
|
||||||
|
// most significant set bit) with XOR. This logic is described in
|
||||||
|
// "Replace Raw Assembly Code with Builtin Intrinsics" section in:
|
||||||
|
// https://developer.apple.com/documentation/apple-silicon/addressing-architectural-differences-in-your-macos-code.
|
||||||
|
|
||||||
|
x := instr.Arg()
|
||||||
|
if !x.Type().IsInt() {
|
||||||
|
panic("BUG?")
|
||||||
|
}
|
||||||
|
_64 := x.Type().Bits() == 64
|
||||||
|
|
||||||
|
xDef := m.c.ValueDefinition(x)
|
||||||
|
rm := m.getOperand_Reg(xDef)
|
||||||
|
rd := m.c.VRegOf(instr.Return())
|
||||||
|
|
||||||
|
// First, we have to check if the rm is non-zero as BSR is undefined
|
||||||
|
// on zero. See https://www.felixcloutier.com/x86/bsr.
|
||||||
|
test := m.allocateInstr()
|
||||||
|
test.asCmpRmiR(false, rm, rm.r, _64)
|
||||||
|
m.insert(test)
|
||||||
|
|
||||||
|
jmpNz := m.allocateInstr() // Will backpatch later.
|
||||||
|
m.insert(jmpNz)
|
||||||
|
|
||||||
|
// If the value is zero, we just push the const value.
|
||||||
|
m.lowerIconst(rd, uint64(x.Type().Bits()), _64)
|
||||||
|
|
||||||
|
// Now jump right after the non-zero case.
|
||||||
|
jmpAtEnd := m.allocateInstr() // Will backpatch later.
|
||||||
|
m.insert(jmpAtEnd)
|
||||||
|
|
||||||
|
// jmpNz target label is set here.
|
||||||
|
nop, nz := m.allocateBrTarget()
|
||||||
|
jmpNz.asJmpIf(condNZ, newOperandLabel(nz))
|
||||||
|
m.insert(nop)
|
||||||
|
|
||||||
|
// Emit the non-zero case.
|
||||||
|
tmp := m.c.VRegOf(instr.Return())
|
||||||
|
bsr := m.allocateInstr()
|
||||||
|
bsr.asUnaryRmR(unaryRmROpcodeBsr, rm, tmp, _64)
|
||||||
|
m.insert(bsr)
|
||||||
|
|
||||||
|
// Now we XOR the value with the bit length minus one.
|
||||||
|
xor := m.allocateInstr()
|
||||||
|
xor.asAluRmiR(aluRmiROpcodeXor, newOperandImm32(uint32(x.Type().Bits()-1)), tmp, _64)
|
||||||
|
m.insert(xor)
|
||||||
|
|
||||||
|
// jmpAtEnd target label is set here.
|
||||||
|
nopEnd, end := m.allocateBrTarget()
|
||||||
|
jmpAtEnd.asJmp(newOperandLabel(end))
|
||||||
|
m.insert(nopEnd)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *machine) lowerUnaryRmR(si *ssa.Instruction, op unaryRmROpcode) {
|
||||||
|
x := si.Arg()
|
||||||
|
if !x.Type().IsInt() {
|
||||||
|
panic("BUG?")
|
||||||
|
}
|
||||||
|
_64 := x.Type().Bits() == 64
|
||||||
|
|
||||||
|
xDef := m.c.ValueDefinition(x)
|
||||||
|
rm := m.getOperand_Imm32_Reg(xDef)
|
||||||
|
rd := m.c.VRegOf(si.Return())
|
||||||
|
|
||||||
|
instr := m.allocateInstr()
|
||||||
|
instr.asUnaryRmR(op, rm, rd, _64)
|
||||||
|
m.insert(instr)
|
||||||
|
}
|
||||||
|
|
||||||
func (m *machine) lowerLoad(ptr ssa.Value, offset uint32, typ ssa.Type, dst regalloc.VReg) {
|
func (m *machine) lowerLoad(ptr ssa.Value, offset uint32, typ ssa.Type, dst regalloc.VReg) {
|
||||||
mem := newOperandMem(m.lowerToAddressMode(ptr, offset))
|
mem := newOperandMem(m.lowerToAddressMode(ptr, offset))
|
||||||
load := m.allocateInstr()
|
load := m.allocateInstr()
|
||||||
|
|||||||
@@ -9,6 +9,7 @@ import (
|
|||||||
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc"
|
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc"
|
||||||
"github.com/tetratelabs/wazero/internal/engine/wazevo/ssa"
|
"github.com/tetratelabs/wazero/internal/engine/wazevo/ssa"
|
||||||
"github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi"
|
"github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi"
|
||||||
|
"github.com/tetratelabs/wazero/internal/platform"
|
||||||
"github.com/tetratelabs/wazero/internal/testing/require"
|
"github.com/tetratelabs/wazero/internal/testing/require"
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -270,3 +271,161 @@ L1:
|
|||||||
ud2
|
ud2
|
||||||
`, m.Format())
|
`, m.Format())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Test_machine_lowerClz checks the formatted instructions emitted by
// machine.lowerClz for i64 and i32 operands, both with no CPU flags set and
// with the ABM extra flag set. The operand is a block parameter mapped to
// raxVReg, and vRegMap[0] is set to rcxVReg for the result.
func Test_machine_lowerClz(t *testing.T) {
|
||||||
|
for _, tc := range []struct {
|
||||||
|
name string
|
||||||
|
// NOTE(review): setup is never assigned or read by any case below.
setup func(*mockCompiler, ssa.Builder, *machine) *backend.SSAValueDefinition
|
||||||
|
cpuFlags platform.CpuFeatureFlags
|
||||||
|
tpe ssa.Type
|
||||||
|
exp string
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
name: "no extra flags (64)",
|
||||||
|
cpuFlags: &mockCpuFlags{},
|
||||||
|
tpe: ssa.TypeI64,
|
||||||
|
exp: `
|
||||||
|
testq %rax, %rax
|
||||||
|
jnz L1
|
||||||
|
movabsq $64, %rcx
|
||||||
|
jmp L2
|
||||||
|
L1:
|
||||||
|
bsrq %rax, %rcx
|
||||||
|
xor $63, %rcx
|
||||||
|
L2:
|
||||||
|
`,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "ABM (64)",
|
||||||
|
cpuFlags: &mockCpuFlags{extraFlags: platform.CpuExtraFeatureAmd64ABM},
|
||||||
|
tpe: ssa.TypeI64,
|
||||||
|
exp: `
|
||||||
|
lzcntq %rax, %rcx
|
||||||
|
`,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "no extra flags (32)",
|
||||||
|
cpuFlags: &mockCpuFlags{},
|
||||||
|
tpe: ssa.TypeI32,
|
||||||
|
exp: `
|
||||||
|
testl %eax, %eax
|
||||||
|
jnz L1
|
||||||
|
movl $32, %ecx
|
||||||
|
jmp L2
|
||||||
|
L1:
|
||||||
|
bsrl %eax, %ecx
|
||||||
|
xor $31, %ecx
|
||||||
|
L2:
|
||||||
|
`,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "ABM (32)",
|
||||||
|
cpuFlags: &mockCpuFlags{extraFlags: platform.CpuExtraFeatureAmd64ABM},
|
||||||
|
tpe: ssa.TypeI32,
|
||||||
|
exp: `
|
||||||
|
lzcntl %eax, %ecx
|
||||||
|
`,
|
||||||
|
},
|
||||||
|
} {
|
||||||
|
t.Run(tc.name, func(t *testing.T) {
|
||||||
|
ctx, b, m := newSetupWithMockContext()
|
||||||
|
p := b.CurrentBlock().AddParam(b, tc.tpe)
|
||||||
|
m.cpuFeatures = tc.cpuFlags
|
||||||
|
|
||||||
|
ctx.definitions[p] = &backend.SSAValueDefinition{BlockParamValue: p, BlkParamVReg: raxVReg}
|
||||||
|
ctx.vRegMap[0] = rcxVReg
|
||||||
|
instr := &ssa.Instruction{}
|
||||||
|
instr.AsClz(p)
|
||||||
|
m.lowerClz(instr)
|
||||||
|
m.ectx.FlushPendingInstructions()
|
||||||
|
m.ectx.RootInstr = m.ectx.PerBlockHead
|
||||||
|
require.Equal(t, tc.exp, m.Format())
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Test_machine_lowerCtz checks the formatted instructions emitted by
// machine.lowerCtz for i64 and i32 operands, both with no CPU flags set and
// with the ABM extra flag set. The operand is a block parameter mapped to
// raxVReg, and vRegMap[0] is set to rcxVReg for the result.
func Test_machine_lowerCtz(t *testing.T) {
|
||||||
|
for _, tc := range []struct {
|
||||||
|
name string
|
||||||
|
// NOTE(review): setup is never assigned or read by any case below.
setup func(*mockCompiler, ssa.Builder, *machine) *backend.SSAValueDefinition
|
||||||
|
cpuFlags platform.CpuFeatureFlags
|
||||||
|
tpe ssa.Type
|
||||||
|
exp string
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
name: "no extra flags (64)",
|
||||||
|
cpuFlags: &mockCpuFlags{},
|
||||||
|
tpe: ssa.TypeI64,
|
||||||
|
exp: `
|
||||||
|
testq %rax, %rax
|
||||||
|
jnz L1
|
||||||
|
movabsq $64, %rcx
|
||||||
|
jmp L2
|
||||||
|
L1:
|
||||||
|
bsfq %rax, %rcx
|
||||||
|
L2:
|
||||||
|
`,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "ABM (64)",
|
||||||
|
cpuFlags: &mockCpuFlags{extraFlags: platform.CpuExtraFeatureAmd64ABM},
|
||||||
|
tpe: ssa.TypeI64,
|
||||||
|
exp: `
|
||||||
|
tzcntq %rax, %rcx
|
||||||
|
`,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "no extra flags (32)",
|
||||||
|
cpuFlags: &mockCpuFlags{},
|
||||||
|
tpe: ssa.TypeI32,
|
||||||
|
exp: `
|
||||||
|
testl %eax, %eax
|
||||||
|
jnz L1
|
||||||
|
movl $32, %ecx
|
||||||
|
jmp L2
|
||||||
|
L1:
|
||||||
|
bsfl %eax, %ecx
|
||||||
|
L2:
|
||||||
|
`,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "ABM (32)",
|
||||||
|
cpuFlags: &mockCpuFlags{extraFlags: platform.CpuExtraFeatureAmd64ABM},
|
||||||
|
tpe: ssa.TypeI32,
|
||||||
|
exp: `
|
||||||
|
tzcntl %eax, %ecx
|
||||||
|
`,
|
||||||
|
},
|
||||||
|
} {
|
||||||
|
t.Run(tc.name, func(t *testing.T) {
|
||||||
|
ctx, b, m := newSetupWithMockContext()
|
||||||
|
p := b.CurrentBlock().AddParam(b, tc.tpe)
|
||||||
|
m.cpuFeatures = tc.cpuFlags
|
||||||
|
|
||||||
|
ctx.definitions[p] = &backend.SSAValueDefinition{BlockParamValue: p, BlkParamVReg: raxVReg}
|
||||||
|
ctx.vRegMap[0] = rcxVReg
|
||||||
|
instr := &ssa.Instruction{}
|
||||||
|
instr.AsCtz(p)
|
||||||
|
m.lowerCtz(instr)
|
||||||
|
m.ectx.FlushPendingInstructions()
|
||||||
|
m.ectx.RootInstr = m.ectx.PerBlockHead
|
||||||
|
require.Equal(t, tc.exp, m.Format())
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// mockCpuFlags implements platform.CpuFeatureFlags
|
||||||
|
type mockCpuFlags struct {
|
||||||
|
flags platform.CpuFeature
|
||||||
|
extraFlags platform.CpuFeature
|
||||||
|
}
|
||||||
|
|
||||||
|
// Has implements the method of the same name in platform.CpuFeatureFlags
|
||||||
|
func (f *mockCpuFlags) Has(flag platform.CpuFeature) bool {
|
||||||
|
return (f.flags & flag) != 0
|
||||||
|
}
|
||||||
|
|
||||||
|
// HasExtra implements the method of the same name in platform.CpuFeatureFlags
|
||||||
|
func (f *mockCpuFlags) HasExtra(flag platform.CpuFeature) bool {
|
||||||
|
return (f.extraFlags & flag) != 0
|
||||||
|
}
|
||||||
|
|||||||
@@ -122,6 +122,15 @@ func TestE2E(t *testing.T) {
|
|||||||
},
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
name: "integer bit counts", m: testcases.IntegerBitCounts.Module,
|
||||||
|
calls: []callCase{{
|
||||||
|
params: []uint64{10, 100},
|
||||||
|
expResults: []uint64{
|
||||||
|
28, 1, 2, 57, 2, 3,
|
||||||
|
},
|
||||||
|
}},
|
||||||
|
},
|
||||||
{
|
{
|
||||||
name: "many_params_many_results",
|
name: "many_params_many_results",
|
||||||
m: testcases.ManyParamsManyResults.Module,
|
m: testcases.ManyParamsManyResults.Module,
|
||||||
|
|||||||
25
internal/platform/cpuid.go
Normal file
25
internal/platform/cpuid.go
Normal file
@@ -0,0 +1,25 @@
|
|||||||
|
package platform
|
||||||
|
|
||||||
|
// CpuFeatureFlags exposes methods for querying CPU capabilities.
type CpuFeatureFlags interface {
	// Has reports whether the given CpuFeature flag is supported.
	Has(cpuFeature CpuFeature) bool
	// HasExtra reports whether the given extra CpuFeature flag is supported.
	HasExtra(cpuFeature CpuFeature) bool
}

// CpuFeature is a bit flag naming a CPU capability. Values are passed to
// CpuFeatureFlags.Has or CpuFeatureFlags.HasExtra, depending on the group the
// constant belongs to.
type CpuFeature uint64

// Flags to be queried with CpuFeatureFlags.Has on amd64.
const (
	// CpuFeatureAmd64SSE3 is the flag to query CpuFeatureFlags.Has for SSEv3 capabilities on amd64.
	CpuFeatureAmd64SSE3 = CpuFeature(1) << 0
	// CpuFeatureAmd64SSE4_1 is the flag to query CpuFeatureFlags.Has for SSEv4.1 capabilities on amd64.
	CpuFeatureAmd64SSE4_1 = CpuFeature(1) << 19
	// CpuFeatureAmd64SSE4_2 is the flag to query CpuFeatureFlags.Has for SSEv4.2 capabilities on amd64.
	CpuFeatureAmd64SSE4_2 = CpuFeature(1) << 20
)

// Flags to be queried with CpuFeatureFlags.HasExtra on amd64.
const (
	// CpuExtraFeatureAmd64ABM is the flag to query CpuFeatureFlags.HasExtra for
	// Advanced Bit Manipulation capabilities (e.g. LZCNT) on amd64.
	CpuExtraFeatureAmd64ABM = CpuFeature(1) << 5
)
|
||||||
@@ -1,30 +1,8 @@
|
|||||||
package platform
|
package platform
|
||||||
|
|
||||||
const (
|
|
||||||
// CpuFeatureSSE3 is the flag to query CpuFeatureFlags.Has for SSEv3 capabilities
|
|
||||||
CpuFeatureSSE3 = uint64(1)
|
|
||||||
// CpuFeatureSSE4_1 is the flag to query CpuFeatureFlags.Has for SSEv4.1 capabilities
|
|
||||||
CpuFeatureSSE4_1 = uint64(1) << 19
|
|
||||||
// CpuFeatureSSE4_2 is the flag to query CpuFeatureFlags.Has for SSEv4.2 capabilities
|
|
||||||
CpuFeatureSSE4_2 = uint64(1) << 20
|
|
||||||
)
|
|
||||||
|
|
||||||
const (
|
|
||||||
// CpuExtraFeatureABM is the flag to query CpuFeatureFlags.HasExtra for Advanced Bit Manipulation capabilities (e.g. LZCNT)
|
|
||||||
CpuExtraFeatureABM = uint64(1) << 5
|
|
||||||
)
|
|
||||||
|
|
||||||
// CpuFeatures exposes the capabilities for this CPU, queried via the Has, HasExtra methods
|
// CpuFeatures exposes the capabilities for this CPU, queried via the Has, HasExtra methods
|
||||||
var CpuFeatures CpuFeatureFlags = loadCpuFeatureFlags()
|
var CpuFeatures CpuFeatureFlags = loadCpuFeatureFlags()
|
||||||
|
|
||||||
// CpuFeatureFlags exposes methods for querying CPU capabilities
|
|
||||||
type CpuFeatureFlags interface {
|
|
||||||
// Has returns true when the specified flag (represented as uint64) is supported
|
|
||||||
Has(cpuFeature uint64) bool
|
|
||||||
// HasExtra returns true when the specified extraFlag (represented as uint64) is supported
|
|
||||||
HasExtra(cpuFeature uint64) bool
|
|
||||||
}
|
|
||||||
|
|
||||||
// cpuFeatureFlags implements CpuFeatureFlags interface
|
// cpuFeatureFlags implements CpuFeatureFlags interface
|
||||||
type cpuFeatureFlags struct {
|
type cpuFeatureFlags struct {
|
||||||
flags uint64
|
flags uint64
|
||||||
@@ -69,11 +47,11 @@ func loadCpuFeatureFlags() CpuFeatureFlags {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Has implements the same method on the CpuFeatureFlags interface
|
// Has implements the same method on the CpuFeatureFlags interface
|
||||||
func (f *cpuFeatureFlags) Has(cpuFeature uint64) bool {
|
func (f *cpuFeatureFlags) Has(cpuFeature CpuFeature) bool {
|
||||||
return (f.flags & cpuFeature) != 0
|
return (f.flags & uint64(cpuFeature)) != 0
|
||||||
}
|
}
|
||||||
|
|
||||||
// HasExtra implements the same method on the CpuFeatureFlags interface
|
// HasExtra implements the same method on the CpuFeatureFlags interface
|
||||||
func (f *cpuFeatureFlags) HasExtra(cpuFeature uint64) bool {
|
func (f *cpuFeatureFlags) HasExtra(cpuFeature CpuFeature) bool {
|
||||||
return (f.extraFlags & cpuFeature) != 0
|
return (f.extraFlags & uint64(cpuFeature)) != 0
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -8,11 +8,11 @@ import (
|
|||||||
|
|
||||||
func TestAmd64CpuId_cpuHasFeature(t *testing.T) {
|
func TestAmd64CpuId_cpuHasFeature(t *testing.T) {
|
||||||
flags := cpuFeatureFlags{
|
flags := cpuFeatureFlags{
|
||||||
flags: CpuFeatureSSE3,
|
flags: uint64(CpuFeatureAmd64SSE3),
|
||||||
extraFlags: CpuExtraFeatureABM,
|
extraFlags: uint64(CpuExtraFeatureAmd64ABM),
|
||||||
}
|
}
|
||||||
require.True(t, flags.Has(CpuFeatureSSE3))
|
require.True(t, flags.Has(CpuFeatureAmd64SSE3))
|
||||||
require.False(t, flags.Has(CpuFeatureSSE4_2))
|
require.False(t, flags.Has(CpuFeatureAmd64SSE4_2))
|
||||||
require.True(t, flags.HasExtra(CpuExtraFeatureABM))
|
require.True(t, flags.HasExtra(CpuExtraFeatureAmd64ABM))
|
||||||
require.False(t, flags.HasExtra(1<<6)) // some other value
|
require.False(t, flags.HasExtra(1<<6)) // some other value
|
||||||
}
|
}
|
||||||
|
|||||||
14
internal/platform/cpuid_unsupported.go
Normal file
14
internal/platform/cpuid_unsupported.go
Normal file
@@ -0,0 +1,14 @@
|
|||||||
|
//go:build !amd64
|
||||||
|
|
||||||
|
package platform
|
||||||
|
|
||||||
|
var CpuFeatures CpuFeatureFlags = &cpuFeatureFlags{}
|
||||||
|
|
||||||
|
// cpuFeatureFlags implements CpuFeatureFlags for unsupported platforms
|
||||||
|
type cpuFeatureFlags struct{}
|
||||||
|
|
||||||
|
// Has implements the same method on the CpuFeatureFlags interface
|
||||||
|
func (c *cpuFeatureFlags) Has(cpuFeature CpuFeature) bool { return false }
|
||||||
|
|
||||||
|
// HasExtra implements the same method on the CpuFeatureFlags interface
|
||||||
|
func (c *cpuFeatureFlags) HasExtra(cpuFeature CpuFeature) bool { return false }
|
||||||
@@ -3,5 +3,5 @@ package platform
|
|||||||
// init verifies that the current CPU supports the required AMD64 instructions
|
// init verifies that the current CPU supports the required AMD64 instructions
|
||||||
func init() {
|
func init() {
|
||||||
// Ensure SSE4.1 is supported.
|
// Ensure SSE4.1 is supported.
|
||||||
archRequirementsVerified = CpuFeatures.Has(CpuFeatureSSE4_1)
|
archRequirementsVerified = CpuFeatures.Has(CpuFeatureAmd64SSE4_1)
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user