diff --git a/internal/engine/wazevo/backend/isa/arm64/instr.go b/internal/engine/wazevo/backend/isa/arm64/instr.go index c4624da7..025522b8 100644 --- a/internal/engine/wazevo/backend/isa/arm64/instr.go +++ b/internal/engine/wazevo/backend/isa/arm64/instr.go @@ -1776,7 +1776,13 @@ func (b vecOp) String() string { case vecOpUmlal: return "umlal" case vecOpFdiv: - return "fmul" + return "fdiv" + case vecOpFsqrt: + return "fsqrt" + case vecOpAbs: + return "abs" + case vecOpFabs: + return "fabs" case vecOpNeg: return "neg" case vecOpFneg: @@ -1807,8 +1813,6 @@ func (b vecOp) String() string { return "uqxtn" case vecOpSqxtun: return "sqxtun" - case vecOpFsqrt: - return "fsqrt" case vecOpRev64: return "rev64" case vecOpXtn: @@ -1827,8 +1831,6 @@ func (b vecOp) String() string { return "sshr" case vecOpZip1: return "zip1" - case vecOpFabs: - return "fabs" case vecOpFmin: return "fmin" case vecOpFmax: diff --git a/internal/engine/wazevo/backend/isa/arm64/lower_instr.go b/internal/engine/wazevo/backend/isa/arm64/lower_instr.go index e47b0501..f3490b6a 100644 --- a/internal/engine/wazevo/backend/isa/arm64/lower_instr.go +++ b/internal/engine/wazevo/backend/isa/arm64/lower_instr.go @@ -766,16 +766,16 @@ func (m *machine) lowerVShift(op ssa.Opcode, rd, rn, rm operand, arr vecArrangem // Copy the shift amount into a vector register as sshl/ushl requires it to be there. 
dup := m.allocateInstr() - dup.asVecDup(rd, tmp, arr) + dup.asVecDup(tmp, tmp, arr) m.insert(dup) if op == ssa.OpcodeVIshl || op == ssa.OpcodeVSshr { sshl := m.allocateInstr() - sshl.asVecRRR(vecOpSshl, rd, rn, rd, arr) + sshl.asVecRRR(vecOpSshl, rd, rn, tmp, arr) m.insert(sshl) } else { ushl := m.allocateInstr() - ushl.asVecRRR(vecOpUshl, rd, rn, rd, arr) + ushl.asVecRRR(vecOpUshl, rd, rn, tmp, arr) m.insert(ushl) } } @@ -787,19 +787,19 @@ func (m *machine) lowerVcheckTrue(op ssa.Opcode, rm, rd operand, arr vecArrangem if op == ssa.OpcodeVallTrue && arr == vecArrangement2D { // cmeq v3?.2d, v2?.2d, #0 // addp v3?.2d, v3?.2d, v3?.2d - // fcmp x3?, x3? - // cset x3?, eq + // fcmp v3?, v3? + // cset dst, eq ins := m.allocateInstr() - ins.asVecMisc(vecOpCmeq0, rd, rm, vecArrangement2D) + ins.asVecMisc(vecOpCmeq0, tmp, rm, vecArrangement2D) m.insert(ins) addp := m.allocateInstr() - addp.asVecRRR(vecOpAddp, rd, rd, rd, vecArrangement2D) + addp.asVecRRR(vecOpAddp, tmp, tmp, tmp, vecArrangement2D) m.insert(addp) fcmp := m.allocateInstr() - fcmp.asFpuCmp(rd, rd, true) + fcmp.asFpuCmp(tmp, tmp, true) m.insert(fcmp) cset := m.allocateInstr() @@ -1437,7 +1437,7 @@ func (m *machine) lowerVIcmp(si *ssa.Instruction) { cmp.asVecRRR(vecOpCmeq, rd, rn, rm, arr) m.insert(cmp) not := m.allocateInstr() - not.asVecMisc(vecOpNot, rd, rn, vecArrangement16B) + not.asVecMisc(vecOpNot, rd, rd, vecArrangement16B) m.insert(not) case ge: cmp := m.allocateInstr() @@ -1472,10 +1472,6 @@ func (m *machine) lowerVIcmp(si *ssa.Instruction) { cmp.asVecRRR(vecOpCmhi, rd, rm, rn, arr) // rm, rn are swapped m.insert(cmp) } - - cset := m.allocateInstr() - cset.asCSet(rd.reg(), flag) - m.insert(cset) } func (m *machine) lowerVFcmp(si *ssa.Instruction) { diff --git a/internal/engine/wazevo/backend/isa/arm64/lower_instr_test.go b/internal/engine/wazevo/backend/isa/arm64/lower_instr_test.go index 90096307..87da9174 100644 --- a/internal/engine/wazevo/backend/isa/arm64/lower_instr_test.go +++ 
b/internal/engine/wazevo/backend/isa/arm64/lower_instr_test.go @@ -559,12 +559,12 @@ cset x15, ne op: ssa.OpcodeVallTrue, arrangement: vecArrangement2D, expectedAsm: ` -cmeq x15.2d, x1.2d, #0 -addp x15.2d, x15.2d, x15.2d -fcmp x15, x15 +cmeq v1?.2d, x1.2d, #0 +addp v1?.2d, v1?.2d, v1?.2d +fcmp d1?, d1? cset x15, eq `, - expectedBytes: "2f98e04eefbdef4ee0216f1eef179f9a", + expectedBytes: "2098e04e00bce04e0020601eef179f9a", }, { name: "allTrue 8B", @@ -788,10 +788,10 @@ func TestMachine_lowerVShift(t *testing.T) { arrangement: vecArrangement16B, expectedAsm: ` and s1?, w15, #0x7 -dup x1.16b, d1? -sshl x1.16b, x2.16b, x1.16b +dup v1?.16b, d1? +sshl x1.16b, x2.16b, v1?.16b `, - expectedBytes: "e0090012010c014e4144214e", + expectedBytes: "e0090012000c014e4144204e", }, { name: "VSshr", @@ -800,10 +800,10 @@ sshl x1.16b, x2.16b, x1.16b expectedAsm: ` and s1?, w15, #0x7 sub s1?, wzr, s1? -dup x1.16b, d1? -sshl x1.16b, x2.16b, x1.16b +dup v1?.16b, d1? +sshl x1.16b, x2.16b, v1?.16b `, - expectedBytes: "e0090012e003004b010c014e4144214e", + expectedBytes: "e0090012e003004b000c014e4144204e", }, { name: "VUshr", @@ -812,10 +812,10 @@ sshl x1.16b, x2.16b, x1.16b expectedAsm: ` and s1?, w15, #0x7 sub s1?, wzr, s1? -dup x1.16b, d1? -ushl x1.16b, x2.16b, x1.16b +dup v1?.16b, d1? 
+ushl x1.16b, x2.16b, v1?.16b `, - expectedBytes: "e0090012e003004b010c014e4144216e", + expectedBytes: "e0090012e003004b000c014e4144206e", }, } { t.Run(tc.name, func(t *testing.T) { diff --git a/internal/engine/wazevo/ssa/instructions.go b/internal/engine/wazevo/ssa/instructions.go index 92727c80..8324f68e 100644 --- a/internal/engine/wazevo/ssa/instructions.go +++ b/internal/engine/wazevo/ssa/instructions.go @@ -835,7 +835,7 @@ var instructionReturnTypes = [opcodeEnd]returnTypesFn{ OpcodeVIsub: returnTypesFnV128, OpcodeVSsubSat: returnTypesFnV128, OpcodeVUsubSat: returnTypesFnV128, - OpcodeVIcmp: returnTypesFnI32, + OpcodeVIcmp: returnTypesFnV128, OpcodeVImin: returnTypesFnV128, OpcodeVUmin: returnTypesFnV128, OpcodeVImax: returnTypesFnV128, @@ -1406,7 +1406,7 @@ func (i *Instruction) AsVIcmp(x, y Value, c IntegerCmpCond, lane VecLane) *Instr i.v2 = y i.u1 = uint64(c) i.u2 = uint64(lane) - i.typ = TypeI32 + i.typ = TypeV128 return i } @@ -1416,7 +1416,7 @@ func (i *Instruction) AsVFcmp(x, y Value, c FloatCmpCond, lane VecLane) *Instruc i.v = x i.v2 = y i.u1 = uint64(c) - i.typ = TypeI32 + i.typ = TypeV128 i.u2 = uint64(lane) return i } diff --git a/internal/integration_test/fuzzcases/fuzzcases_test.go b/internal/integration_test/fuzzcases/fuzzcases_test.go index ab8283d9..e9714385 100644 --- a/internal/integration_test/fuzzcases/fuzzcases_test.go +++ b/internal/integration_test/fuzzcases/fuzzcases_test.go @@ -482,3 +482,66 @@ func Test1792c(t *testing.T) { require.Equal(t, uint64(9205357640488583168), m.Globals[0].ValHi) }) } + +// Test1793a tests that OpcodeVallTrue is lowered to the right registers. 
+func Test1793a(t *testing.T) { + if !platform.CompilerSupported() { + return + } + run(t, func(t *testing.T, r wazero.Runtime) { + mod, err := r.Instantiate(ctx, getWasmBinary(t, "1793a")) + require.NoError(t, err) + m := mod.(*wasm.ModuleInstance) + _, err = m.ExportedFunction("").Call(ctx) + require.NoError(t, err) + require.Equal(t, uint64(2531906066518671488), m.Globals[2].Val) + require.Equal(t, uint64(18446744073709551615), m.Globals[2].ValHi) + }) +} + +// Test1793b tests that OpcodeVIcmp, OpcodeVFcmp are lowered to the right registers. +func Test1793b(t *testing.T) { + if !platform.CompilerSupported() { + return + } + run(t, func(t *testing.T, r wazero.Runtime) { + mod, err := r.Instantiate(ctx, getWasmBinary(t, "1793b")) + require.NoError(t, err) + m := mod.(*wasm.ModuleInstance) + _, err = m.ExportedFunction("").Call(ctx, 0, 0, 0, 0) + require.NoError(t, err) + require.Equal(t, uint64(18374967954648334335), m.Globals[1].Val) + require.Equal(t, uint64(18446744073709551615), m.Globals[1].ValHi) + }) +} + +// Test1793c tests that OpcodeVIcmp is lowered to the right registers. +func Test1793c(t *testing.T) { + if !platform.CompilerSupported() { + return + } + run(t, func(t *testing.T, r wazero.Runtime) { + mod, err := r.Instantiate(ctx, getWasmBinary(t, "1793c")) + require.NoError(t, err) + m := mod.(*wasm.ModuleInstance) + _, err = m.ExportedFunction("").Call(ctx, 0, 0) + require.NoError(t, err) + require.Equal(t, uint64(18446744073709551615), m.Globals[0].Val) + require.Equal(t, uint64(18446744073709551615), m.Globals[0].ValHi) + }) +} + +// Test1793d tests that OpcodeVShift is lowered to the right registers. 
+func Test1793d(t *testing.T) { + if !platform.CompilerSupported() { + return + } + run(t, func(t *testing.T, r wazero.Runtime) { + mod, err := r.Instantiate(ctx, getWasmBinary(t, "1793d")) + require.NoError(t, err) + m := mod.(*wasm.ModuleInstance) + _, err = m.ExportedFunction("").Call(ctx) + require.NoError(t, err) + require.Equal(t, uint64(0), m.Globals[1].Val) + }) +} diff --git a/internal/integration_test/fuzzcases/testdata/1793a.wasm b/internal/integration_test/fuzzcases/testdata/1793a.wasm new file mode 100644 index 00000000..3a7b40e3 Binary files /dev/null and b/internal/integration_test/fuzzcases/testdata/1793a.wasm differ diff --git a/internal/integration_test/fuzzcases/testdata/1793a.wat b/internal/integration_test/fuzzcases/testdata/1793a.wat new file mode 100644 index 00000000..4afaf80a --- /dev/null +++ b/internal/integration_test/fuzzcases/testdata/1793a.wat @@ -0,0 +1,43 @@ +(module + (type (;0;) (func)) + (type (;1;) (func (param f64 f64 f64) (result externref f64 i64))) + (type (;2;) (func (param f64 f64 f64 i64 f64 funcref))) + (func (;0;) (type 0) + (local f32) + v128.const i32x4 0x23808080 0x23232327 0xffffffff 0xffffffff + ref.null func + i32.const 1549556771 + i16x8.splat + i64x2.all_true + i64.extend_i32_u + f32.const 0x1.fe49fep-55 (;=0.000000000000000055325648;) + f32.nearest + local.tee 0 + f32.const nan (;=NaN;) + local.get 0 + local.get 0 + f32.eq + select + i32.reinterpret_f32 + global.get 0 + i32.xor + global.set 0 + global.get 1 + i64.xor + global.set 1 + drop + global.get 2 + v128.xor + global.set 2 + ) + (table (;0;) 26 510 funcref) + (global (;0;) (mut i32) i32.const 0) + (global (;1;) (mut i64) i64.const 0) + (global (;2;) (mut v128) v128.const i32x4 0x00000000 0x00000000 0x00000000 0x00000000) + (global (;3;) (mut i32) i32.const 1000) + (export "" (func 0)) + (export "\00\00\00\00" (table 0)) + (export "EEEE\02\00" (global 0)) + (export "3" (global 1)) + (export "4" (global 2)) +) diff --git 
a/internal/integration_test/fuzzcases/testdata/1793b.wasm b/internal/integration_test/fuzzcases/testdata/1793b.wasm new file mode 100644 index 00000000..46c5b2d0 Binary files /dev/null and b/internal/integration_test/fuzzcases/testdata/1793b.wasm differ diff --git a/internal/integration_test/fuzzcases/testdata/1793b.wat b/internal/integration_test/fuzzcases/testdata/1793b.wat new file mode 100644 index 00000000..77d9f50b --- /dev/null +++ b/internal/integration_test/fuzzcases/testdata/1793b.wat @@ -0,0 +1,65 @@ +(module + (type (;0;) (func (param f64 v128 i32) (result i64))) + (func (;0;) (type 0) (param f64 v128 i32) (result i64) + (local v128 v128 f64 f64) + local.get 1 + i64x2.abs + v128.const i32x4 0xffff6824 0xffff6262 0xffffffff 0x363636ff + f64x2.floor + local.tee 3 + v128.const i32x4 0x00000000 0x7ff80000 0x00000000 0x7ff80000 + local.get 3 + local.get 3 + f64x2.eq + v128.bitselect + f64x2.floor + local.tee 4 + v128.const i32x4 0x00000000 0x7ff80000 0x00000000 0x7ff80000 + local.get 4 + local.get 4 + f64x2.eq + v128.bitselect + i8x16.le_s + local.get 0 + f64.sqrt + local.tee 5 + f64.const nan (;=NaN;) + local.get 5 + local.get 5 + f64.eq + select + f64.sqrt + local.tee 6 + f64.const nan (;=NaN;) + local.get 6 + local.get 6 + f64.eq + select + local.tee 0 + i64.trunc_sat_f64_u + i64.const 3906369100484640767 + i64.le_u + f32.convert_i32_s + i64.trunc_f32_s + i64.extend16_s + f64.reinterpret_i64 + i32.trunc_f64_u + i32.const 50282532 + global.get 0 + i32.xor + global.set 0 + global.get 0 + i32.xor + global.set 0 + global.get 1 + v128.xor + global.set 1 + i64.const 36 + ) + (global (;0;) (mut i32) i32.const 0) + (global (;1;) (mut v128) v128.const i32x4 0x00000000 0x00000000 0x00000000 0x00000000) + (global (;2;) (mut i32) i32.const 1000) + (export "" (func 0)) + (export "1" (global 0)) + (export "2" (global 1)) +) diff --git a/internal/integration_test/fuzzcases/testdata/1793c.wasm b/internal/integration_test/fuzzcases/testdata/1793c.wasm new file mode 100644 
index 00000000..ea8121ef Binary files /dev/null and b/internal/integration_test/fuzzcases/testdata/1793c.wasm differ diff --git a/internal/integration_test/fuzzcases/testdata/1793c.wat b/internal/integration_test/fuzzcases/testdata/1793c.wat new file mode 100644 index 00000000..1320c657 --- /dev/null +++ b/internal/integration_test/fuzzcases/testdata/1793c.wat @@ -0,0 +1,25 @@ +(module + (type (;0;) (func (param f64 f64))) + (func (;0;) (type 0) (param f64 f64) + (local externref i64 v128) + v128.const i32x4 0x67676767 0x67676767 0xa9676767 0x67676767 + f32x4.ceil + local.tee 4 + v128.const i32x4 0x7fc00000 0x7fc00000 0x7fc00000 0x7fc00000 + local.get 4 + local.get 4 + f32x4.eq + v128.bitselect + v128.const i32x4 0x40bf0242 0xff89ff40 0x64ffffff 0x96966464 + i8x16.ne + global.get 0 + v128.xor + global.set 0 + ) + (memory (;0;) 4 4) + (global (;0;) (mut v128) v128.const i32x4 0x00000000 0x00000000 0x00000000 0x00000000) + (global (;1;) (mut i32) i32.const 1000) + (export "" (func 0)) + (export "1" (memory 0)) + (export "zz" (global 0)) +) diff --git a/internal/integration_test/fuzzcases/testdata/1793d.wasm b/internal/integration_test/fuzzcases/testdata/1793d.wasm new file mode 100644 index 00000000..59012a07 Binary files /dev/null and b/internal/integration_test/fuzzcases/testdata/1793d.wasm differ diff --git a/internal/integration_test/fuzzcases/testdata/1793d.wat b/internal/integration_test/fuzzcases/testdata/1793d.wat new file mode 100644 index 00000000..b2a4ded7 --- /dev/null +++ b/internal/integration_test/fuzzcases/testdata/1793d.wat @@ -0,0 +1,207 @@ +(module + (type (;0;) (func (result i64))) + (func (;0;) (type 0) (result i64) + (local f64 f64 f64 f64 v128 v128 v128 v128 v128 v128 v128 v128 v128 v128 v128 v128 v128 v128 v128 v128 v128 v128 v128 v128 v128) + global.get 2 + i32.eqz + if ;; label = @1 + unreachable + end + global.get 2 + i32.const 1 + i32.sub + global.set 2 + i64.const 39584465551547 + i64.popcnt + ref.null extern + local.get 1 + f64.neg + 
f64.neg + f64.neg + f64.neg + f64.neg + f64.neg + f64.neg + f64.neg + f64.neg + f64.neg + f64.neg + f64.neg + f64.neg + f64.neg + f64.neg + f64.floor + local.tee 3 + f64.const nan (;=NaN;) + local.get 3 + local.get 3 + f64.eq + select + i32.trunc_f64_u + v128.const i32x4 0xffffffff 0xffffffff 0xffffffff 0xffffffff + f64x2.nearest + local.tee 4 + v128.const i32x4 0x00000000 0x7ff80000 0x00000000 0x7ff80000 + local.get 4 + local.get 4 + f64x2.eq + v128.bitselect + f64x2.nearest + local.tee 5 + v128.const i32x4 0x00000000 0x7ff80000 0x00000000 0x7ff80000 + local.get 5 + local.get 5 + f64x2.eq + v128.bitselect + f64x2.nearest + local.tee 6 + v128.const i32x4 0x00000000 0x7ff80000 0x00000000 0x7ff80000 + local.get 6 + local.get 6 + f64x2.eq + v128.bitselect + f64x2.nearest + local.tee 7 + v128.const i32x4 0x00000000 0x7ff80000 0x00000000 0x7ff80000 + local.get 7 + local.get 7 + f64x2.eq + v128.bitselect + f64x2.nearest + local.tee 8 + v128.const i32x4 0x00000000 0x7ff80000 0x00000000 0x7ff80000 + local.get 8 + local.get 8 + f64x2.eq + v128.bitselect + f64x2.nearest + local.tee 9 + v128.const i32x4 0x00000000 0x7ff80000 0x00000000 0x7ff80000 + local.get 9 + local.get 9 + f64x2.eq + v128.bitselect + f64x2.nearest + local.tee 10 + v128.const i32x4 0x00000000 0x7ff80000 0x00000000 0x7ff80000 + local.get 10 + local.get 10 + f64x2.eq + v128.bitselect + f64x2.nearest + local.tee 11 + v128.const i32x4 0x00000000 0x7ff80000 0x00000000 0x7ff80000 + local.get 11 + local.get 11 + f64x2.eq + v128.bitselect + f64x2.nearest + local.tee 12 + v128.const i32x4 0x00000000 0x7ff80000 0x00000000 0x7ff80000 + local.get 12 + local.get 12 + f64x2.eq + v128.bitselect + f64x2.nearest + local.tee 13 + v128.const i32x4 0x00000000 0x7ff80000 0x00000000 0x7ff80000 + local.get 13 + local.get 13 + f64x2.eq + v128.bitselect + f64x2.nearest + local.tee 14 + v128.const i32x4 0x00000000 0x7ff80000 0x00000000 0x7ff80000 + local.get 14 + local.get 14 + f64x2.eq + v128.bitselect + f64x2.nearest + local.tee 
15 + v128.const i32x4 0x00000000 0x7ff80000 0x00000000 0x7ff80000 + local.get 15 + local.get 15 + f64x2.eq + v128.bitselect + f64x2.nearest + local.tee 16 + v128.const i32x4 0x00000000 0x7ff80000 0x00000000 0x7ff80000 + local.get 16 + local.get 16 + f64x2.eq + v128.bitselect + f64x2.nearest + local.tee 17 + v128.const i32x4 0x00000000 0x7ff80000 0x00000000 0x7ff80000 + local.get 17 + local.get 17 + f64x2.eq + v128.bitselect + f64x2.nearest + local.tee 18 + v128.const i32x4 0x00000000 0x7ff80000 0x00000000 0x7ff80000 + local.get 18 + local.get 18 + f64x2.eq + v128.bitselect + f64x2.nearest + local.tee 19 + v128.const i32x4 0x00000000 0x7ff80000 0x00000000 0x7ff80000 + local.get 19 + local.get 19 + f64x2.eq + v128.bitselect + f64x2.nearest + local.tee 20 + v128.const i32x4 0x00000000 0x7ff80000 0x00000000 0x7ff80000 + local.get 20 + local.get 20 + f64x2.eq + v128.bitselect + f64x2.nearest + local.tee 21 + v128.const i32x4 0x00000000 0x7ff80000 0x00000000 0x7ff80000 + local.get 21 + local.get 21 + f64x2.eq + v128.bitselect + f64x2.nearest + local.tee 22 + v128.const i32x4 0x00000000 0x7ff80000 0x00000000 0x7ff80000 + local.get 22 + local.get 22 + f64x2.eq + v128.bitselect + f64x2.nearest + local.tee 23 + v128.const i32x4 0x00000000 0x7ff80000 0x00000000 0x7ff80000 + local.get 23 + local.get 23 + f64x2.eq + v128.bitselect + i64.const 720575940379279359 + i32.wrap_i64 + i8x16.shr_s + f32x4.convert_i32x4_s + f64x2.floor + local.tee 24 + v128.const i32x4 0x00000000 0x7ff80000 0x00000000 0x7ff80000 + local.get 24 + local.get 24 + f64x2.eq + v128.bitselect + global.get 0 + v128.xor + global.set 0 + global.get 1 + i32.xor + global.set 1 + drop + ) + (global (;0;) (mut v128) v128.const i32x4 0x00000000 0x00000000 0x00000000 0x00000000) + (global (;1;) (mut i32) i32.const 0) + (global (;2;) (mut i32) i32.const 1000) + (export "" (func 0)) + (export "1" (global 0)) + (export "2" (global 1)) +)