wazevo: passes simd load/store spec tests (#1766)
Signed-off-by: Edoardo Vacchi <evacchi@users.noreply.github.com>
@@ -88,6 +88,7 @@ var defKinds = [numInstructionKinds]defKind{
fpuLoad32: defKindRD,
fpuLoad64: defKindRD,
fpuLoad128: defKindRD,
vecLoad1R: defKindRD,
loadFpuConst32: defKindRD,
loadFpuConst64: defKindRD,
loadFpuConst128: defKindRD,
@@ -212,6 +213,7 @@ var useKinds = [numInstructionKinds]useKind{
loadFpuConst32: useKindNone,
loadFpuConst64: useKindNone,
loadFpuConst128: useKindNone,
vecLoad1R: useKindRN,
cSel: useKindRNRM,
fpuCSel: useKindRNRM,
movToVec: useKindRN,
@@ -543,6 +545,13 @@ func (i *instruction) asFpuLoad(dst operand, amode addressMode, sizeInBits byte)
i.amode = amode
}

func (i *instruction) asVecLoad1R(rd, rn operand, arr vecArrangement) {
i.kind = vecLoad1R
i.rd = rd
i.rn = rn
i.u1 = uint64(arr)
}

func (i *instruction) asCSet(rd regalloc.VReg, c condFlag) {
i.kind = cSet
i.rd = operandNR(rd)
@@ -1474,6 +1483,8 @@ const (
loadFpuConst64
// loadFpuConst128 represents a load of a 128-bit floating-point constant.
loadFpuConst128
// vecLoad1R represents a load of one single-element structure that replicates to all lanes of a vector.
vecLoad1R
// fpuToInt represents a conversion from FP to integer.
fpuToInt
// intToFpu represents a conversion from integer to FP.
@@ -46,6 +46,11 @@ func (i *instruction) encode(c backend.Compiler) {
c.Emit4Bytes(encodeLoadOrStore(i.kind, regNumberInEncoding[i.rn.realReg()], i.amode))
case uLoad8, uLoad16, uLoad32, uLoad64, sLoad8, sLoad16, sLoad32, fpuLoad32, fpuLoad64, fpuLoad128:
c.Emit4Bytes(encodeLoadOrStore(i.kind, regNumberInEncoding[i.rd.realReg()], i.amode))
case vecLoad1R:
c.Emit4Bytes(encodeVecLoad1Rrt(
regNumberInEncoding[i.rd.realReg()],
regNumberInEncoding[i.rn.realReg()],
vecArrangement(i.u1)))
case condBr:
imm19 := i.condBrOffset()
if imm19%4 != 0 {
@@ -1293,6 +1298,13 @@ func encodeLoadOrStore(kind instructionKind, rt uint32, amode addressMode) uint3
}
}

// encodeVecLoad1Rrt encodes as Load one single-element structure and Replicate to all lanes (of one register) in
// https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/LD1R--Load-one-single-element-structure-and-Replicate-to-all-lanes--of-one-register--?lang=en#sa_imm
func encodeVecLoad1Rrt(rt, rn uint32, arr vecArrangement) uint32 {
size, q := arrToSizeQEncoded(arr)
return q<<30 | 0b001101010000001100<<12 | size<<10 | rn<<5 | rt
}
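As a sanity check of the bit layout above: for the 8H arrangement used in the encoding test further down, arrToSizeQEncoded presumably yields size=0b01 and Q=1 (an assumption here, since that helper is not part of this diff), and the fields compose to the expected test vector. A standalone sketch:

package main

import "fmt"

// Standalone sketch of the LD1R bit layout used by encodeVecLoad1Rrt.
// Assumes size=0b01, Q=1 for the 8H arrangement; rt=1 (v1) and rn=2 (x2)
// match the "41c4404d" test case in TestInstruction_encode below.
func main() {
    var rt, rn, size, q uint32 = 1, 2, 0b01, 1
    enc := q<<30 | 0b001101010000001100<<12 | size<<10 | rn<<5 | rt
    fmt.Printf("%08x\n", enc) // prints 4d40c441, i.e. bytes 41 c4 40 4d in memory order
}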
// encodeAluBitmaskImmediate encodes as Logical (immediate) in
// https://developer.arm.com/documentation/ddi0596/2020-12/Index-by-Encoding/Data-Processing----Immediate?lang=en
func encodeAluBitmaskImmediate(op aluOp, rd, rn uint32, imm uint64, _64bit bool) uint32 {
@@ -1589,6 +1589,7 @@ func TestInstruction_encode(t *testing.T) {
i.asFpuRR(fpuUniOpRoundNearest, operandNR(v1VReg), operandNR(v2VReg), true)
}},
{want: "4140611e", setup: func(i *instruction) { i.asFpuRR(fpuUniOpNeg, operandNR(v1VReg), operandNR(v2VReg), true) }},
{want: "41c4404d", setup: func(i *instruction) { i.asVecLoad1R(operandNR(v1VReg), operandNR(v2VReg), vecArrangement8H) }},
{want: "4201231e4201631e4201239e4201639e4201221e4201621e4201229e4201629e", setup: func(i *instruction) {
i.asNop0()
cur := i
@@ -674,6 +674,24 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) {
}
m.insert(dup)

case ssa.OpcodeLoadSplat:
x, offset, lane := instr.LoadSplatData()
rd := operandNR(m.compiler.VRegOf(instr.Return()))
arr := ssaLaneToArrangement(lane)

rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone)
tmpReg := m.compiler.AllocateVReg(ssa.TypeI32)

// Our encoding for vecLoad1R does not support all the addressing modes yet,
// so we use the no-offset addressing mode and add the offset to a temp register.
add := m.allocateInstr()
add.asALU(aluOpAdd, operandNR(tmpReg), rn, operandImm12(uint16(offset), 0), true)
m.insert(add)

ld1r := m.allocateInstr()
ld1r.asVecLoad1R(rd, operandNR(tmpReg), arr)
m.insert(ld1r)

default:
panic("TODO: lowering " + op.String())
}
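Concretely, the workaround described in the comment above means every splat load goes through a temporary base register: the lowering emits a sequence of roughly the shape `add x_tmp, x_base, #offset` followed by `ld1r { v_dst.8h }, [x_tmp]` (register names and the `8h` arrangement are illustrative), instead of folding the offset into the LD1R addressing mode.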
@@ -199,9 +199,22 @@ func TestSpectestV2(t *testing.T) {
{"simd_i16x8_extadd_pairwise_i8x16"},
{"simd_i32x4_extadd_pairwise_i16x8"},
{"simd_int_to_int_extend"},
{"simd_load"},
{"simd_load_extend"},
{"simd_load_splat"},
{"simd_load_zero"},
{"simd_load8_lane"},
{"simd_load16_lane"},
{"simd_load32_lane"},
{"simd_load64_lane"},
{"simd_lane"},
{"simd_linking"},
{"simd_splat"},
{"simd_store"},
{"simd_store8_lane"},
{"simd_store16_lane"},
{"simd_store32_lane"},
{"simd_store64_lane"},
} {
t.Run(tc.name, func(t *testing.T) {
t.Run("normal", func(t *testing.T) {
@@ -1705,6 +1705,135 @@ func (c *Compiler) lowerCurrentOpcode() {
load.AsLoad(addr, offset, ssa.TypeV128)
builder.InsertInstruction(load)
state.push(load.Return())
case wasm.OpcodeVecV128Load8Lane, wasm.OpcodeVecV128Load16Lane, wasm.OpcodeVecV128Load32Lane:
_, offset := c.readMemArg()
state.pc++
if state.unreachable {
break
}
var lane ssa.VecLane
var loadOp ssa.Opcode
var opSize uint64
switch vecOp {
case wasm.OpcodeVecV128Load8Lane:
loadOp, lane, opSize = ssa.OpcodeUload8, ssa.VecLaneI8x16, 1
case wasm.OpcodeVecV128Load16Lane:
loadOp, lane, opSize = ssa.OpcodeUload16, ssa.VecLaneI16x8, 2
case wasm.OpcodeVecV128Load32Lane:
loadOp, lane, opSize = ssa.OpcodeUload32, ssa.VecLaneI32x4, 4
}
laneIndex := c.wasmFunctionBody[state.pc]
vector := state.pop()
baseAddr := state.pop()
addr := c.memOpSetup(baseAddr, uint64(offset), opSize)
load := builder.AllocateInstruction().
AsExtLoad(loadOp, addr, offset, false).
Insert(builder).Return()
ret := builder.AllocateInstruction().
AsInsertlane(vector, load, laneIndex, lane).
Insert(builder).Return()
state.push(ret)
case wasm.OpcodeVecV128Load64Lane:
_, offset := c.readMemArg()
state.pc++
if state.unreachable {
break
}
laneIndex := c.wasmFunctionBody[state.pc]
vector := state.pop()
baseAddr := state.pop()
addr := c.memOpSetup(baseAddr, uint64(offset), 8)
load := builder.AllocateInstruction().
AsLoad(addr, offset, ssa.TypeI64).
Insert(builder).Return()
ret := builder.AllocateInstruction().
AsInsertlane(vector, load, laneIndex, ssa.VecLaneI64x2).
Insert(builder).Return()
state.push(ret)

case wasm.OpcodeVecV128Load32zero:
_, offset := c.readMemArg()
if state.unreachable {
break
}
baseAddr := state.pop()
addr := c.memOpSetup(baseAddr, uint64(offset), 4)
ret := builder.AllocateInstruction().
AsLoad(addr, offset, ssa.TypeF32).
Insert(builder).Return()
state.push(ret)

case wasm.OpcodeVecV128Load64zero:
_, offset := c.readMemArg()
if state.unreachable {
break
}
baseAddr := state.pop()
addr := c.memOpSetup(baseAddr, uint64(offset), 8)
ret := builder.AllocateInstruction().
AsLoad(addr, offset, ssa.TypeF64).
Insert(builder).Return()
state.push(ret)
case wasm.OpcodeVecV128Load8x8u, wasm.OpcodeVecV128Load8x8s,
wasm.OpcodeVecV128Load16x4u, wasm.OpcodeVecV128Load16x4s,
wasm.OpcodeVecV128Load32x2u, wasm.OpcodeVecV128Load32x2s:
_, offset := c.readMemArg()
if state.unreachable {
break
}
var lane ssa.VecLane
var signed bool
switch vecOp {
case wasm.OpcodeVecV128Load8x8s:
signed = true
fallthrough
case wasm.OpcodeVecV128Load8x8u:
lane = ssa.VecLaneI8x16
case wasm.OpcodeVecV128Load16x4s:
signed = true
fallthrough
case wasm.OpcodeVecV128Load16x4u:
lane = ssa.VecLaneI16x8
case wasm.OpcodeVecV128Load32x2s:
signed = true
fallthrough
case wasm.OpcodeVecV128Load32x2u:
lane = ssa.VecLaneI32x4
}
baseAddr := state.pop()
addr := c.memOpSetup(baseAddr, uint64(offset), 8)
load := builder.AllocateInstruction().
AsLoad(addr, offset, ssa.TypeV128).
Insert(builder).Return()
ret := builder.AllocateInstruction().
AsWiden(load, lane, signed, true).
Insert(builder).Return()
state.push(ret)
case wasm.OpcodeVecV128Load8Splat, wasm.OpcodeVecV128Load16Splat,
wasm.OpcodeVecV128Load32Splat, wasm.OpcodeVecV128Load64Splat:
_, offset := c.readMemArg()
if state.unreachable {
break
}
var lane ssa.VecLane
var opSize uint64
switch vecOp {
case wasm.OpcodeVecV128Load8Splat:
lane, opSize = ssa.VecLaneI8x16, 1
case wasm.OpcodeVecV128Load16Splat:
lane, opSize = ssa.VecLaneI16x8, 2
case wasm.OpcodeVecV128Load32Splat:
lane, opSize = ssa.VecLaneI32x4, 4
case wasm.OpcodeVecV128Load64Splat:
lane, opSize = ssa.VecLaneI64x2, 8
}
baseAddr := state.pop()
addr := c.memOpSetup(baseAddr, uint64(offset), opSize)
ret := builder.AllocateInstruction().
AsLoadSplat(addr, offset, lane).
Insert(builder).Return()
state.push(ret)
case wasm.OpcodeVecV128Store:
_, offset := c.readMemArg()
if state.unreachable {
@@ -1716,7 +1845,36 @@ func (c *Compiler) lowerCurrentOpcode() {
builder.AllocateInstruction().
AsStore(ssa.OpcodeStore, value, addr, offset).
Insert(builder)

case wasm.OpcodeVecV128Store8Lane, wasm.OpcodeVecV128Store16Lane,
wasm.OpcodeVecV128Store32Lane, wasm.OpcodeVecV128Store64Lane:
_, offset := c.readMemArg()
state.pc++
if state.unreachable {
break
}
laneIndex := c.wasmFunctionBody[state.pc]
var storeOp ssa.Opcode
var lane ssa.VecLane
var opSize uint64
switch vecOp {
case wasm.OpcodeVecV128Store8Lane:
storeOp, lane, opSize = ssa.OpcodeIstore8, ssa.VecLaneI8x16, 1
case wasm.OpcodeVecV128Store16Lane:
storeOp, lane, opSize = ssa.OpcodeIstore16, ssa.VecLaneI16x8, 2
case wasm.OpcodeVecV128Store32Lane:
storeOp, lane, opSize = ssa.OpcodeIstore32, ssa.VecLaneI32x4, 4
case wasm.OpcodeVecV128Store64Lane:
storeOp, lane, opSize = ssa.OpcodeStore, ssa.VecLaneI64x2, 8
}
vector := state.pop()
baseAddr := state.pop()
addr := c.memOpSetup(baseAddr, uint64(offset), opSize)
value := builder.AllocateInstruction().
AsExtractlane(vector, laneIndex, lane, false).
Insert(builder).Return()
builder.AllocateInstruction().
AsStore(storeOp, value, addr, offset).
Insert(builder)
case wasm.OpcodeVecV128Not:
if state.unreachable {
break
@@ -308,6 +308,9 @@ const (
// `v = sload32x2 MemFlags, p, Offset`.
OpcodeSload32x2

// OpcodeLoadSplat represents a load that replicates the loaded value to all lanes: `v = LoadSplat.lane MemFlags, p, Offset`.
OpcodeLoadSplat

// OpcodeIconst represents the integer const.
OpcodeIconst
@@ -712,10 +715,6 @@ const (
// OpcodeBitcast is a bitcast operation: `v = bitcast MemFlags, x`.
OpcodeBitcast

// OpcodeScalarToVector ...
// `v = scalar_to_vector s`.
OpcodeScalarToVector

// OpcodeBmask ...
// `v = bmask x`.
OpcodeBmask
@@ -881,6 +880,7 @@ var instructionSideEffects = [opcodeEnd]sideEffect{
OpcodeCtz: sideEffectNone,
OpcodePopcnt: sideEffectNone,
OpcodeLoad: sideEffectNone,
OpcodeLoadSplat: sideEffectNone,
OpcodeUload8: sideEffectNone,
OpcodeUload16: sideEffectNone,
OpcodeUload32: sideEffectNone,
@@ -1106,6 +1106,7 @@ var instructionReturnTypes = [opcodeEnd]returnTypesFn{
return
},
OpcodeLoad: returnTypesFnSingle,
OpcodeLoadSplat: returnTypesFnV128,
OpcodeIadd: returnTypesFnSingle,
OpcodeIsub: returnTypesFnSingle,
OpcodeImul: returnTypesFnSingle,
@@ -1182,7 +1183,7 @@ func (i *Instruction) AsLoad(ptr Value, offset uint32, typ Type) *Instruction {
}

// AsExtLoad initializes this instruction as an extending load instruction with the given load opcode.
func (i *Instruction) AsExtLoad(op Opcode, ptr Value, offset uint32, dst64bit bool) {
func (i *Instruction) AsExtLoad(op Opcode, ptr Value, offset uint32, dst64bit bool) *Instruction {
i.opcode = op
i.v = ptr
i.u1 = uint64(offset)
@@ -1191,14 +1192,17 @@ func (i *Instruction) AsExtLoad(op Opcode, ptr Value, offset uint32, dst64bit bo
} else {
i.typ = TypeI32
}
return i
}

// AsSimdLoad initializes this instruction as a load instruction with OpcodeLoad 128 bit.
func (i *Instruction) AsSimdLoad(op Opcode, ptr Value, offset uint32) {
i.opcode = op
// AsLoadSplat initializes this instruction as a load instruction with OpcodeLoadSplat.
func (i *Instruction) AsLoadSplat(ptr Value, offset uint32, lane VecLane) *Instruction {
i.opcode = OpcodeLoadSplat
i.v = ptr
i.u1 = uint64(offset)
i.u2 = uint64(lane)
i.typ = TypeV128
return i
}

// LoadData returns the operands for a load instruction.
@@ -1206,6 +1210,11 @@ func (i *Instruction) LoadData() (ptr Value, offset uint32, typ Type) {
return i.v, uint32(i.u1), i.typ
}

// LoadSplatData returns the operands for a load splat instruction.
func (i *Instruction) LoadSplatData() (ptr Value, offset uint32, lane VecLane) {
return i.v, uint32(i.u1), VecLane(i.u2)
}
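For a quick picture of how the new builder and accessor pair up, a minimal sketch (illustrative only, written as if inside the ssa package; `builder` is an existing Builder and `ptr` is any pointer-typed Value):

i := builder.AllocateInstruction().AsLoadSplat(ptr, 16, VecLaneI16x8)
p, off, lane := i.LoadSplatData()
// p == ptr, off == 16, lane == VecLaneI16x8; the instruction's type is TypeV128.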
// AsStore initializes this instruction as a store instruction with OpcodeStore.
func (i *Instruction) AsStore(storeOp Opcode, value, ptr Value, offset uint32) *Instruction {
i.opcode = storeOp
@@ -2512,6 +2521,8 @@ func (i *Instruction) Format(b Builder) string {
instSuffix = fmt.Sprintf(" %s, %s, %#x", i.v.Format(b), i.v2.Format(b), int32(i.u1))
case OpcodeLoad:
instSuffix = fmt.Sprintf(" %s, %#x", i.v.Format(b), int32(i.u1))
case OpcodeLoadSplat:
instSuffix = fmt.Sprintf(".%s %s, %#x", VecLane(i.u2), i.v.Format(b), int32(i.u1))
case OpcodeUload8, OpcodeUload16, OpcodeUload32, OpcodeSload8, OpcodeSload16, OpcodeSload32:
instSuffix = fmt.Sprintf(" %s, %#x", i.v.Format(b), int32(i.u1))
case OpcodeSelect, OpcodeVbitselect:
@@ -2720,6 +2731,8 @@ func (o Opcode) String() (ret string) {
return "SsubSat"
case OpcodeLoad:
return "Load"
case OpcodeLoadSplat:
return "LoadSplat"
case OpcodeStore:
return "Store"
case OpcodeUload8:
@@ -2906,8 +2919,6 @@ func (o Opcode) String() (ret string) {
return "Nearest"
case OpcodeBitcast:
return "Bitcast"
case OpcodeScalarToVector:
return "ScalarToVector"
case OpcodeBmask:
return "Bmask"
case OpcodeIreduce:
@@ -5,7 +5,6 @@ import (
"embed"
"fmt"
"runtime"
"strings"
"testing"

"github.com/tetratelabs/wazero"
@@ -49,12 +48,6 @@ func runWithInterpreter(t *testing.T, runner func(t *testing.T, r wazero.Runtime

func runWithWazevo(t *testing.T, runner func(t *testing.T, r wazero.Runtime)) {
t.Run("wazevo", func(t *testing.T) {
name := t.Name()
for _, skipTarget := range []string{"695", "701", "718"} {
if strings.Contains(name, skipTarget) {
t.Skip("TODO: skipping for wazevo until SIMD is completed")
}
}
config := wazero.NewRuntimeConfigInterpreter()
wazevo.ConfigureWazevo(config)
r := wazero.NewRuntimeWithConfig(ctx, config)