wazevo: passes simd load/store spec tests (#1766)

Signed-off-by: Edoardo Vacchi <evacchi@users.noreply.github.com>
Author: Edoardo Vacchi
Date:   2023-10-11 01:18:07 +02:00 (committed by GitHub)
Parent: fc8419346a
Commit: e3d83bbc7a
8 changed files with 235 additions and 18 deletions


@@ -88,6 +88,7 @@ var defKinds = [numInstructionKinds]defKind{
fpuLoad32: defKindRD,
fpuLoad64: defKindRD,
fpuLoad128: defKindRD,
vecLoad1R: defKindRD,
loadFpuConst32: defKindRD,
loadFpuConst64: defKindRD,
loadFpuConst128: defKindRD,
@@ -212,6 +213,7 @@ var useKinds = [numInstructionKinds]useKind{
loadFpuConst32: useKindNone,
loadFpuConst64: useKindNone,
loadFpuConst128: useKindNone,
vecLoad1R: useKindRN,
cSel: useKindRNRM,
fpuCSel: useKindRNRM,
movToVec: useKindRN,
@@ -543,6 +545,13 @@ func (i *instruction) asFpuLoad(dst operand, amode addressMode, sizeInBits byte)
i.amode = amode
}
func (i *instruction) asVecLoad1R(rd, rn operand, arr vecArrangement) {
i.kind = vecLoad1R
i.rd = rd
i.rn = rn
i.u1 = uint64(arr)
}
func (i *instruction) asCSet(rd regalloc.VReg, c condFlag) {
i.kind = cSet
i.rd = operandNR(rd)
@@ -1474,6 +1483,8 @@ const (
loadFpuConst64
// loadFpuConst128 represents a load of a 128-bit floating-point constant.
loadFpuConst128
// vecLoad1R represents a load of one single-element structure that is replicated to all lanes of a vector.
vecLoad1R
// fpuToInt represents a conversion from FP to integer.
fpuToInt
// intToFpu represents a conversion from integer to FP.
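Note (not part of the diff): for register allocation, these table entries make vecLoad1R behave like the other loads: defKindRD marks the destination register as the instruction's only def, and useKindRN marks the base-address register as its only use.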


@@ -46,6 +46,11 @@ func (i *instruction) encode(c backend.Compiler) {
c.Emit4Bytes(encodeLoadOrStore(i.kind, regNumberInEncoding[i.rn.realReg()], i.amode))
case uLoad8, uLoad16, uLoad32, uLoad64, sLoad8, sLoad16, sLoad32, fpuLoad32, fpuLoad64, fpuLoad128:
c.Emit4Bytes(encodeLoadOrStore(i.kind, regNumberInEncoding[i.rd.realReg()], i.amode))
case vecLoad1R:
c.Emit4Bytes(encodeVecLoad1Rrt(
regNumberInEncoding[i.rd.realReg()],
regNumberInEncoding[i.rn.realReg()],
vecArrangement(i.u1)))
case condBr:
imm19 := i.condBrOffset()
if imm19%4 != 0 {
@@ -1293,6 +1298,13 @@ func encodeLoadOrStore(kind instructionKind, rt uint32, amode addressMode) uint3
}
}
// encodeVecLoad1Rrt encodes as Load one single-element structure and Replicate to all lanes (of one register) in
// https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/LD1R--Load-one-single-element-structure-and-Replicate-to-all-lanes--of-one-register--?lang=en#sa_imm
func encodeVecLoad1Rrt(rt, rn uint32, arr vecArrangement) uint32 {
size, q := arrToSizeQEncoded(arr)
return q<<30 | 0b001101010000001100<<12 | size<<10 | rn<<5 | rt
}
// encodeAluBitmaskImmediate encodes as Logical (immediate) in
// https://developer.arm.com/documentation/ddi0596/2020-12/Index-by-Encoding/Data-Processing----Immediate?lang=en
func encodeAluBitmaskImmediate(op aluOp, rd, rn uint32, imm uint64, _64bit bool) uint32 {
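As a cross-check of the bit layout in encodeVecLoad1Rrt, here is a minimal standalone sketch (not part of the diff), assuming the conventional arm64 size/Q mapping in which the 8H arrangement encodes as size=0b01 and Q=1 (what arrToSizeQEncoded is expected to return for vecArrangement8H). It reproduces the "41c4404d" test vector added to the encoder test below.

package main

import "fmt"

// Bit layout of LD1R (no offset): bit 31 = 0, bit 30 = Q,
// bits 29:12 fixed at 0b001101010000001100, bits 11:10 = size,
// bits 9:5 = Rn, bits 4:0 = Rt.
func encodeLD1R(rt, rn, size, q uint32) uint32 {
	return q<<30 | 0b001101010000001100<<12 | size<<10 | rn<<5 | rt
}

func main() {
	// ld1r {v1.8h}, [x2]: rt=1, rn=2, size=0b01, q=1.
	fmt.Printf("%08x\n", encodeLD1R(1, 2, 0b01, 1)) // prints 4d40c441
	// Emitted as little-endian bytes, this is "41c4404d".
}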


@@ -1589,6 +1589,7 @@ func TestInstruction_encode(t *testing.T) {
i.asFpuRR(fpuUniOpRoundNearest, operandNR(v1VReg), operandNR(v2VReg), true)
}},
{want: "4140611e", setup: func(i *instruction) { i.asFpuRR(fpuUniOpNeg, operandNR(v1VReg), operandNR(v2VReg), true) }},
{want: "41c4404d", setup: func(i *instruction) { i.asVecLoad1R(operandNR(v1VReg), operandNR(v2VReg), vecArrangement8H) }},
{want: "4201231e4201631e4201239e4201639e4201221e4201621e4201229e4201629e", setup: func(i *instruction) {
i.asNop0()
cur := i


@@ -674,6 +674,24 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) {
}
m.insert(dup)
case ssa.OpcodeLoadSplat:
x, offset, lane := instr.LoadSplatData()
rd := operandNR(m.compiler.VRegOf(instr.Return()))
arr := ssaLaneToArrangement(lane)
rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone)
tmpReg := m.compiler.AllocateVReg(ssa.TypeI32)
// Our encoding for vecLoad1R does not yet support all addressing modes,
// so we use the no-offset addressing mode and add the offset into a temporary register.
add := m.allocateInstr()
add.asALU(aluOpAdd, operandNR(tmpReg), rn, operandImm12(uint16(offset), 0), true)
m.insert(add)
ld1r := m.allocateInstr()
ld1r.asVecLoad1R(rd, operandNR(tmpReg), arr)
m.insert(ld1r)
default:
panic("TODO: lowering " + op.String())
}
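With this lowering, each LoadSplat becomes two machine instructions, roughly an `add tmp, base, #imm` to materialize the address followed by `ld1r {vd.<arr>}, [tmp]` (register names illustrative); folding the offset directly into the load is deferred until vecLoad1R supports more addressing modes.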


@@ -199,9 +199,22 @@ func TestSpectestV2(t *testing.T) {
{"simd_i16x8_extadd_pairwise_i8x16"},
{"simd_i32x4_extadd_pairwise_i16x8"},
{"simd_int_to_int_extend"},
{"simd_load"},
{"simd_load_extend"},
{"simd_load_splat"},
{"simd_load_zero"},
{"simd_load8_lane"},
{"simd_load16_lane"},
{"simd_load32_lane"},
{"simd_load64_lane"},
{"simd_lane"},
{"simd_linking"},
{"simd_splat"},
{"simd_store"},
{"simd_store8_lane"},
{"simd_store16_lane"},
{"simd_store32_lane"},
{"simd_store64_lane"},
} {
t.Run(tc.name, func(t *testing.T) {
t.Run("normal", func(t *testing.T) {


@@ -1705,6 +1705,135 @@ func (c *Compiler) lowerCurrentOpcode() {
load.AsLoad(addr, offset, ssa.TypeV128)
builder.InsertInstruction(load)
state.push(load.Return())
case wasm.OpcodeVecV128Load8Lane, wasm.OpcodeVecV128Load16Lane, wasm.OpcodeVecV128Load32Lane:
_, offset := c.readMemArg()
state.pc++
if state.unreachable {
break
}
var lane ssa.VecLane
var loadOp ssa.Opcode
var opSize uint64
switch vecOp {
case wasm.OpcodeVecV128Load8Lane:
loadOp, lane, opSize = ssa.OpcodeUload8, ssa.VecLaneI8x16, 1
case wasm.OpcodeVecV128Load16Lane:
loadOp, lane, opSize = ssa.OpcodeUload16, ssa.VecLaneI16x8, 2
case wasm.OpcodeVecV128Load32Lane:
loadOp, lane, opSize = ssa.OpcodeUload32, ssa.VecLaneI32x4, 4
}
laneIndex := c.wasmFunctionBody[state.pc]
vector := state.pop()
baseAddr := state.pop()
addr := c.memOpSetup(baseAddr, uint64(offset), opSize)
load := builder.AllocateInstruction().
AsExtLoad(loadOp, addr, offset, false).
Insert(builder).Return()
ret := builder.AllocateInstruction().
AsInsertlane(vector, load, laneIndex, lane).
Insert(builder).Return()
state.push(ret)
case wasm.OpcodeVecV128Load64Lane:
_, offset := c.readMemArg()
state.pc++
if state.unreachable {
break
}
laneIndex := c.wasmFunctionBody[state.pc]
vector := state.pop()
baseAddr := state.pop()
addr := c.memOpSetup(baseAddr, uint64(offset), 8)
load := builder.AllocateInstruction().
AsLoad(addr, offset, ssa.TypeI64).
Insert(builder).Return()
ret := builder.AllocateInstruction().
AsInsertlane(vector, load, laneIndex, ssa.VecLaneI64x2).
Insert(builder).Return()
state.push(ret)
case wasm.OpcodeVecV128Load32zero:
_, offset := c.readMemArg()
if state.unreachable {
break
}
baseAddr := state.pop()
addr := c.memOpSetup(baseAddr, uint64(offset), 4)
ret := builder.AllocateInstruction().
AsLoad(addr, offset, ssa.TypeF32).
Insert(builder).Return()
state.push(ret)
case wasm.OpcodeVecV128Load64zero:
_, offset := c.readMemArg()
if state.unreachable {
break
}
baseAddr := state.pop()
addr := c.memOpSetup(baseAddr, uint64(offset), 8)
ret := builder.AllocateInstruction().
AsLoad(addr, offset, ssa.TypeF64).
Insert(builder).Return()
state.push(ret)
case wasm.OpcodeVecV128Load8x8u, wasm.OpcodeVecV128Load8x8s,
wasm.OpcodeVecV128Load16x4u, wasm.OpcodeVecV128Load16x4s,
wasm.OpcodeVecV128Load32x2u, wasm.OpcodeVecV128Load32x2s:
_, offset := c.readMemArg()
if state.unreachable {
break
}
var lane ssa.VecLane
var signed bool
switch vecOp {
case wasm.OpcodeVecV128Load8x8s:
signed = true
fallthrough
case wasm.OpcodeVecV128Load8x8u:
lane = ssa.VecLaneI8x16
case wasm.OpcodeVecV128Load16x4s:
signed = true
fallthrough
case wasm.OpcodeVecV128Load16x4u:
lane = ssa.VecLaneI16x8
case wasm.OpcodeVecV128Load32x2s:
signed = true
fallthrough
case wasm.OpcodeVecV128Load32x2u:
lane = ssa.VecLaneI32x4
}
baseAddr := state.pop()
addr := c.memOpSetup(baseAddr, uint64(offset), 8)
load := builder.AllocateInstruction().
AsLoad(addr, offset, ssa.TypeV128).
Insert(builder).Return()
ret := builder.AllocateInstruction().
AsWiden(load, lane, signed, true).
Insert(builder).Return()
state.push(ret)
case wasm.OpcodeVecV128Load8Splat, wasm.OpcodeVecV128Load16Splat,
wasm.OpcodeVecV128Load32Splat, wasm.OpcodeVecV128Load64Splat:
_, offset := c.readMemArg()
if state.unreachable {
break
}
var lane ssa.VecLane
var opSize uint64
switch vecOp {
case wasm.OpcodeVecV128Load8Splat:
lane, opSize = ssa.VecLaneI8x16, 1
case wasm.OpcodeVecV128Load16Splat:
lane, opSize = ssa.VecLaneI16x8, 2
case wasm.OpcodeVecV128Load32Splat:
lane, opSize = ssa.VecLaneI32x4, 4
case wasm.OpcodeVecV128Load64Splat:
lane, opSize = ssa.VecLaneI64x2, 8
}
baseAddr := state.pop()
addr := c.memOpSetup(baseAddr, uint64(offset), opSize)
ret := builder.AllocateInstruction().
AsLoadSplat(addr, offset, lane).
Insert(builder).Return()
state.push(ret)
case wasm.OpcodeVecV128Store:
_, offset := c.readMemArg()
if state.unreachable {
@@ -1716,7 +1845,36 @@ func (c *Compiler) lowerCurrentOpcode() {
builder.AllocateInstruction().
AsStore(ssa.OpcodeStore, value, addr, offset).
Insert(builder)
case wasm.OpcodeVecV128Store8Lane, wasm.OpcodeVecV128Store16Lane,
wasm.OpcodeVecV128Store32Lane, wasm.OpcodeVecV128Store64Lane:
_, offset := c.readMemArg()
state.pc++
if state.unreachable {
break
}
laneIndex := c.wasmFunctionBody[state.pc]
var storeOp ssa.Opcode
var lane ssa.VecLane
var opSize uint64
switch vecOp {
case wasm.OpcodeVecV128Store8Lane:
storeOp, lane, opSize = ssa.OpcodeIstore8, ssa.VecLaneI8x16, 1
case wasm.OpcodeVecV128Store16Lane:
storeOp, lane, opSize = ssa.OpcodeIstore16, ssa.VecLaneI16x8, 2
case wasm.OpcodeVecV128Store32Lane:
storeOp, lane, opSize = ssa.OpcodeIstore32, ssa.VecLaneI32x4, 4
case wasm.OpcodeVecV128Store64Lane:
storeOp, lane, opSize = ssa.OpcodeStore, ssa.VecLaneI64x2, 8
}
vector := state.pop()
baseAddr := state.pop()
addr := c.memOpSetup(baseAddr, uint64(offset), opSize)
value := builder.AllocateInstruction().
AsExtractlane(vector, laneIndex, lane, false).
Insert(builder).Return()
builder.AllocateInstruction().
AsStore(storeOp, value, addr, offset).
Insert(builder)
case wasm.OpcodeVecV128Not:
if state.unreachable {
break
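Taken together, the new frontend cases reuse existing scalar SSA ops wherever possible: each loadN_lane lowers to a scalar load feeding Insertlane, each loadNxM_{s,u} to a 128-bit load feeding a low-half Widen, load32_zero/load64_zero to plain scalar FP loads, and only loadN_splat needs the new LoadSplat opcode; symmetrically, each storeN_lane lowers to an Extractlane feeding a scalar store.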


@@ -308,6 +308,9 @@ const (
// `v = sload32x2 MemFlags, p, Offset`.
OpcodeSload32x2
// OpcodeLoadSplat represents a load that replicates the loaded value to all lanes:
// `v = LoadSplat.lane MemFlags, p, Offset`.
OpcodeLoadSplat
// OpcodeIconst represents the integer const.
OpcodeIconst
@@ -712,10 +715,6 @@ const (
// OpcodeBitcast is a bitcast operation: `v = bitcast MemFlags, x`.
OpcodeBitcast
// OpcodeScalarToVector ...
// `v = scalar_to_vector s`.
OpcodeScalarToVector
// OpcodeBmask ...
// `v = bmask x`.
OpcodeBmask
@@ -881,6 +880,7 @@ var instructionSideEffects = [opcodeEnd]sideEffect{
OpcodeCtz: sideEffectNone,
OpcodePopcnt: sideEffectNone,
OpcodeLoad: sideEffectNone,
OpcodeLoadSplat: sideEffectNone,
OpcodeUload8: sideEffectNone,
OpcodeUload16: sideEffectNone,
OpcodeUload32: sideEffectNone,
@@ -1106,6 +1106,7 @@ var instructionReturnTypes = [opcodeEnd]returnTypesFn{
return
},
OpcodeLoad: returnTypesFnSingle,
OpcodeLoadSplat: returnTypesFnV128,
OpcodeIadd: returnTypesFnSingle,
OpcodeIsub: returnTypesFnSingle,
OpcodeImul: returnTypesFnSingle,
@@ -1182,7 +1183,7 @@ func (i *Instruction) AsLoad(ptr Value, offset uint32, typ Type) *Instruction {
}
// AsExtLoad initializes this instruction as an extending-load instruction with the given opcode.
func (i *Instruction) AsExtLoad(op Opcode, ptr Value, offset uint32, dst64bit bool) {
func (i *Instruction) AsExtLoad(op Opcode, ptr Value, offset uint32, dst64bit bool) *Instruction {
i.opcode = op
i.v = ptr
i.u1 = uint64(offset)
@@ -1191,14 +1192,17 @@ func (i *Instruction) AsExtLoad(op Opcode, ptr Value, offset uint32, dst64bit bo
} else {
i.typ = TypeI32
}
return i
}
// AsSimdLoad initializes this instruction as a load instruction with OpcodeLoad 128 bit.
func (i *Instruction) AsSimdLoad(op Opcode, ptr Value, offset uint32) {
i.opcode = op
// AsLoadSplat initializes this instruction as a load instruction with OpcodeLoadSplat.
func (i *Instruction) AsLoadSplat(ptr Value, offset uint32, lane VecLane) *Instruction {
i.opcode = OpcodeLoadSplat
i.v = ptr
i.u1 = uint64(offset)
i.u2 = uint64(lane)
i.typ = TypeV128
return i
}
// LoadData returns the operands for a load instruction.
@@ -1206,6 +1210,11 @@ func (i *Instruction) LoadData() (ptr Value, offset uint32, typ Type) {
return i.v, uint32(i.u1), i.typ
}
// LoadSplatData returns the operands for a load splat instruction.
func (i *Instruction) LoadSplatData() (ptr Value, offset uint32, lane VecLane) {
return i.v, uint32(i.u1), VecLane(i.u2)
}
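The offset and lane travel in the instruction's generic u1/u2 payload slots; a minimal standalone round-trip sketch (simplified types, not part of the diff, assuming VecLane is an integer enum as in this package):

package main

import "fmt"

type VecLane uint64

type instruction struct {
	u1, u2 uint64 // generic payload slots, used here for offset and lane
}

func (i *instruction) asLoadSplat(offset uint32, lane VecLane) {
	i.u1, i.u2 = uint64(offset), uint64(lane)
}

func (i *instruction) loadSplatData() (offset uint32, lane VecLane) {
	return uint32(i.u1), VecLane(i.u2)
}

func main() {
	var i instruction
	i.asLoadSplat(16, VecLane(3))
	offset, lane := i.loadSplatData()
	fmt.Println(offset, lane) // 16 3
}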
// AsStore initializes this instruction as a store instruction with OpcodeStore.
func (i *Instruction) AsStore(storeOp Opcode, value, ptr Value, offset uint32) *Instruction {
i.opcode = storeOp
@@ -2512,6 +2521,8 @@ func (i *Instruction) Format(b Builder) string {
instSuffix = fmt.Sprintf(" %s, %s, %#x", i.v.Format(b), i.v2.Format(b), int32(i.u1))
case OpcodeLoad:
instSuffix = fmt.Sprintf(" %s, %#x", i.v.Format(b), int32(i.u1))
case OpcodeLoadSplat:
instSuffix = fmt.Sprintf(".%s %s, %#x", VecLane(i.u2), i.v.Format(b), int32(i.u1))
case OpcodeUload8, OpcodeUload16, OpcodeUload32, OpcodeSload8, OpcodeSload16, OpcodeSload32:
instSuffix = fmt.Sprintf(" %s, %#x", i.v.Format(b), int32(i.u1))
case OpcodeSelect, OpcodeVbitselect:
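For illustration (assuming VecLane's String form is the lane name, e.g. i16x8): with pointer v2 and offset 16, the new case renders the suffix ".i16x8 v2, 0x10"; the assigned-value prefix comes from the surrounding Format logic.

fmt.Sprintf(".%s %s, %#x", "i16x8", "v2", int32(16)) // ".i16x8 v2, 0x10"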
@@ -2720,6 +2731,8 @@ func (o Opcode) String() (ret string) {
return "SsubSat"
case OpcodeLoad:
return "Load"
case OpcodeLoadSplat:
return "LoadSplat"
case OpcodeStore:
return "Store"
case OpcodeUload8:
@@ -2906,8 +2919,6 @@ func (o Opcode) String() (ret string) {
return "Nearest"
case OpcodeBitcast:
return "Bitcast"
case OpcodeScalarToVector:
return "ScalarToVector"
case OpcodeBmask:
return "Bmask"
case OpcodeIreduce:


@@ -5,7 +5,6 @@ import (
"embed"
"fmt"
"runtime"
"strings"
"testing"
"github.com/tetratelabs/wazero"
@@ -49,12 +48,6 @@ func runWithInterpreter(t *testing.T, runner func(t *testing.T, r wazero.Runtime
func runWithWazevo(t *testing.T, runner func(t *testing.T, r wazero.Runtime)) {
t.Run("wazevo", func(t *testing.T) {
name := t.Name()
for _, skipTarget := range []string{"695", "701", "718"} {
if strings.Contains(name, skipTarget) {
t.Skip("TODO: skipping for wazevo until SIMD is completed")
}
}
config := wazero.NewRuntimeConfigInterpreter()
wazevo.ConfigureWazevo(config)
r := wazero.NewRuntimeWithConfig(ctx, config)