package compiler

import (
	"github.com/tetratelabs/wazero/internal/asm"
	"github.com/tetratelabs/wazero/internal/asm/arm64"
	"github.com/tetratelabs/wazero/internal/wazeroir"
)

// compileV128Const implements compiler.compileV128Const for arm64.
func (c *arm64Compiler) compileV128Const(o *wazeroir.UnionOperation) error {
	if err := c.maybeCompileMoveTopConditionalToGeneralPurposeRegister(); err != nil {
		return err
	}

	lo, hi := o.U1, o.U2

	result, err := c.allocateRegister(registerTypeVector)
	if err != nil {
		return err
	}

	// Moves the lower 64-bits as a scalar float.
	intReg := arm64ReservedRegisterForTemporary
	if lo == 0 {
		intReg = arm64.RegRZR
	} else {
		c.assembler.CompileConstToRegister(arm64.MOVD, int64(lo), arm64ReservedRegisterForTemporary)
	}
	c.assembler.CompileRegisterToRegister(arm64.FMOVD, intReg, result)

	// Then, insert the higher bits with INS(vector,general).
	intReg = arm64ReservedRegisterForTemporary
	if hi == 0 {
		intReg = arm64.RegRZR
	} else {
		c.assembler.CompileConstToRegister(arm64.MOVD, int64(hi), arm64ReservedRegisterForTemporary)
	}
	// "ins Vn.D[1], intReg"
	c.assembler.CompileRegisterToVectorRegister(arm64.INSGEN, intReg, result, arm64.VectorArrangementD, 1)

	c.pushVectorRuntimeValueLocationOnRegister(result)
	return nil
}
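
// Note on compileV128Const above, as a rough illustration only: the register names are
// stand-ins for whatever the allocator picks, and a large constant may take several
// MOVZ/MOVK instructions rather than a single MOVZ. A `v128.const` with lo=1, hi=2
// lowers to something like:
//
//	movz x<tmp>, #1
//	fmov d<result>, x<tmp>      ; write the low 64 bits, zeroing the upper half
//	movz x<tmp>, #2
//	ins  v<result>.d[1], x<tmp> ; insert the high 64 bits
//
// When either half is zero, the zero register is used directly and the MOVZ is skipped.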

// compileV128Add implements compiler.compileV128Add for arm64.
func (c *arm64Compiler) compileV128Add(o *wazeroir.UnionOperation) error {
	x2 := c.locationStack.popV128()
	if err := c.compileEnsureOnRegister(x2); err != nil {
		return err
	}

	x1 := c.locationStack.popV128()
	if err := c.compileEnsureOnRegister(x1); err != nil {
		return err
	}

	x1r, x2r := x1.register, x2.register

	var arr arm64.VectorArrangement
	var inst asm.Instruction
	shape := o.B1
	switch shape {
	case wazeroir.ShapeI8x16:
		inst = arm64.VADD
		arr = arm64.VectorArrangement16B
	case wazeroir.ShapeI16x8:
		inst = arm64.VADD
		arr = arm64.VectorArrangement8H
	case wazeroir.ShapeI32x4:
		inst = arm64.VADD
		arr = arm64.VectorArrangement4S
	case wazeroir.ShapeI64x2:
		inst = arm64.VADD
		arr = arm64.VectorArrangement2D
	case wazeroir.ShapeF32x4:
		inst = arm64.VFADDS
		arr = arm64.VectorArrangement4S
	case wazeroir.ShapeF64x2:
		inst = arm64.VFADDD
		arr = arm64.VectorArrangement2D
	}

	c.assembler.CompileVectorRegisterToVectorRegister(inst, x1r, x2r, arr,
		arm64.VectorIndexNone, arm64.VectorIndexNone)

	c.pushVectorRuntimeValueLocationOnRegister(x2r)
	c.markRegisterUnused(x1r)
	return nil
}

// compileV128Sub implements compiler.compileV128Sub for arm64.
func (c *arm64Compiler) compileV128Sub(o *wazeroir.UnionOperation) (err error) {
	x2 := c.locationStack.popV128()
	if err := c.compileEnsureOnRegister(x2); err != nil {
		return err
	}

	x1 := c.locationStack.popV128()
	if err := c.compileEnsureOnRegister(x1); err != nil {
		return err
	}

	x1r, x2r := x1.register, x2.register

	var arr arm64.VectorArrangement
	var inst asm.Instruction
	shape := o.B1
	switch shape {
	case wazeroir.ShapeI8x16:
		inst = arm64.VSUB
		arr = arm64.VectorArrangement16B
	case wazeroir.ShapeI16x8:
		inst = arm64.VSUB
		arr = arm64.VectorArrangement8H
	case wazeroir.ShapeI32x4:
		inst = arm64.VSUB
		arr = arm64.VectorArrangement4S
	case wazeroir.ShapeI64x2:
		inst = arm64.VSUB
		arr = arm64.VectorArrangement2D
	case wazeroir.ShapeF32x4:
		inst = arm64.VFSUBS
		arr = arm64.VectorArrangement4S
	case wazeroir.ShapeF64x2:
		inst = arm64.VFSUBD
		arr = arm64.VectorArrangement2D
	}

	c.assembler.CompileVectorRegisterToVectorRegister(inst, x2r, x1r, arr,
		arm64.VectorIndexNone, arm64.VectorIndexNone)

	c.pushVectorRuntimeValueLocationOnRegister(x1r)
	c.markRegisterUnused(x2r)
	return
}

// compileV128Load implements compiler.compileV128Load for arm64.
func (c *arm64Compiler) compileV128Load(o *wazeroir.UnionOperation) (err error) {
	if err := c.maybeCompileMoveTopConditionalToGeneralPurposeRegister(); err != nil {
		return err
	}
	result, err := c.allocateRegister(registerTypeVector)
	if err != nil {
		return err
	}

	offset := uint32(o.U2)
	loadType := wazeroir.V128LoadType(o.B1)

	switch loadType {
	case wazeroir.V128LoadType128:
		offset, err := c.compileMemoryAccessOffsetSetup(offset, 16)
		if err != nil {
			return err
		}
		c.assembler.CompileMemoryWithRegisterOffsetToVectorRegister(arm64.VMOV,
			arm64ReservedRegisterForMemory, offset, result, arm64.VectorArrangementQ,
		)
	case wazeroir.V128LoadType8x8s:
		offset, err := c.compileMemoryAccessOffsetSetup(offset, 8)
		if err != nil {
			return err
		}
		c.assembler.CompileMemoryWithRegisterOffsetToVectorRegister(arm64.VMOV,
			arm64ReservedRegisterForMemory, offset, result, arm64.VectorArrangementD,
		)
		c.assembler.CompileVectorRegisterToVectorRegister(arm64.SSHLL, result, result,
			arm64.VectorArrangement8B, arm64.VectorIndexNone, arm64.VectorIndexNone)
	case wazeroir.V128LoadType8x8u:
		offset, err := c.compileMemoryAccessOffsetSetup(offset, 8)
		if err != nil {
			return err
		}
		c.assembler.CompileMemoryWithRegisterOffsetToVectorRegister(arm64.VMOV,
			arm64ReservedRegisterForMemory, offset, result, arm64.VectorArrangementD,
		)
		c.assembler.CompileVectorRegisterToVectorRegister(arm64.USHLL, result, result,
			arm64.VectorArrangement8B, arm64.VectorIndexNone, arm64.VectorIndexNone)
	case wazeroir.V128LoadType16x4s:
		offset, err := c.compileMemoryAccessOffsetSetup(offset, 8)
		if err != nil {
			return err
		}
		c.assembler.CompileMemoryWithRegisterOffsetToVectorRegister(arm64.VMOV,
			arm64ReservedRegisterForMemory, offset, result, arm64.VectorArrangementD,
		)
		c.assembler.CompileVectorRegisterToVectorRegister(arm64.SSHLL, result, result,
			arm64.VectorArrangement4H, arm64.VectorIndexNone, arm64.VectorIndexNone)
	case wazeroir.V128LoadType16x4u:
		offset, err := c.compileMemoryAccessOffsetSetup(offset, 8)
		if err != nil {
			return err
		}
		c.assembler.CompileMemoryWithRegisterOffsetToVectorRegister(arm64.VMOV,
			arm64ReservedRegisterForMemory, offset, result, arm64.VectorArrangementD,
		)
		c.assembler.CompileVectorRegisterToVectorRegister(arm64.USHLL, result, result,
			arm64.VectorArrangement4H, arm64.VectorIndexNone, arm64.VectorIndexNone)
	case wazeroir.V128LoadType32x2s:
		offset, err := c.compileMemoryAccessOffsetSetup(offset, 8)
		if err != nil {
			return err
		}
		c.assembler.CompileMemoryWithRegisterOffsetToVectorRegister(arm64.VMOV,
			arm64ReservedRegisterForMemory, offset, result, arm64.VectorArrangementD,
		)
		c.assembler.CompileVectorRegisterToVectorRegister(arm64.SSHLL, result, result,
			arm64.VectorArrangement2S, arm64.VectorIndexNone, arm64.VectorIndexNone)
	case wazeroir.V128LoadType32x2u:
		offset, err := c.compileMemoryAccessOffsetSetup(offset, 8)
		if err != nil {
			return err
		}
		c.assembler.CompileMemoryWithRegisterOffsetToVectorRegister(arm64.VMOV,
			arm64ReservedRegisterForMemory, offset, result, arm64.VectorArrangementD,
		)
		c.assembler.CompileVectorRegisterToVectorRegister(arm64.USHLL, result, result,
			arm64.VectorArrangement2S, arm64.VectorIndexNone, arm64.VectorIndexNone)
	case wazeroir.V128LoadType8Splat:
		offset, err := c.compileMemoryAccessOffsetSetup(offset, 1)
		if err != nil {
			return err
		}
		c.assembler.CompileRegisterToRegister(arm64.ADD, arm64ReservedRegisterForMemory, offset)
		c.assembler.CompileMemoryToVectorRegister(arm64.LD1R, offset, 0, result, arm64.VectorArrangement16B)
	case wazeroir.V128LoadType16Splat:
		offset, err := c.compileMemoryAccessOffsetSetup(offset, 2)
		if err != nil {
			return err
		}
		c.assembler.CompileRegisterToRegister(arm64.ADD, arm64ReservedRegisterForMemory, offset)
		c.assembler.CompileMemoryToVectorRegister(arm64.LD1R, offset, 0, result, arm64.VectorArrangement8H)
	case wazeroir.V128LoadType32Splat:
		offset, err := c.compileMemoryAccessOffsetSetup(offset, 4)
		if err != nil {
			return err
		}
		c.assembler.CompileRegisterToRegister(arm64.ADD, arm64ReservedRegisterForMemory, offset)
		c.assembler.CompileMemoryToVectorRegister(arm64.LD1R, offset, 0, result, arm64.VectorArrangement4S)
	case wazeroir.V128LoadType64Splat:
		offset, err := c.compileMemoryAccessOffsetSetup(offset, 8)
		if err != nil {
			return err
		}
		c.assembler.CompileRegisterToRegister(arm64.ADD, arm64ReservedRegisterForMemory, offset)
		c.assembler.CompileMemoryToVectorRegister(arm64.LD1R, offset, 0, result, arm64.VectorArrangement2D)
	case wazeroir.V128LoadType32zero:
		offset, err := c.compileMemoryAccessOffsetSetup(offset, 4)
		if err != nil {
			return err
		}
		c.assembler.CompileMemoryWithRegisterOffsetToVectorRegister(arm64.VMOV,
			arm64ReservedRegisterForMemory, offset, result, arm64.VectorArrangementS,
		)
	case wazeroir.V128LoadType64zero:
		offset, err := c.compileMemoryAccessOffsetSetup(offset, 8)
		if err != nil {
			return err
		}
		c.assembler.CompileMemoryWithRegisterOffsetToVectorRegister(arm64.VMOV,
			arm64ReservedRegisterForMemory, offset, result, arm64.VectorArrangementD,
		)
	}

	c.pushVectorRuntimeValueLocationOnRegister(result)
	return
}

// compileV128LoadLane implements compiler.compileV128LoadLane for arm64.
func (c *arm64Compiler) compileV128LoadLane(o *wazeroir.UnionOperation) (err error) {
	targetVector := c.locationStack.popV128()
	if err = c.compileEnsureOnRegister(targetVector); err != nil {
		return
	}

	laneSize, laneIndex := o.B1, o.B2
	offset := uint32(o.U2)

	targetSizeInBytes := int64(laneSize / 8)
	source, err := c.compileMemoryAccessOffsetSetup(offset, targetSizeInBytes)
	if err != nil {
		return err
	}

	var loadInst asm.Instruction
	var arr arm64.VectorArrangement
	switch laneSize {
	case 8:
		arr = arm64.VectorArrangementB
		loadInst = arm64.LDRB
	case 16:
		arr = arm64.VectorArrangementH
		loadInst = arm64.LDRH
	case 32:
		loadInst = arm64.LDRW
		arr = arm64.VectorArrangementS
	case 64:
		loadInst = arm64.LDRD
		arr = arm64.VectorArrangementD
	}

	c.assembler.CompileMemoryWithRegisterOffsetToRegister(loadInst, arm64ReservedRegisterForMemory, source, source)
	c.assembler.CompileRegisterToVectorRegister(arm64.INSGEN, source, targetVector.register, arr, arm64.VectorIndex(laneIndex))

	c.pushVectorRuntimeValueLocationOnRegister(targetVector.register)
	c.locationStack.markRegisterUnused(source)
	return
}

// compileV128Store implements compiler.compileV128Store for arm64.
func (c *arm64Compiler) compileV128Store(o *wazeroir.UnionOperation) (err error) {
	v := c.locationStack.popV128()
	if err = c.compileEnsureOnRegister(v); err != nil {
		return
	}

	const targetSizeInBytes = 16
	offset := uint32(o.U2)
	offsetReg, err := c.compileMemoryAccessOffsetSetup(offset, targetSizeInBytes)
	if err != nil {
		return err
	}

	c.assembler.CompileVectorRegisterToMemoryWithRegisterOffset(arm64.VMOV,
		v.register, arm64ReservedRegisterForMemory, offsetReg, arm64.VectorArrangementQ)

	c.markRegisterUnused(v.register)
	return
}

// compileV128StoreLane implements compiler.compileV128StoreLane for arm64.
func (c *arm64Compiler) compileV128StoreLane(o *wazeroir.UnionOperation) (err error) {
	var arr arm64.VectorArrangement
	var storeInst asm.Instruction
	laneSize := o.B1
	laneIndex := o.B2
	offset := uint32(o.U2)
	switch laneSize {
	case 8:
		storeInst = arm64.STRB
		arr = arm64.VectorArrangementB
	case 16:
		storeInst = arm64.STRH
		arr = arm64.VectorArrangementH
	case 32:
		storeInst = arm64.STRW
		arr = arm64.VectorArrangementS
	case 64:
		storeInst = arm64.STRD
		arr = arm64.VectorArrangementD
	}

	v := c.locationStack.popV128()
	if err = c.compileEnsureOnRegister(v); err != nil {
		return
	}

	targetSizeInBytes := int64(laneSize / 8)
	offsetReg, err := c.compileMemoryAccessOffsetSetup(offset, targetSizeInBytes)
	if err != nil {
		return err
	}

	c.assembler.CompileVectorRegisterToRegister(arm64.UMOV, v.register, arm64ReservedRegisterForTemporary, arr,
		arm64.VectorIndex(laneIndex))

	c.assembler.CompileRegisterToMemoryWithRegisterOffset(storeInst,
		arm64ReservedRegisterForTemporary, arm64ReservedRegisterForMemory, offsetReg)

	c.locationStack.markRegisterUnused(v.register)
	return
}

// compileV128ExtractLane implements compiler.compileV128ExtractLane for arm64.
func (c *arm64Compiler) compileV128ExtractLane(o *wazeroir.UnionOperation) (err error) {
	v := c.locationStack.popV128()
	if err = c.compileEnsureOnRegister(v); err != nil {
		return
	}

	shape := o.B1
	laneIndex := o.B2
	signed := o.B3
	switch shape {
	case wazeroir.ShapeI8x16:
		result, err := c.allocateRegister(registerTypeGeneralPurpose)
		if err != nil {
			return err
		}
		var inst asm.Instruction
		if signed {
			inst = arm64.SMOV32
		} else {
			inst = arm64.UMOV
		}
		c.assembler.CompileVectorRegisterToRegister(inst, v.register, result,
			arm64.VectorArrangementB, arm64.VectorIndex(laneIndex))

		c.locationStack.markRegisterUnused(v.register)
		c.pushRuntimeValueLocationOnRegister(result, runtimeValueTypeI32)
	case wazeroir.ShapeI16x8:
		result, err := c.allocateRegister(registerTypeGeneralPurpose)
		if err != nil {
			return err
		}
		var inst asm.Instruction
		if signed {
			inst = arm64.SMOV32
		} else {
			inst = arm64.UMOV
		}
		c.assembler.CompileVectorRegisterToRegister(inst, v.register, result,
			arm64.VectorArrangementH, arm64.VectorIndex(laneIndex))

		c.locationStack.markRegisterUnused(v.register)
		c.pushRuntimeValueLocationOnRegister(result, runtimeValueTypeI32)
	case wazeroir.ShapeI32x4:
		result, err := c.allocateRegister(registerTypeGeneralPurpose)
		if err != nil {
			return err
		}
		c.assembler.CompileVectorRegisterToRegister(arm64.UMOV, v.register, result,
			arm64.VectorArrangementS, arm64.VectorIndex(laneIndex))

		c.locationStack.markRegisterUnused(v.register)
		c.pushRuntimeValueLocationOnRegister(result, runtimeValueTypeI32)
	case wazeroir.ShapeI64x2:
		result, err := c.allocateRegister(registerTypeGeneralPurpose)
		if err != nil {
			return err
		}
		c.assembler.CompileVectorRegisterToRegister(arm64.UMOV, v.register, result,
			arm64.VectorArrangementD, arm64.VectorIndex(laneIndex))

		c.locationStack.markRegisterUnused(v.register)
		c.pushRuntimeValueLocationOnRegister(result, runtimeValueTypeI64)
	case wazeroir.ShapeF32x4:
		c.assembler.CompileVectorRegisterToVectorRegister(arm64.INSELEM, v.register, v.register,
			arm64.VectorArrangementS, arm64.VectorIndex(laneIndex), 0)
		c.pushRuntimeValueLocationOnRegister(v.register, runtimeValueTypeF32)
	case wazeroir.ShapeF64x2:
		c.assembler.CompileVectorRegisterToVectorRegister(arm64.INSELEM, v.register, v.register,
			arm64.VectorArrangementD, arm64.VectorIndex(laneIndex), 0)
		c.pushRuntimeValueLocationOnRegister(v.register, runtimeValueTypeF64)
	}
	return
}

// compileV128ReplaceLane implements compiler.compileV128ReplaceLane for arm64.
func (c *arm64Compiler) compileV128ReplaceLane(o *wazeroir.UnionOperation) (err error) {
	origin := c.locationStack.pop()
	if err = c.compileEnsureOnRegister(origin); err != nil {
		return
	}

	vector := c.locationStack.popV128()
	if err = c.compileEnsureOnRegister(vector); err != nil {
		return
	}

	shape := o.B1
	laneIndex := o.B2
	switch shape {
	case wazeroir.ShapeI8x16:
		c.assembler.CompileRegisterToVectorRegister(arm64.INSGEN, origin.register, vector.register,
			arm64.VectorArrangementB, arm64.VectorIndex(laneIndex))
	case wazeroir.ShapeI16x8:
		c.assembler.CompileRegisterToVectorRegister(arm64.INSGEN, origin.register, vector.register,
			arm64.VectorArrangementH, arm64.VectorIndex(laneIndex))
	case wazeroir.ShapeI32x4:
		c.assembler.CompileRegisterToVectorRegister(arm64.INSGEN, origin.register, vector.register,
			arm64.VectorArrangementS, arm64.VectorIndex(laneIndex))
	case wazeroir.ShapeI64x2:
		c.assembler.CompileRegisterToVectorRegister(arm64.INSGEN, origin.register, vector.register,
			arm64.VectorArrangementD, arm64.VectorIndex(laneIndex))
	case wazeroir.ShapeF32x4:
		c.assembler.CompileVectorRegisterToVectorRegister(arm64.INSELEM, origin.register, vector.register,
			arm64.VectorArrangementS, 0, arm64.VectorIndex(laneIndex))
	case wazeroir.ShapeF64x2:
		c.assembler.CompileVectorRegisterToVectorRegister(arm64.INSELEM, origin.register, vector.register,
			arm64.VectorArrangementD, 0, arm64.VectorIndex(laneIndex))
	}

	c.locationStack.markRegisterUnused(origin.register)
	c.pushVectorRuntimeValueLocationOnRegister(vector.register)
	return
}

// compileV128Splat implements compiler.compileV128Splat for arm64.
func (c *arm64Compiler) compileV128Splat(o *wazeroir.UnionOperation) (err error) {
	origin := c.locationStack.pop()
	if err = c.compileEnsureOnRegister(origin); err != nil {
		return
	}

	var result asm.Register
	shape := o.B1
	switch shape {
	case wazeroir.ShapeI8x16:
		result, err = c.allocateRegister(registerTypeVector)
		if err != nil {
			return
		}
		c.assembler.CompileRegisterToVectorRegister(arm64.DUPGEN, origin.register, result,
			arm64.VectorArrangement16B, arm64.VectorIndexNone)
	case wazeroir.ShapeI16x8:
		result, err = c.allocateRegister(registerTypeVector)
		if err != nil {
			return
		}
		c.assembler.CompileRegisterToVectorRegister(arm64.DUPGEN, origin.register, result,
			arm64.VectorArrangement8H, arm64.VectorIndexNone)
	case wazeroir.ShapeI32x4:
		result, err = c.allocateRegister(registerTypeVector)
		if err != nil {
			return
		}
		c.assembler.CompileRegisterToVectorRegister(arm64.DUPGEN, origin.register, result,
			arm64.VectorArrangement4S, arm64.VectorIndexNone)
	case wazeroir.ShapeI64x2:
		result, err = c.allocateRegister(registerTypeVector)
		if err != nil {
			return
		}
		c.assembler.CompileRegisterToVectorRegister(arm64.DUPGEN, origin.register, result,
			arm64.VectorArrangement2D, arm64.VectorIndexNone)
	case wazeroir.ShapeF32x4:
		result = origin.register
		c.assembler.CompileVectorRegisterToVectorRegister(arm64.DUPELEM, origin.register, result,
			arm64.VectorArrangementS, 0, arm64.VectorIndexNone)
	case wazeroir.ShapeF64x2:
		result = origin.register
		c.assembler.CompileVectorRegisterToVectorRegister(arm64.DUPELEM, origin.register, result,
			arm64.VectorArrangementD, 0, arm64.VectorIndexNone)
	}

	c.locationStack.markRegisterUnused(origin.register)
	c.pushVectorRuntimeValueLocationOnRegister(result)
	return
}

// onValueReleaseRegisterToStack releases the runtime value currently held in reg (if any)
// onto the stack so that reg can be clobbered safely.
func (c *arm64Compiler) onValueReleaseRegisterToStack(reg asm.Register) {
	for i := uint64(0); i < c.locationStack.sp; i++ {
		prevValue := &c.locationStack.stack[i]
		if prevValue.register == reg {
			c.compileReleaseRegisterToStack(prevValue)
			break
		}
	}
}

// compileV128Shuffle implements compiler.compileV128Shuffle for arm64.
func (c *arm64Compiler) compileV128Shuffle(o *wazeroir.UnionOperation) (err error) {
	// Shuffle needs its two operands (v, w) to be placed in adjacent registers.
	// For simplicity, we use V29 for v and V30 for w values respectively.
	const vReg, wReg = arm64.RegV29, arm64.RegV30

	// Ensures that the w value is placed on wReg.
	w := c.locationStack.popV128()
	if w.register != wReg {
		// If wReg is already in use, save the value onto the stack.
		c.onValueReleaseRegisterToStack(wReg)

		if w.onRegister() {
			c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.VORR,
				w.register, w.register, wReg, arm64.VectorArrangement16B)
			// We no longer use the old register.
			c.markRegisterUnused(w.register)
		} else { // on stack
			w.setRegister(wReg)
			c.compileLoadValueOnStackToRegister(w)
		}
	}

	// Ensures that the v value is placed on vReg.
	v := c.locationStack.popV128()
	if v.register != vReg {
		// If vReg is already in use, save the value onto the stack.
		c.onValueReleaseRegisterToStack(vReg)

		if v.onRegister() {
			c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.VORR,
				v.register, v.register, vReg, arm64.VectorArrangement16B)
			// We no longer use the old register.
			c.markRegisterUnused(v.register)
		} else { // on stack
			v.setRegister(vReg)
			c.compileLoadValueOnStackToRegister(v)
		}
	}

	c.locationStack.markRegisterUsed(vReg, wReg)
	result, err := c.allocateRegister(registerTypeVector)
	if err != nil {
		return err
	}

	lanes := make([]byte, len(o.Us))
	for i, lane := range o.Us {
		lanes[i] = byte(lane)
	}
	c.assembler.CompileStaticConstToVectorRegister(arm64.VMOV, asm.NewStaticConst(lanes), result, arm64.VectorArrangementQ)
	c.assembler.CompileVectorRegisterToVectorRegister(arm64.TBL2, vReg, result, arm64.VectorArrangement16B,
		arm64.VectorIndexNone, arm64.VectorIndexNone)

	c.locationStack.markRegisterUnused(vReg, wReg)
	c.pushVectorRuntimeValueLocationOnRegister(result)
	return
}
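
// Note on compileV128Shuffle above (rough sketch of intent, not the literal listing): the
// 16 lane selectors from o.Us are loaded as a constant into the result register, and TBL2
// then performs a byte table lookup over the 32-byte table formed by the adjacent
// registers V29:V30. A selector in 0..15 picks a byte of v (V29) and one in 16..31 picks
// a byte of w (V30), which matches the semantics of the Wasm i8x16.shuffle immediate.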

// compileV128Swizzle implements compiler.compileV128Swizzle for arm64.
func (c *arm64Compiler) compileV128Swizzle(*wazeroir.UnionOperation) (err error) {
	indexVec := c.locationStack.popV128()
	if err = c.compileEnsureOnRegister(indexVec); err != nil {
		return
	}
	baseVec := c.locationStack.popV128()
	if err = c.compileEnsureOnRegister(baseVec); err != nil {
		return
	}

	c.assembler.CompileVectorRegisterToVectorRegister(arm64.TBL1, baseVec.register, indexVec.register,
		arm64.VectorArrangement16B, arm64.VectorIndexNone, arm64.VectorIndexNone)

	c.markRegisterUnused(baseVec.register)
	c.pushVectorRuntimeValueLocationOnRegister(indexVec.register)
	return
}

// compileV128AnyTrue implements compiler.compileV128AnyTrue for arm64.
func (c *arm64Compiler) compileV128AnyTrue(*wazeroir.UnionOperation) (err error) {
	vector := c.locationStack.popV128()
	if err = c.compileEnsureOnRegister(vector); err != nil {
		return
	}

	v := vector.register
	c.assembler.CompileVectorRegisterToVectorRegister(arm64.UMAXP, v, v,
		arm64.VectorArrangement16B, arm64.VectorIndexNone, arm64.VectorIndexNone)
	c.assembler.CompileVectorRegisterToRegister(arm64.UMOV, v, arm64ReservedRegisterForTemporary,
		arm64.VectorArrangementD, 0)
	c.assembler.CompileTwoRegistersToNone(arm64.CMP, arm64.RegRZR, arm64ReservedRegisterForTemporary)
	c.locationStack.pushRuntimeValueLocationOnConditionalRegister(arm64.CondNE)

	c.locationStack.markRegisterUnused(v)
	return
}
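
// Note on compileV128AnyTrue above: UMAXP (unsigned pairwise max) folds the 16 bytes so
// that the low 64 bits of v are non-zero exactly when some byte of the original vector
// was non-zero; that half is then moved to a general-purpose register, compared against
// zero, and the boolean result is left on the NE condition flag.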

// compileV128AllTrue implements compiler.compileV128AllTrue for arm64.
func (c *arm64Compiler) compileV128AllTrue(o *wazeroir.UnionOperation) (err error) {
	vector := c.locationStack.popV128()
	if err = c.compileEnsureOnRegister(vector); err != nil {
		return
	}

	v := vector.register
	shape := o.B1
	if shape == wazeroir.ShapeI64x2 {
		c.assembler.CompileVectorRegisterToVectorRegister(arm64.CMEQZERO, arm64.RegRZR, v,
			arm64.VectorArrangement2D, arm64.VectorIndexNone, arm64.VectorIndexNone)
		c.assembler.CompileVectorRegisterToVectorRegister(arm64.ADDP, v, v,
			arm64.VectorArrangement2D, arm64.VectorIndexNone, arm64.VectorIndexNone)
		c.assembler.CompileTwoRegistersToNone(arm64.FCMPD, v, v)
		c.locationStack.pushRuntimeValueLocationOnConditionalRegister(arm64.CondEQ)
	} else {
		var arr arm64.VectorArrangement
		switch shape {
		case wazeroir.ShapeI8x16:
			arr = arm64.VectorArrangement16B
		case wazeroir.ShapeI16x8:
			arr = arm64.VectorArrangement8H
		case wazeroir.ShapeI32x4:
			arr = arm64.VectorArrangement4S
		}

		c.assembler.CompileVectorRegisterToVectorRegister(arm64.UMINV, v, v,
			arr, arm64.VectorIndexNone, arm64.VectorIndexNone)
		c.assembler.CompileVectorRegisterToRegister(arm64.UMOV, v, arm64ReservedRegisterForTemporary,
			arm64.VectorArrangementD, 0)
		c.assembler.CompileTwoRegistersToNone(arm64.CMP, arm64.RegRZR, arm64ReservedRegisterForTemporary)
		c.locationStack.pushRuntimeValueLocationOnConditionalRegister(arm64.CondNE)
	}
	c.markRegisterUnused(v)
	return
}
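
// Note on compileV128AllTrue above: for the narrower shapes, UMINV reduces the vector to
// its smallest unsigned lane, which is non-zero exactly when every lane is non-zero. For
// i64x2, CMEQ-against-zero turns each zero lane into all-ones and each non-zero lane into
// zero, ADDP folds the two 64-bit lanes together, and the FCMP of that value with itself
// distinguishes the cases: zero compares equal to itself, while the all-ones patterns are
// NaN bit patterns and compare unordered, so EQ holds exactly when no input lane was zero.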

var (
	i8x16BitmaskConst = [16]byte{
		0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80,
		0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80,
	}
	i16x8BitmaskConst = [16]byte{
		0x01, 0x00, 0x02, 0x00, 0x04, 0x00, 0x08, 0x00,
		0x10, 0x00, 0x20, 0x00, 0x40, 0x00, 0x80, 0x00,
	}
	i32x4BitmaskConst = [16]byte{
		0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
		0x04, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00,
	}
)

// compileV128BitMask implements compiler.compileV128BitMask for arm64.
func (c *arm64Compiler) compileV128BitMask(o *wazeroir.UnionOperation) (err error) {
	vector := c.locationStack.popV128()
	if err = c.compileEnsureOnRegister(vector); err != nil {
		return
	}

	v := vector.register

	result, err := c.allocateRegister(registerTypeGeneralPurpose)
	if err != nil {
		return err
	}

	shape := o.B1
	switch shape {
	case wazeroir.ShapeI8x16:
		vecTmp, err := c.allocateRegister(registerTypeVector)
		if err != nil {
			return err
		}
		// Right arithmetic shift on the original vector, keeping the result in v. So we have:
		// v[i] = 0xff if vi<0, 0 otherwise.
		c.assembler.CompileVectorRegisterToVectorRegisterWithConst(arm64.SSHR, v, v, arm64.VectorArrangement16B, 7)

		// Load the bit mask into vecTmp.
		c.assembler.CompileStaticConstToVectorRegister(arm64.VMOV, asm.NewStaticConst(i8x16BitmaskConst[:]), vecTmp, arm64.VectorArrangementQ)

		// Lane-wise logical AND with i8x16BitmaskConst, meaning that we have
		// v[i] = (1 << (i % 8)) if vi<0, 0 otherwise.
		//
		// Below, we use the following notation:
		// wi := (1 << (i % 8)) if vi<0, 0 otherwise.
		c.assembler.CompileVectorRegisterToVectorRegister(arm64.VAND, vecTmp, v, arm64.VectorArrangement16B,
			arm64.VectorIndexNone, arm64.VectorIndexNone)

		// Swap the lower and higher 8 byte elements, and write it into vecTmp, meaning that we have
		// vecTmp[i] = w(i+8) if i < 8, w(i-8) otherwise.
		c.assembler.CompileTwoVectorRegistersToVectorRegisterWithConst(arm64.EXT, v, v, vecTmp, arm64.VectorArrangement16B, 0x8)

		// v = [w0, w8, ..., w7, w15]
		c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.ZIP1, vecTmp, v, v, arm64.VectorArrangement16B)

		// v.h[0] = w0 + ... + w15
		c.assembler.CompileVectorRegisterToVectorRegister(arm64.ADDV, v, v,
			arm64.VectorArrangement8H, arm64.VectorIndexNone, arm64.VectorIndexNone)

		// Extract the v.h[0] as the result.
		c.assembler.CompileVectorRegisterToRegister(arm64.UMOV, v, result, arm64.VectorArrangementH, 0)
	case wazeroir.ShapeI16x8:
		vecTmp, err := c.allocateRegister(registerTypeVector)
		if err != nil {
			return err
		}
		// Right arithmetic shift on the original vector, keeping the result in v. So we have:
		// v[i] = 0xffff if vi<0, 0 otherwise.
		c.assembler.CompileVectorRegisterToVectorRegisterWithConst(arm64.SSHR, v, v, arm64.VectorArrangement8H, 15)

		// Load the bit mask into vecTmp.
		c.assembler.CompileStaticConstToVectorRegister(arm64.VMOV, asm.NewStaticConst(i16x8BitmaskConst[:]), vecTmp, arm64.VectorArrangementQ)

		// Lane-wise logical AND with i16x8BitmaskConst, meaning that we have
		// v[i] = (1 << i) if vi<0, 0 otherwise, for i = 0..7.
		c.assembler.CompileVectorRegisterToVectorRegister(arm64.VAND, vecTmp, v, arm64.VectorArrangement16B,
			arm64.VectorIndexNone, arm64.VectorIndexNone)

		c.assembler.CompileVectorRegisterToVectorRegister(arm64.ADDV, v, v,
			arm64.VectorArrangement8H, arm64.VectorIndexNone, arm64.VectorIndexNone)

		c.assembler.CompileVectorRegisterToRegister(arm64.UMOV, v, result, arm64.VectorArrangementH, 0)
	case wazeroir.ShapeI32x4:
		vecTmp, err := c.allocateRegister(registerTypeVector)
		if err != nil {
			return err
		}

		// Right arithmetic shift on the original vector, keeping the result in v. So we have:
		// v[i] = 0xffffffff if vi<0, 0 otherwise.
		c.assembler.CompileVectorRegisterToVectorRegisterWithConst(arm64.SSHR, v, v, arm64.VectorArrangement4S, 32)

		// Load the bit mask into vecTmp.
		c.assembler.CompileStaticConstToVectorRegister(arm64.VMOV,
			asm.NewStaticConst(i32x4BitmaskConst[:]), vecTmp, arm64.VectorArrangementQ)

		// Lane-wise logical AND with i32x4BitmaskConst, meaning that we have
		// v[i] = (1 << i) if vi<0, 0 otherwise, for i = 0..3.
		c.assembler.CompileVectorRegisterToVectorRegister(arm64.VAND, vecTmp, v, arm64.VectorArrangement16B,
			arm64.VectorIndexNone, arm64.VectorIndexNone)

		c.assembler.CompileVectorRegisterToVectorRegister(arm64.ADDV, v, v,
			arm64.VectorArrangement4S, arm64.VectorIndexNone, arm64.VectorIndexNone)

		c.assembler.CompileVectorRegisterToRegister(arm64.UMOV, v, result, arm64.VectorArrangementS, 0)
	case wazeroir.ShapeI64x2:
		// Move the lower 64-bit int into result.
		c.assembler.CompileVectorRegisterToRegister(arm64.UMOV, v, result,
			arm64.VectorArrangementD, 0)
		// Move the higher 64-bit int into arm64ReservedRegisterForTemporary.
		c.assembler.CompileVectorRegisterToRegister(arm64.UMOV, v, arm64ReservedRegisterForTemporary,
			arm64.VectorArrangementD, 1)

		// Move the sign bit into the least significant bit.
		c.assembler.CompileConstToRegister(arm64.LSR, 63, result)
		c.assembler.CompileConstToRegister(arm64.LSR, 63, arm64ReservedRegisterForTemporary)

		// result = (arm64ReservedRegisterForTemporary<<1) | result
		c.assembler.CompileLeftShiftedRegisterToRegister(arm64.ADD,
			arm64ReservedRegisterForTemporary, 1, result, result)
	}

	c.markRegisterUnused(v)
	c.pushRuntimeValueLocationOnRegister(result, runtimeValueTypeI32)
	return
}
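
// Note on the i8x16 case of compileV128BitMask above, as a small worked example that
// follows the lane ordering noted in the comments: if only bytes 0 and 9 of the input are
// negative, the shift+AND steps leave w0=0x01 and w9=0x02 (all other wi are zero). EXT and
// ZIP1 pair each low-half byte with its high-half counterpart into 16-bit lanes, so the
// ADDV over the eight halfword lanes yields 0x01 + (0x02 << 8) = 0x0201, i.e. bits 0 and 9
// set in the final i32 mask, matching i8x16.bitmask semantics.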

// compileV128And implements compiler.compileV128And for arm64.
func (c *arm64Compiler) compileV128And(*wazeroir.UnionOperation) error {
	return c.compileV128x2BinOp(arm64.VAND, arm64.VectorArrangement16B)
}

// compileV128Not implements compiler.compileV128Not for arm64.
func (c *arm64Compiler) compileV128Not(*wazeroir.UnionOperation) error {
	return c.compileV128UniOp(arm64.NOT, arm64.VectorArrangement16B)
}

// compileV128Or implements compiler.compileV128Or for arm64.
func (c *arm64Compiler) compileV128Or(*wazeroir.UnionOperation) error {
	return c.compileV128x2BinOp(arm64.VORR, arm64.VectorArrangement16B)
}

// compileV128Xor implements compiler.compileV128Xor for arm64.
func (c *arm64Compiler) compileV128Xor(*wazeroir.UnionOperation) error {
	return c.compileV128x2BinOp(arm64.EOR, arm64.VectorArrangement16B)
}

// compileV128Bitselect implements compiler.compileV128Bitselect for arm64.
func (c *arm64Compiler) compileV128Bitselect(*wazeroir.UnionOperation) error {
	selector := c.locationStack.popV128()
	if err := c.compileEnsureOnRegister(selector); err != nil {
		return err
	}

	x2 := c.locationStack.popV128()
	if err := c.compileEnsureOnRegister(x2); err != nil {
		return err
	}

	x1 := c.locationStack.popV128()
	if err := c.compileEnsureOnRegister(x1); err != nil {
		return err
	}

	c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.BSL,
		x2.register, x1.register, selector.register, arm64.VectorArrangement16B)

	c.markRegisterUnused(x1.register, x2.register)
	c.pushVectorRuntimeValueLocationOnRegister(selector.register)
	return nil
}

// compileV128AndNot implements compiler.compileV128AndNot for arm64.
func (c *arm64Compiler) compileV128AndNot(*wazeroir.UnionOperation) error {
	return c.compileV128x2BinOp(arm64.BIC, arm64.VectorArrangement16B)
}

// compileV128UniOp applies the given single-operand vector instruction in place to the
// v128 value on top of the stack.
func (c *arm64Compiler) compileV128UniOp(inst asm.Instruction, arr arm64.VectorArrangement) error {
	v := c.locationStack.popV128()
	if err := c.compileEnsureOnRegister(v); err != nil {
		return err
	}

	c.assembler.CompileVectorRegisterToVectorRegister(inst, v.register, v.register, arr, arm64.VectorIndexNone, arm64.VectorIndexNone)

	c.pushVectorRuntimeValueLocationOnRegister(v.register)
	return nil
}

// compileV128x2BinOp pops two v128 values, applies the given two-operand vector
// instruction with x1's register as the destination, and pushes the result.
func (c *arm64Compiler) compileV128x2BinOp(inst asm.Instruction, arr arm64.VectorArrangement) error {
	x2 := c.locationStack.popV128()
	if err := c.compileEnsureOnRegister(x2); err != nil {
		return err
	}

	x1 := c.locationStack.popV128()
	if err := c.compileEnsureOnRegister(x1); err != nil {
		return err
	}

	c.assembler.CompileVectorRegisterToVectorRegister(inst, x2.register, x1.register, arr, arm64.VectorIndexNone, arm64.VectorIndexNone)

	c.markRegisterUnused(x2.register)
	c.pushVectorRuntimeValueLocationOnRegister(x1.register)
	return nil
}

// compileV128Shr implements compiler.compileV128Shr for arm64.
func (c *arm64Compiler) compileV128Shr(o *wazeroir.UnionOperation) error {
	var inst asm.Instruction
	shape := o.B1
	signed := o.B3
	if signed {
		inst = arm64.SSHL
	} else {
		inst = arm64.USHL
	}
	return c.compileV128ShiftImpl(shape, inst, true)
}

// compileV128Shl implements compiler.compileV128Shl for arm64.
func (c *arm64Compiler) compileV128Shl(o *wazeroir.UnionOperation) error {
	return c.compileV128ShiftImpl(o.B1 /*shape*/, arm64.SSHL, false)
}

// compileV128ShiftImpl implements compileV128Shr and compileV128Shl.
func (c *arm64Compiler) compileV128ShiftImpl(shape wazeroir.Shape, ins asm.Instruction, rightShift bool) error {
	s := c.locationStack.pop()
	if s.register == arm64.RegRZR {
		// If the shift amount is the zero register, there is nothing to do here.
		return nil
	}

	var modulo asm.ConstantValue
	var arr arm64.VectorArrangement
	switch shape {
	case wazeroir.ShapeI8x16:
		modulo = 0x7 // modulo 8.
		arr = arm64.VectorArrangement16B
	case wazeroir.ShapeI16x8:
		modulo = 0xf // modulo 16.
		arr = arm64.VectorArrangement8H
	case wazeroir.ShapeI32x4:
		modulo = 0x1f // modulo 32.
		arr = arm64.VectorArrangement4S
	case wazeroir.ShapeI64x2:
		modulo = 0x3f // modulo 64.
		arr = arm64.VectorArrangement2D
	}

	if err := c.compileEnsureOnRegister(s); err != nil {
		return err
	}

	v := c.locationStack.popV128()
	if err := c.compileEnsureOnRegister(v); err != nil {
		return err
	}

	tmp, err := c.allocateRegister(registerTypeVector)
	if err != nil {
		return err
	}

	c.assembler.CompileConstToRegister(arm64.ANDIMM32, modulo, s.register)

	if rightShift {
		// Negate the amount to make this a right shift.
		c.assembler.CompileRegisterToRegister(arm64.NEG, s.register, s.register)
	}

	// Copy the shift amount into a vector register as SSHL requires it to be there.
	c.assembler.CompileRegisterToVectorRegister(arm64.DUPGEN, s.register, tmp,
		arr, arm64.VectorIndexNone)

	c.assembler.CompileVectorRegisterToVectorRegister(ins, tmp, v.register, arr,
		arm64.VectorIndexNone, arm64.VectorIndexNone)

	c.markRegisterUnused(s.register)
	c.pushVectorRuntimeValueLocationOnRegister(v.register)
	return nil
}
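
// Note on compileV128ShiftImpl above: the AND with the lane-width mask implements the
// Wasm rule that the shift amount is taken modulo the lane size, e.g. an i32x4 shift by 35
// becomes a shift by 3. NEON has no variable right-shift instruction, so right shifts are
// expressed as SSHL/USHL (variable left shift) with a negated per-lane amount.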

// compileV128Cmp implements compiler.compileV128Cmp for arm64.
func (c *arm64Compiler) compileV128Cmp(o *wazeroir.UnionOperation) error {
	x2 := c.locationStack.popV128()
	if err := c.compileEnsureOnRegister(x2); err != nil {
		return err
	}

	x1 := c.locationStack.popV128()
	if err := c.compileEnsureOnRegister(x1); err != nil {
		return err
	}

	var arr arm64.VectorArrangement
	v128CmpType := o.B1
	if v128CmpType <= wazeroir.V128CmpTypeI8x16GeU {
		arr = arm64.VectorArrangement16B
	} else if v128CmpType <= wazeroir.V128CmpTypeI16x8GeU {
		arr = arm64.VectorArrangement8H
	} else if v128CmpType <= wazeroir.V128CmpTypeI32x4GeU {
		arr = arm64.VectorArrangement4S
	} else if v128CmpType <= wazeroir.V128CmpTypeI64x2GeS {
		arr = arm64.VectorArrangement2D
	} else if v128CmpType <= wazeroir.V128CmpTypeF32x4Ge {
		arr = arm64.VectorArrangement4S
	} else { // f64x2
		arr = arm64.VectorArrangement2D
	}

	result := x1.register
	switch v128CmpType {
	case wazeroir.V128CmpTypeI8x16Eq, wazeroir.V128CmpTypeI16x8Eq, wazeroir.V128CmpTypeI32x4Eq, wazeroir.V128CmpTypeI64x2Eq:
		c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.CMEQ, x1.register, x2.register, result, arr)
	case wazeroir.V128CmpTypeI8x16Ne, wazeroir.V128CmpTypeI16x8Ne, wazeroir.V128CmpTypeI32x4Ne, wazeroir.V128CmpTypeI64x2Ne:
		c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.CMEQ, x1.register, x2.register, result, arr)
		// Reverse the condition by flipping all bits.
		c.assembler.CompileVectorRegisterToVectorRegister(arm64.NOT, result, result,
			arm64.VectorArrangement16B, arm64.VectorIndexNone, arm64.VectorIndexNone)
	case wazeroir.V128CmpTypeI8x16LtS, wazeroir.V128CmpTypeI16x8LtS, wazeroir.V128CmpTypeI32x4LtS, wazeroir.V128CmpTypeI64x2LtS:
		c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.CMGT, x1.register, x2.register, result, arr)
	case wazeroir.V128CmpTypeI8x16LtU, wazeroir.V128CmpTypeI16x8LtU, wazeroir.V128CmpTypeI32x4LtU:
		c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.CMHI, x1.register, x2.register, result, arr)
	case wazeroir.V128CmpTypeI8x16GtS, wazeroir.V128CmpTypeI16x8GtS, wazeroir.V128CmpTypeI32x4GtS, wazeroir.V128CmpTypeI64x2GtS:
		c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.CMGT, x2.register, x1.register, result, arr)
	case wazeroir.V128CmpTypeI8x16GtU, wazeroir.V128CmpTypeI16x8GtU, wazeroir.V128CmpTypeI32x4GtU:
		c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.CMHI, x2.register, x1.register, result, arr)
	case wazeroir.V128CmpTypeI8x16LeS, wazeroir.V128CmpTypeI16x8LeS, wazeroir.V128CmpTypeI32x4LeS, wazeroir.V128CmpTypeI64x2LeS:
		c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.CMGE, x1.register, x2.register, result, arr)
	case wazeroir.V128CmpTypeI8x16LeU, wazeroir.V128CmpTypeI16x8LeU, wazeroir.V128CmpTypeI32x4LeU:
		c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.CMHS, x1.register, x2.register, result, arr)
	case wazeroir.V128CmpTypeI8x16GeS, wazeroir.V128CmpTypeI16x8GeS, wazeroir.V128CmpTypeI32x4GeS, wazeroir.V128CmpTypeI64x2GeS:
		c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.CMGE, x2.register, x1.register, result, arr)
	case wazeroir.V128CmpTypeI8x16GeU, wazeroir.V128CmpTypeI16x8GeU, wazeroir.V128CmpTypeI32x4GeU:
		c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.CMHS, x2.register, x1.register, result, arr)
	case wazeroir.V128CmpTypeF32x4Eq, wazeroir.V128CmpTypeF64x2Eq:
		c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.FCMEQ, x2.register, x1.register, result, arr)
	case wazeroir.V128CmpTypeF32x4Ne, wazeroir.V128CmpTypeF64x2Ne:
		c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.FCMEQ, x2.register, x1.register, result, arr)
		// Reverse the condition by flipping all bits.
		c.assembler.CompileVectorRegisterToVectorRegister(arm64.NOT, result, result,
			arm64.VectorArrangement16B, arm64.VectorIndexNone, arm64.VectorIndexNone)
	case wazeroir.V128CmpTypeF32x4Lt, wazeroir.V128CmpTypeF64x2Lt:
		c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.FCMGT, x1.register, x2.register, result, arr)
	case wazeroir.V128CmpTypeF32x4Le, wazeroir.V128CmpTypeF64x2Le:
		c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.FCMGE, x1.register, x2.register, result, arr)
	case wazeroir.V128CmpTypeF32x4Gt, wazeroir.V128CmpTypeF64x2Gt:
		c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.FCMGT, x2.register, x1.register, result, arr)
	case wazeroir.V128CmpTypeF32x4Ge, wazeroir.V128CmpTypeF64x2Ge:
		c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.FCMGE, x2.register, x1.register, result, arr)
	}

	c.markRegisterUnused(x2.register)
	c.pushVectorRuntimeValueLocationOnRegister(result)
	return nil
}

// compileV128AddSat implements compiler.compileV128AddSat for arm64.
func (c *arm64Compiler) compileV128AddSat(o *wazeroir.UnionOperation) error {
	var inst asm.Instruction
	shape := o.B1
	signed := o.B3
	if signed {
		inst = arm64.VSQADD
	} else {
		inst = arm64.VUQADD
	}
	return c.compileV128x2BinOp(inst, defaultArrangementForShape(shape))
}

// compileV128SubSat implements compiler.compileV128SubSat for arm64.
func (c *arm64Compiler) compileV128SubSat(o *wazeroir.UnionOperation) error {
	var inst asm.Instruction
	shape := o.B1
	signed := o.B3
	if signed {
		inst = arm64.VSQSUB
	} else {
		inst = arm64.VUQSUB
	}
	return c.compileV128x2BinOp(inst, defaultArrangementForShape(shape))
}

// compileV128Mul implements compiler.compileV128Mul for arm64.
func (c *arm64Compiler) compileV128Mul(o *wazeroir.UnionOperation) (err error) {
	shape := o.B1
	switch shape {
	case wazeroir.ShapeI8x16, wazeroir.ShapeI16x8, wazeroir.ShapeI32x4:
		err = c.compileV128x2BinOp(arm64.VMUL, defaultArrangementForShape(shape))
	case wazeroir.ShapeF32x4, wazeroir.ShapeF64x2:
		err = c.compileV128x2BinOp(arm64.VFMUL, defaultArrangementForShape(shape))
	case wazeroir.ShapeI64x2:
		x2 := c.locationStack.popV128()
		if err = c.compileEnsureOnRegister(x2); err != nil {
			return
		}

		x1 := c.locationStack.popV128()
		if err = c.compileEnsureOnRegister(x1); err != nil {
			return
		}

		src1, src2 := x1.register, x2.register

		tmp1, err := c.allocateRegister(registerTypeVector)
		if err != nil {
			return err
		}
		c.markRegisterUsed(tmp1)

		tmp2, err := c.allocateRegister(registerTypeVector)
		if err != nil {
			return err
		}

		c.markRegisterUsed(tmp2)

		tmp3, err := c.allocateRegister(registerTypeVector)
		if err != nil {
			return err
		}

		// Following the algorithm in https://chromium-review.googlesource.com/c/v8/v8/+/1781696
		c.assembler.CompileVectorRegisterToVectorRegister(arm64.REV64, src2, tmp2,
			arm64.VectorArrangement4S, arm64.VectorIndexNone, arm64.VectorIndexNone)
		c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.VMUL, src1, tmp2, tmp2, arm64.VectorArrangement4S)

		c.assembler.CompileVectorRegisterToVectorRegister(arm64.XTN, src1, tmp1,
			arm64.VectorArrangement2D, arm64.VectorIndexNone, arm64.VectorIndexNone)

		c.assembler.CompileVectorRegisterToVectorRegister(arm64.VADDP, tmp2, tmp2, arm64.VectorArrangement4S,
			arm64.VectorIndexNone, arm64.VectorIndexNone,
		)

		c.assembler.CompileVectorRegisterToVectorRegister(arm64.XTN, src2, tmp3,
			arm64.VectorArrangement2D, arm64.VectorIndexNone, arm64.VectorIndexNone)

		c.assembler.CompileVectorRegisterToVectorRegister(arm64.SHLL, tmp2, src1,
			arm64.VectorArrangement2S, arm64.VectorIndexNone, arm64.VectorIndexNone)

		c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.VUMLAL, tmp3, tmp1, src1, arm64.VectorArrangement2S)

		c.markRegisterUnused(src2, tmp1, tmp2)
		c.pushVectorRuntimeValueLocationOnRegister(src1)
	}
	return
}
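
// Note on the i64x2 case of compileV128Mul above: NEON has no 64x64-bit vector multiply,
// so each lane is computed from 32-bit halves using
//
//	a*b mod 2^64 = ((aHi*bLo + aLo*bHi) << 32) + aLo*bLo
//
// Roughly, REV64+MUL+ADDP produce the summed cross terms, SHLL shifts them into the upper
// halves, and UMLAL accumulates the aLo*bLo products, per the V8 change linked in the
// comment above.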

// compileV128Div implements compiler.compileV128Div for arm64.
func (c *arm64Compiler) compileV128Div(o *wazeroir.UnionOperation) error {
	var arr arm64.VectorArrangement
	var inst asm.Instruction
	shape := o.B1
	switch shape {
	case wazeroir.ShapeF32x4:
		arr = arm64.VectorArrangement4S
		inst = arm64.VFDIV
	case wazeroir.ShapeF64x2:
		arr = arm64.VectorArrangement2D
		inst = arm64.VFDIV
	}
	return c.compileV128x2BinOp(inst, arr)
}

// compileV128Neg implements compiler.compileV128Neg for arm64.
func (c *arm64Compiler) compileV128Neg(o *wazeroir.UnionOperation) error {
	var inst asm.Instruction
	shape := o.B1
	if shape <= wazeroir.ShapeI64x2 { // Integer lanes
		inst = arm64.VNEG
	} else { // Floating point lanes
		inst = arm64.VFNEG
	}
	return c.compileV128UniOp(inst, defaultArrangementForShape(shape))
}

// compileV128Sqrt implements compiler.compileV128Sqrt for arm64.
func (c *arm64Compiler) compileV128Sqrt(o *wazeroir.UnionOperation) error {
	var arr arm64.VectorArrangement
	shape := o.B1
	switch shape {
	case wazeroir.ShapeF32x4:
		arr = arm64.VectorArrangement4S
	case wazeroir.ShapeF64x2:
		arr = arm64.VectorArrangement2D
	}
	return c.compileV128UniOp(arm64.VFSQRT, arr)
}

// compileV128Abs implements compiler.compileV128Abs for arm64.
func (c *arm64Compiler) compileV128Abs(o *wazeroir.UnionOperation) error {
	var inst asm.Instruction
	shape := o.B1
	if shape <= wazeroir.ShapeI64x2 { // Integer lanes
		inst = arm64.VABS
	} else { // Floating point lanes
		inst = arm64.VFABS
	}
	return c.compileV128UniOp(inst, defaultArrangementForShape(shape))
}

// compileV128Popcnt implements compiler.compileV128Popcnt for arm64.
func (c *arm64Compiler) compileV128Popcnt(o *wazeroir.UnionOperation) error {
	return c.compileV128UniOp(arm64.VCNT, defaultArrangementForShape(o.B1))
}

// compileV128Min implements compiler.compileV128Min for arm64.
func (c *arm64Compiler) compileV128Min(o *wazeroir.UnionOperation) error {
	var inst asm.Instruction
	shape := o.B1
	signed := o.B3
	if shape <= wazeroir.ShapeI64x2 { // Integer lanes
		if signed {
			inst = arm64.SMIN
		} else {
			inst = arm64.UMIN
		}
	} else { // Floating point lanes
		inst = arm64.VFMIN
	}
	return c.compileV128x2BinOp(inst, defaultArrangementForShape(shape))
}

// defaultArrangementForShape returns the vector arrangement matching the given lane shape.
func defaultArrangementForShape(s wazeroir.Shape) (arr arm64.VectorArrangement) {
	switch s {
	case wazeroir.ShapeI8x16:
		arr = arm64.VectorArrangement16B
	case wazeroir.ShapeI16x8:
		arr = arm64.VectorArrangement8H
	case wazeroir.ShapeI32x4:
		arr = arm64.VectorArrangement4S
	case wazeroir.ShapeI64x2:
		arr = arm64.VectorArrangement2D
	case wazeroir.ShapeF32x4:
		arr = arm64.VectorArrangement4S
	case wazeroir.ShapeF64x2:
		arr = arm64.VectorArrangement2D
	}
	return
}

// compileV128Max implements compiler.compileV128Max for arm64.
func (c *arm64Compiler) compileV128Max(o *wazeroir.UnionOperation) error {
	var inst asm.Instruction
	shape := o.B1
	signed := o.B3
	if shape <= wazeroir.ShapeI64x2 { // Integer lanes
		if signed {
			inst = arm64.SMAX
		} else {
			inst = arm64.UMAX
		}
	} else { // Floating point lanes
		inst = arm64.VFMAX
	}
	return c.compileV128x2BinOp(inst, defaultArrangementForShape(shape))
}

// compileV128AvgrU implements compiler.compileV128AvgrU for arm64.
func (c *arm64Compiler) compileV128AvgrU(o *wazeroir.UnionOperation) error {
	return c.compileV128x2BinOp(arm64.URHADD, defaultArrangementForShape(o.B1))
}

// compileV128Pmin implements compiler.compileV128Pmin for arm64.
func (c *arm64Compiler) compileV128Pmin(o *wazeroir.UnionOperation) error {
	return c.compileV128PseudoMinOrMax(defaultArrangementForShape(o.B1), false)
}

// compileV128Pmax implements compiler.compileV128Pmax for arm64.
func (c *arm64Compiler) compileV128Pmax(o *wazeroir.UnionOperation) error {
	return c.compileV128PseudoMinOrMax(defaultArrangementForShape(o.B1), true)
}

// compileV128PseudoMinOrMax implements compileV128Pmax and compileV128Pmin.
func (c *arm64Compiler) compileV128PseudoMinOrMax(arr arm64.VectorArrangement, max bool) error {
	x2 := c.locationStack.popV128()
	if err := c.compileEnsureOnRegister(x2); err != nil {
		return err
	}

	x1 := c.locationStack.popV128()
	if err := c.compileEnsureOnRegister(x1); err != nil {
		return err
	}

	result, err := c.allocateRegister(registerTypeVector)
	if err != nil {
		return err
	}

	x1r, x2r := x1.register, x2.register

	// Sets all bits on each lane if x1r's lane satisfies the condition (min or max), zeros otherwise.
	if max {
		c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.FCMGT, x1r, x2r, result, arr)
	} else {
		c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.FCMGT, x2r, x1r, result, arr)
	}
	// Select each bit based on the comparison result above.
	c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.BSL, x1r, x2r, result, arm64.VectorArrangement16B)

	c.markRegisterUnused(x1r, x2r)
	c.pushVectorRuntimeValueLocationOnRegister(result)
	return nil
}

// compileV128Ceil implements compiler.compileV128Ceil for arm64.
func (c *arm64Compiler) compileV128Ceil(o *wazeroir.UnionOperation) error {
	var arr arm64.VectorArrangement
	shape := o.B1
	switch shape {
	case wazeroir.ShapeF32x4:
		arr = arm64.VectorArrangement4S
	case wazeroir.ShapeF64x2:
		arr = arm64.VectorArrangement2D
	}
	return c.compileV128UniOp(arm64.VFRINTP, arr)
}

// compileV128Floor implements compiler.compileV128Floor for arm64.
func (c *arm64Compiler) compileV128Floor(o *wazeroir.UnionOperation) error {
	var arr arm64.VectorArrangement
	shape := o.B1
	switch shape {
	case wazeroir.ShapeF32x4:
		arr = arm64.VectorArrangement4S
	case wazeroir.ShapeF64x2:
		arr = arm64.VectorArrangement2D
	}
	return c.compileV128UniOp(arm64.VFRINTM, arr)
}

// compileV128Trunc implements compiler.compileV128Trunc for arm64.
func (c *arm64Compiler) compileV128Trunc(o *wazeroir.UnionOperation) error {
	var arr arm64.VectorArrangement
	shape := o.B1
	switch shape {
	case wazeroir.ShapeF32x4:
		arr = arm64.VectorArrangement4S
	case wazeroir.ShapeF64x2:
		arr = arm64.VectorArrangement2D
	}
	return c.compileV128UniOp(arm64.VFRINTZ, arr)
}

// compileV128Nearest implements compiler.compileV128Nearest for arm64.
func (c *arm64Compiler) compileV128Nearest(o *wazeroir.UnionOperation) error {
	var arr arm64.VectorArrangement
	shape := o.B1
	switch shape {
	case wazeroir.ShapeF32x4:
		arr = arm64.VectorArrangement4S
	case wazeroir.ShapeF64x2:
		arr = arm64.VectorArrangement2D
	}
	return c.compileV128UniOp(arm64.VFRINTN, arr)
}

// compileV128Extend implements compiler.compileV128Extend for arm64.
func (c *arm64Compiler) compileV128Extend(o *wazeroir.UnionOperation) error {
	var inst asm.Instruction
	var arr arm64.VectorArrangement
	originShape := o.B1
	signed := o.B2 == 1
	useLow := o.B3
	if useLow {
		if signed {
			inst = arm64.SSHLL
		} else {
			inst = arm64.USHLL
		}

		switch originShape {
		case wazeroir.ShapeI8x16:
			arr = arm64.VectorArrangement8B
		case wazeroir.ShapeI16x8:
			arr = arm64.VectorArrangement4H
		case wazeroir.ShapeI32x4:
			arr = arm64.VectorArrangement2S
		}
	} else {
		if signed {
			inst = arm64.SSHLL2
		} else {
			inst = arm64.USHLL2
		}
		arr = defaultArrangementForShape(originShape)
	}

	return c.compileV128UniOp(inst, arr)
}

// compileV128ExtMul implements compiler.compileV128ExtMul for arm64.
func (c *arm64Compiler) compileV128ExtMul(o *wazeroir.UnionOperation) error {
	var inst asm.Instruction
	var arr arm64.VectorArrangement
	originShape := o.B1
	signed := o.B2 == 1
	useLow := o.B3
	if useLow {
		if signed {
			inst = arm64.SMULL
		} else {
			inst = arm64.UMULL
		}

		switch originShape {
		case wazeroir.ShapeI8x16:
			arr = arm64.VectorArrangement8B
		case wazeroir.ShapeI16x8:
			arr = arm64.VectorArrangement4H
		case wazeroir.ShapeI32x4:
			arr = arm64.VectorArrangement2S
		}
	} else {
		if signed {
			inst = arm64.SMULL2
		} else {
			inst = arm64.UMULL2
		}
		arr = defaultArrangementForShape(originShape)
	}

	return c.compileV128x2BinOp(inst, arr)
}

// compileV128Q15mulrSatS implements compiler.compileV128Q15mulrSatS for arm64.
func (c *arm64Compiler) compileV128Q15mulrSatS(*wazeroir.UnionOperation) error {
	return c.compileV128x2BinOp(arm64.SQRDMULH, arm64.VectorArrangement8H)
}

// compileV128ExtAddPairwise implements compiler.compileV128ExtAddPairwise for arm64.
func (c *arm64Compiler) compileV128ExtAddPairwise(o *wazeroir.UnionOperation) error {
	var inst asm.Instruction
	originShape := o.B1
	signed := o.B3
	if signed {
		inst = arm64.SADDLP
	} else {
		inst = arm64.UADDLP
	}
	return c.compileV128UniOp(inst, defaultArrangementForShape(originShape))
}

// compileV128FloatPromote implements compiler.compileV128FloatPromote for arm64.
func (c *arm64Compiler) compileV128FloatPromote(*wazeroir.UnionOperation) error {
	return c.compileV128UniOp(arm64.FCVTL, arm64.VectorArrangement2S)
}

// compileV128FloatDemote implements compiler.compileV128FloatDemote for arm64.
func (c *arm64Compiler) compileV128FloatDemote(*wazeroir.UnionOperation) error {
	return c.compileV128UniOp(arm64.FCVTN, arm64.VectorArrangement2S)
}

// compileV128FConvertFromI implements compiler.compileV128FConvertFromI for arm64.
func (c *arm64Compiler) compileV128FConvertFromI(o *wazeroir.UnionOperation) (err error) {
	destinationShape := o.B1
	signed := o.B3

	if destinationShape == wazeroir.ShapeF32x4 {
		if signed {
			err = c.compileV128UniOp(arm64.VSCVTF, defaultArrangementForShape(destinationShape))
		} else {
			err = c.compileV128UniOp(arm64.VUCVTF, defaultArrangementForShape(destinationShape))
		}
		return
	} else { // f64x2
		v := c.locationStack.popV128()
		if err = c.compileEnsureOnRegister(v); err != nil {
			return
		}
		vr := v.register

		var expand, convert asm.Instruction
		if signed {
			expand, convert = arm64.SSHLL, arm64.VSCVTF
		} else {
			expand, convert = arm64.USHLL, arm64.VUCVTF
		}

		// Expand the lower two 32-bit lanes into two 64-bit lanes.
		c.assembler.CompileVectorRegisterToVectorRegisterWithConst(expand, vr, vr, arm64.VectorArrangement2S, 0)
		// Convert the two 64-bit integer lanes into double-precision values.
		c.assembler.CompileVectorRegisterToVectorRegister(convert, vr, vr, arm64.VectorArrangement2D,
			arm64.VectorIndexNone, arm64.VectorIndexNone)
		c.pushVectorRuntimeValueLocationOnRegister(vr)
	}
	return
}

// compileV128Dot implements compiler.compileV128Dot for arm64.
func (c *arm64Compiler) compileV128Dot(*wazeroir.UnionOperation) error {
	x2 := c.locationStack.popV128()
	if err := c.compileEnsureOnRegister(x2); err != nil {
		return err
	}

	x1 := c.locationStack.popV128()
	if err := c.compileEnsureOnRegister(x1); err != nil {
		return err
	}

	tmp, err := c.allocateRegister(registerTypeVector)
	if err != nil {
		return err
	}

	x1r, x2r := x1.register, x2.register

	// Multiply the lower integers and get the 32-bit results into tmp.
	c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.SMULL, x1r, x2r, tmp, arm64.VectorArrangement4H)
	// Multiply the higher integers and get the 32-bit results into x1r.
	c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.SMULL2, x1r, x2r, x1r, arm64.VectorArrangement8H)
	// Add these two results into x1r.
	c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.VADDP, x1r, tmp, x1r, arm64.VectorArrangement4S)

	c.markRegisterUnused(x2r)
	c.pushVectorRuntimeValueLocationOnRegister(x1r)

	return nil
}
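
// Note on compileV128Dot above: i32x4.dot_i16x8_s multiplies corresponding signed 16-bit
// lanes into 32-bit products and sums adjacent pairs. SMULL/SMULL2 widen-multiply the low
// and high halves, and the pairwise ADDP then adds each adjacent pair of 32-bit products,
// yielding the four dot-product lanes.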

// compileV128Narrow implements compiler.compileV128Narrow for arm64.
func (c *arm64Compiler) compileV128Narrow(o *wazeroir.UnionOperation) error {
	x2 := c.locationStack.popV128()
	if err := c.compileEnsureOnRegister(x2); err != nil {
		return err
	}

	x1 := c.locationStack.popV128()
	if err := c.compileEnsureOnRegister(x1); err != nil {
		return err
	}

	x1r, x2r := x1.register, x2.register

	var arr, arr2 arm64.VectorArrangement
	originShape := o.B1
	signed := o.B3
	switch originShape {
	case wazeroir.ShapeI16x8:
		arr = arm64.VectorArrangement8B
		arr2 = arm64.VectorArrangement16B
	case wazeroir.ShapeI32x4:
		arr = arm64.VectorArrangement4H
		arr2 = arm64.VectorArrangement8H
	}

	var lo, hi asm.Instruction
	if signed {
		lo, hi = arm64.SQXTN, arm64.SQXTN2
	} else {
		lo, hi = arm64.SQXTUN, arm64.SQXTUN2
	}

	// Narrow lanes on x1r and write them into lower-half of x1r.
	c.assembler.CompileVectorRegisterToVectorRegister(lo, x1r, x1r, arr, arm64.VectorIndexNone, arm64.VectorIndexNone)
	// Narrow lanes on x2r and write them into higher-half of x1r.
	c.assembler.CompileVectorRegisterToVectorRegister(hi, x2r, x1r, arr2, arm64.VectorIndexNone, arm64.VectorIndexNone)

	c.markRegisterUnused(x2r)
	c.pushVectorRuntimeValueLocationOnRegister(x1r)
	return nil
}

// compileV128ITruncSatFromF implements compiler.compileV128ITruncSatFromF for arm64.
func (c *arm64Compiler) compileV128ITruncSatFromF(o *wazeroir.UnionOperation) (err error) {
	v := c.locationStack.popV128()
	if err = c.compileEnsureOnRegister(v); err != nil {
		return err
	}

	originShape := o.B1
	signed := o.B3
	var cvt asm.Instruction
	if signed {
		cvt = arm64.VFCVTZS
	} else {
		cvt = arm64.VFCVTZU
	}

	c.assembler.CompileVectorRegisterToVectorRegister(cvt, v.register, v.register,
		defaultArrangementForShape(originShape), arm64.VectorIndexNone, arm64.VectorIndexNone,
	)

	if originShape == wazeroir.ShapeF64x2 {
		var narrow asm.Instruction
		if signed {
			narrow = arm64.SQXTN
		} else {
			narrow = arm64.UQXTN
		}
		c.assembler.CompileVectorRegisterToVectorRegister(narrow, v.register, v.register,
			arm64.VectorArrangement2S, arm64.VectorIndexNone, arm64.VectorIndexNone,
		)
	}

	c.pushVectorRuntimeValueLocationOnRegister(v.register)
	return
}