Files
wazero/internal/asm/arm64/impl.go
2023-06-15 08:35:51 +10:00

3901 lines
133 KiB
Go

package arm64
import (
"encoding/binary"
"errors"
"fmt"
"github.com/tetratelabs/wazero/internal/asm"
)
type nodeImpl struct {
// jumpTarget holds the target node in the linked for the jump-kind instruction.
jumpTarget *nodeImpl
// next holds the next node from this node in the assembled linked list.
next *nodeImpl
staticConst *asm.StaticConst
instruction asm.Instruction
types operandTypes
srcReg, srcReg2, dstReg, dstReg2 asm.Register
srcConst, dstConst asm.ConstantValue
offsetInBinary asm.NodeOffsetInBinary
// readInstructionAddressBeforeTargetInstruction holds the instruction right before the target of
// read instruction address instruction. See asm.assemblerBase.CompileReadInstructionAddress.
readInstructionAddressBeforeTargetInstruction asm.Instruction
vectorArrangement VectorArrangement
srcVectorIndex, dstVectorIndex VectorIndex
}
// AssignJumpTarget implements the same method as documented on asm.Node.
func (n *nodeImpl) AssignJumpTarget(target asm.Node) {
n.jumpTarget = target.(*nodeImpl)
}
// AssignDestinationConstant implements the same method as documented on asm.Node.
func (n *nodeImpl) AssignDestinationConstant(value asm.ConstantValue) {
n.dstConst = value
}
// AssignSourceConstant implements the same method as documented on asm.Node.
func (n *nodeImpl) AssignSourceConstant(value asm.ConstantValue) {
n.srcConst = value
}
// OffsetInBinary implements the same method as documented on asm.Node.
func (n *nodeImpl) OffsetInBinary() asm.NodeOffsetInBinary {
return n.offsetInBinary
}
// String implements fmt.Stringer.
//
// This is for debugging purpose, and the format is similar to the AT&T assembly syntax,
// meaning that this should look like "INSTRUCTION ${from}, ${to}" where each operand
// might be embraced by '[]' to represent the memory location, and multiple operands
// are embraced by `()`.
func (n *nodeImpl) String() (ret string) {
instName := InstructionName(n.instruction)
switch n.types {
case operandTypesNoneToNone:
ret = instName
case operandTypesNoneToRegister:
ret = fmt.Sprintf("%s %s", instName, RegisterName(n.dstReg))
case operandTypesNoneToBranch:
ret = fmt.Sprintf("%s {%v}", instName, n.jumpTarget)
case operandTypesRegisterToRegister:
ret = fmt.Sprintf("%s %s, %s", instName, RegisterName(n.srcReg), RegisterName(n.dstReg))
case operandTypesLeftShiftedRegisterToRegister:
ret = fmt.Sprintf("%s (%s, %s << %d), %s", instName, RegisterName(n.srcReg), RegisterName(n.srcReg2), n.srcConst, RegisterName(n.dstReg))
case operandTypesTwoRegistersToRegister:
ret = fmt.Sprintf("%s (%s, %s), %s", instName, RegisterName(n.srcReg), RegisterName(n.srcReg2), RegisterName(n.dstReg))
case operandTypesThreeRegistersToRegister:
ret = fmt.Sprintf("%s (%s, %s, %s), %s)", instName, RegisterName(n.srcReg), RegisterName(n.srcReg2), RegisterName(n.dstReg), RegisterName(n.dstReg2))
case operandTypesTwoRegistersToNone:
ret = fmt.Sprintf("%s (%s, %s)", instName, RegisterName(n.srcReg), RegisterName(n.srcReg2))
case operandTypesRegisterAndConstToNone:
ret = fmt.Sprintf("%s (%s, 0x%x)", instName, RegisterName(n.srcReg), n.srcConst)
case operandTypesRegisterAndConstToRegister:
ret = fmt.Sprintf("%s (%s, 0x%x), %s", instName, RegisterName(n.srcReg), n.srcConst, RegisterName(n.dstReg))
case operandTypesRegisterToMemory:
if n.dstReg2 != asm.NilRegister {
ret = fmt.Sprintf("%s %s, [%s + %s]", instName, RegisterName(n.srcReg), RegisterName(n.dstReg), RegisterName(n.dstReg2))
} else {
ret = fmt.Sprintf("%s %s, [%s + 0x%x]", instName, RegisterName(n.srcReg), RegisterName(n.dstReg), n.dstConst)
}
case operandTypesMemoryToRegister:
if n.srcReg2 != asm.NilRegister {
ret = fmt.Sprintf("%s [%s + %s], %s", instName, RegisterName(n.srcReg), RegisterName(n.srcReg2), RegisterName(n.dstReg))
} else {
ret = fmt.Sprintf("%s [%s + 0x%x], %s", instName, RegisterName(n.srcReg), n.srcConst, RegisterName(n.dstReg))
}
case operandTypesConstToRegister:
ret = fmt.Sprintf("%s 0x%x, %s", instName, n.srcConst, RegisterName(n.dstReg))
case operandTypesRegisterToVectorRegister:
ret = fmt.Sprintf("%s %s, %s.%s[%d]", instName, RegisterName(n.srcReg), RegisterName(n.dstReg), n.vectorArrangement, n.dstVectorIndex)
case operandTypesVectorRegisterToRegister:
ret = fmt.Sprintf("%s %s.%s[%d], %s", instName, RegisterName(n.srcReg), n.vectorArrangement, n.srcVectorIndex, RegisterName(n.dstReg))
case operandTypesVectorRegisterToMemory:
if n.dstReg2 != asm.NilRegister {
ret = fmt.Sprintf("%s %s.%s, [%s + %s]", instName, RegisterName(n.srcReg), n.vectorArrangement, RegisterName(n.dstReg), RegisterName(n.dstReg2))
} else {
ret = fmt.Sprintf("%s %s.%s, [%s + 0x%x]", instName, RegisterName(n.srcReg), n.vectorArrangement, RegisterName(n.dstReg), n.dstConst)
}
case operandTypesMemoryToVectorRegister:
ret = fmt.Sprintf("%s [%s], %s.%s", instName, RegisterName(n.srcReg), RegisterName(n.dstReg), n.vectorArrangement)
case operandTypesVectorRegisterToVectorRegister:
ret = fmt.Sprintf("%s %[2]s.%[4]s, %[3]s.%[4]s", instName, RegisterName(n.srcReg), RegisterName(n.dstReg), n.vectorArrangement)
case operandTypesStaticConstToVectorRegister:
ret = fmt.Sprintf("%s $%#x %s.%s", instName, n.staticConst.Raw, RegisterName(n.dstReg), n.vectorArrangement)
case operandTypesTwoVectorRegistersToVectorRegister:
ret = fmt.Sprintf("%s (%s.%[5]s, %[3]s.%[5]s), %[4]s.%[5]s", instName, RegisterName(n.srcReg), RegisterName(n.srcReg2), RegisterName(n.dstReg), n.vectorArrangement)
}
return
}
// operandTypes represents types of operands of a node.
type operandTypes byte
const (
operandTypesNoneToNone operandTypes = iota
operandTypesNoneToRegister
operandTypesNoneToBranch
operandTypesRegisterToRegister
operandTypesLeftShiftedRegisterToRegister
operandTypesTwoRegistersToRegister
operandTypesThreeRegistersToRegister
operandTypesTwoRegistersToNone
operandTypesRegisterAndConstToNone
operandTypesRegisterAndConstToRegister
operandTypesRegisterToMemory
operandTypesMemoryToRegister
operandTypesConstToRegister
operandTypesRegisterToVectorRegister
operandTypesVectorRegisterToRegister
operandTypesMemoryToVectorRegister
operandTypesVectorRegisterToMemory
operandTypesVectorRegisterToVectorRegister
operandTypesTwoVectorRegistersToVectorRegister
operandTypesStaticConstToVectorRegister
)
// String implements fmt.Stringer
func (o operandTypes) String() (ret string) {
switch o {
case operandTypesNoneToNone:
ret = "NoneToNone"
case operandTypesNoneToRegister:
ret = "NoneToRegister"
case operandTypesNoneToBranch:
ret = "NoneToBranch"
case operandTypesRegisterToRegister:
ret = "RegisterToRegister"
case operandTypesLeftShiftedRegisterToRegister:
ret = "LeftShiftedRegisterToRegister"
case operandTypesTwoRegistersToRegister:
ret = "TwoRegistersToRegister"
case operandTypesThreeRegistersToRegister:
ret = "ThreeRegistersToRegister"
case operandTypesTwoRegistersToNone:
ret = "TwoRegistersToNone"
case operandTypesRegisterAndConstToNone:
ret = "RegisterAndConstToNone"
case operandTypesRegisterAndConstToRegister:
ret = "RegisterAndConstToRegister"
case operandTypesRegisterToMemory:
ret = "RegisterToMemory"
case operandTypesMemoryToRegister:
ret = "MemoryToRegister"
case operandTypesConstToRegister:
ret = "ConstToRegister"
case operandTypesRegisterToVectorRegister:
ret = "RegisterToVectorRegister"
case operandTypesVectorRegisterToRegister:
ret = "VectorRegisterToRegister"
case operandTypesMemoryToVectorRegister:
ret = "MemoryToVectorRegister"
case operandTypesVectorRegisterToMemory:
ret = "VectorRegisterToMemory"
case operandTypesVectorRegisterToVectorRegister:
ret = "VectorRegisterToVectorRegister"
case operandTypesTwoVectorRegistersToVectorRegister:
ret = "TwoVectorRegistersToVectorRegister"
case operandTypesStaticConstToVectorRegister:
ret = "StaticConstToVectorRegister"
}
return
}
const (
maxSignedInt26 int64 = 1<<25 - 1
minSignedInt26 int64 = -(1 << 25)
maxSignedInt19 int64 = 1<<19 - 1
minSignedInt19 int64 = -(1 << 19)
)
// AssemblerImpl implements Assembler.
type AssemblerImpl struct {
root *nodeImpl
current *nodeImpl
asm.BaseAssemblerImpl
relativeJumpNodes []*nodeImpl
adrInstructionNodes []*nodeImpl
nodePool nodePool
pool asm.StaticConstPool
nodeCount int
// MaxDisplacementForConstantPool is fixed to defaultMaxDisplacementForConstPool
// but have it as a field here for testability.
MaxDisplacementForConstantPool int
temporaryRegister asm.Register
}
const nodePageSize = 128
type nodePage = [nodePageSize]nodeImpl
// nodePool is the central allocation pool for nodeImpl used by a single AssemblerImpl.
// This reduces the allocations over compilation by reusing AssemblerImpl.
type nodePool struct {
pages []*nodePage
index int
}
// allocNode allocates a new nodeImpl for use from the pool.
// This expands the pool if there is no space left for it.
func (n *nodePool) allocNode() *nodeImpl {
if n.index == nodePageSize {
if len(n.pages) == cap(n.pages) {
n.pages = append(n.pages, new(nodePage))
} else {
i := len(n.pages)
n.pages = n.pages[:i+1]
if n.pages[i] == nil {
n.pages[i] = new(nodePage)
}
}
n.index = 0
}
ret := &n.pages[len(n.pages)-1][n.index]
n.index++
return ret
}
func (n *nodePool) reset() {
for _, ns := range n.pages {
pages := ns[:]
for i := range pages {
pages[i] = nodeImpl{}
}
}
n.pages = n.pages[:0]
n.index = nodePageSize
}
func NewAssembler(temporaryRegister asm.Register) *AssemblerImpl {
return &AssemblerImpl{
nodePool: nodePool{index: nodePageSize},
temporaryRegister: temporaryRegister,
pool: asm.NewStaticConstPool(),
MaxDisplacementForConstantPool: defaultMaxDisplacementForConstPool,
}
}
// AllocateNOP implements asm.AssemblerBase.
func (a *AssemblerImpl) AllocateNOP() asm.Node {
n := a.nodePool.allocNode()
n.instruction = NOP
n.types = operandTypesNoneToNone
return n
}
// Add implements asm.AssemblerBase.
func (a *AssemblerImpl) Add(n asm.Node) {
a.addNode(n.(*nodeImpl))
}
// Reset implements asm.AssemblerBase.
func (a *AssemblerImpl) Reset() {
pool := a.pool
pool.Reset()
*a = AssemblerImpl{
nodePool: a.nodePool,
pool: pool,
temporaryRegister: a.temporaryRegister,
adrInstructionNodes: a.adrInstructionNodes[:0],
relativeJumpNodes: a.relativeJumpNodes[:0],
BaseAssemblerImpl: asm.BaseAssemblerImpl{
SetBranchTargetOnNextNodes: a.SetBranchTargetOnNextNodes[:0],
JumpTableEntries: a.JumpTableEntries[:0],
},
}
a.nodePool.reset()
}
// newNode creates a new Node and appends it into the linked list.
func (a *AssemblerImpl) newNode(instruction asm.Instruction, types operandTypes) *nodeImpl {
n := a.nodePool.allocNode()
n.instruction = instruction
n.types = types
a.addNode(n)
return n
}
// addNode appends the new node into the linked list.
func (a *AssemblerImpl) addNode(node *nodeImpl) {
a.nodeCount++
if a.root == nil {
a.root = node
a.current = node
} else {
parent := a.current
parent.next = node
a.current = node
}
for _, o := range a.SetBranchTargetOnNextNodes {
origin := o.(*nodeImpl)
origin.jumpTarget = node
}
// Reuse the underlying slice to avoid re-allocations.
a.SetBranchTargetOnNextNodes = a.SetBranchTargetOnNextNodes[:0]
}
// Assemble implements asm.AssemblerBase
func (a *AssemblerImpl) Assemble(buf asm.Buffer) error {
// arm64 has 32-bit fixed length instructions,
// but note that some nodes are encoded as multiple instructions,
// so the resulting binary might not be the size of count*8.
buf.Grow(a.nodeCount * 8)
for n := a.root; n != nil; n = n.next {
n.offsetInBinary = uint64(buf.Len())
if err := a.encodeNode(buf, n); err != nil {
return err
}
a.maybeFlushConstPool(buf, n.next == nil)
}
code := buf.Bytes()
if err := a.FinalizeJumpTableEntry(code); err != nil {
return err
}
for _, rel := range a.relativeJumpNodes {
if err := a.relativeBranchFinalize(code, rel); err != nil {
return err
}
}
for _, adr := range a.adrInstructionNodes {
if err := a.finalizeADRInstructionNode(code, adr); err != nil {
return err
}
}
return nil
}
const defaultMaxDisplacementForConstPool = (1 << 20) - 1 - 4 // -4 for unconditional branch to skip the constants.
// maybeFlushConstPool flushes the constant pool if endOfBinary or a boundary condition was met.
func (a *AssemblerImpl) maybeFlushConstPool(buf asm.Buffer, endOfBinary bool) {
if a.pool.Empty() {
return
}
// If endOfBinary = true, we no longer need to emit the instructions, therefore
// flush all the constants.
if endOfBinary ||
// Also, if the offset between the first usage of the constant pool and
// the first constant would exceed 2^20 -1(= 2MiB-1), which is the maximum offset
// for LDR(literal)/ADR instruction, flush all the constants in the pool.
(buf.Len()+a.pool.PoolSizeInBytes-int(a.pool.FirstUseOffsetInBinary)) >= a.MaxDisplacementForConstantPool {
// Before emitting consts, we have to add br instruction to skip the const pool.
// https://github.com/golang/go/blob/release-branch.go1.15/src/cmd/internal/obj/arm64/asm7.go#L1123-L1129
skipOffset := a.pool.PoolSizeInBytes/4 + 1
if a.pool.PoolSizeInBytes%4 != 0 {
skipOffset++
}
if endOfBinary {
// If this is the end of binary, we never reach this block,
// so offset can be zero (which is the behavior of Go's assembler).
skipOffset = 0
}
buf.Append4Bytes(
byte(skipOffset),
byte(skipOffset>>8),
byte(skipOffset>>16),
0x14,
)
// Then adding the consts into the binary.
for _, c := range a.pool.Consts {
c.SetOffsetInBinary(uint64(buf.Len()))
buf.AppendBytes(c.Raw)
}
// arm64 instructions are 4-byte (32-bit) aligned, so we must pad the zero consts here.
if pad := buf.Len() % 4; pad != 0 {
buf.AppendBytes(make([]byte, 4-pad))
}
// After the flush, reset the constant pool.
a.pool.Reset()
}
}
// encodeNode encodes the given node into writer.
func (a *AssemblerImpl) encodeNode(buf asm.Buffer, n *nodeImpl) (err error) {
switch n.types {
case operandTypesNoneToNone:
err = a.encodeNoneToNone(buf, n)
case operandTypesNoneToRegister:
err = a.encodeJumpToRegister(buf, n)
case operandTypesNoneToBranch:
err = a.encodeRelativeBranch(buf, n)
case operandTypesRegisterToRegister:
err = a.encodeRegisterToRegister(buf, n)
case operandTypesLeftShiftedRegisterToRegister:
err = a.encodeLeftShiftedRegisterToRegister(buf, n)
case operandTypesTwoRegistersToRegister:
err = a.encodeTwoRegistersToRegister(buf, n)
case operandTypesThreeRegistersToRegister:
err = a.encodeThreeRegistersToRegister(buf, n)
case operandTypesTwoRegistersToNone:
err = a.encodeTwoRegistersToNone(buf, n)
case operandTypesRegisterAndConstToNone:
err = a.encodeRegisterAndConstToNone(buf, n)
case operandTypesRegisterToMemory:
err = a.encodeRegisterToMemory(buf, n)
case operandTypesMemoryToRegister:
err = a.encodeMemoryToRegister(buf, n)
case operandTypesRegisterAndConstToRegister, operandTypesConstToRegister:
err = a.encodeConstToRegister(buf, n)
case operandTypesRegisterToVectorRegister:
err = a.encodeRegisterToVectorRegister(buf, n)
case operandTypesVectorRegisterToRegister:
err = a.encodeVectorRegisterToRegister(buf, n)
case operandTypesMemoryToVectorRegister:
err = a.encodeMemoryToVectorRegister(buf, n)
case operandTypesVectorRegisterToMemory:
err = a.encodeVectorRegisterToMemory(buf, n)
case operandTypesVectorRegisterToVectorRegister:
err = a.encodeVectorRegisterToVectorRegister(buf, n)
case operandTypesStaticConstToVectorRegister:
err = a.encodeStaticConstToVectorRegister(buf, n)
case operandTypesTwoVectorRegistersToVectorRegister:
err = a.encodeTwoVectorRegistersToVectorRegister(buf, n)
default:
err = fmt.Errorf("encoder undefined for [%s] operand type", n.types)
}
if err != nil {
err = fmt.Errorf("%w: %s", err, n) // Ensure the error is debuggable by including the string value of the node.
}
return
}
// CompileStandAlone implements the same method as documented on asm.AssemblerBase.
func (a *AssemblerImpl) CompileStandAlone(instruction asm.Instruction) asm.Node {
return a.newNode(instruction, operandTypesNoneToNone)
}
// CompileConstToRegister implements the same method as documented on asm.AssemblerBase.
func (a *AssemblerImpl) CompileConstToRegister(
instruction asm.Instruction,
value asm.ConstantValue,
destinationReg asm.Register,
) (inst asm.Node) {
n := a.newNode(instruction, operandTypesConstToRegister)
n.srcConst = value
n.dstReg = destinationReg
return n
}
// CompileRegisterToRegister implements the same method as documented on asm.AssemblerBase.
func (a *AssemblerImpl) CompileRegisterToRegister(instruction asm.Instruction, from, to asm.Register) {
n := a.newNode(instruction, operandTypesRegisterToRegister)
n.srcReg = from
n.dstReg = to
}
// CompileMemoryToRegister implements the same method as documented on asm.AssemblerBase.
func (a *AssemblerImpl) CompileMemoryToRegister(
instruction asm.Instruction,
sourceBaseReg asm.Register,
sourceOffsetConst asm.ConstantValue,
destinationReg asm.Register,
) {
n := a.newNode(instruction, operandTypesMemoryToRegister)
n.srcReg = sourceBaseReg
n.srcConst = sourceOffsetConst
n.dstReg = destinationReg
}
// CompileRegisterToMemory implements the same method as documented on asm.AssemblerBase.
func (a *AssemblerImpl) CompileRegisterToMemory(
instruction asm.Instruction,
sourceRegister, destinationBaseRegister asm.Register,
destinationOffsetConst asm.ConstantValue,
) {
n := a.newNode(instruction, operandTypesRegisterToMemory)
n.srcReg = sourceRegister
n.dstReg = destinationBaseRegister
n.dstConst = destinationOffsetConst
}
// CompileJump implements the same method as documented on asm.AssemblerBase.
func (a *AssemblerImpl) CompileJump(jmpInstruction asm.Instruction) asm.Node {
return a.newNode(jmpInstruction, operandTypesNoneToBranch)
}
// CompileJumpToRegister implements the same method as documented on asm.AssemblerBase.
func (a *AssemblerImpl) CompileJumpToRegister(jmpInstruction asm.Instruction, reg asm.Register) {
n := a.newNode(jmpInstruction, operandTypesNoneToRegister)
n.dstReg = reg
}
// CompileReadInstructionAddress implements the same method as documented on asm.AssemblerBase.
func (a *AssemblerImpl) CompileReadInstructionAddress(
destinationRegister asm.Register,
beforeAcquisitionTargetInstruction asm.Instruction,
) {
n := a.newNode(ADR, operandTypesMemoryToRegister)
n.dstReg = destinationRegister
n.readInstructionAddressBeforeTargetInstruction = beforeAcquisitionTargetInstruction
}
// CompileMemoryWithRegisterOffsetToRegister implements Assembler.CompileMemoryWithRegisterOffsetToRegister
func (a *AssemblerImpl) CompileMemoryWithRegisterOffsetToRegister(
instruction asm.Instruction,
srcBaseReg, srcOffsetReg, dstReg asm.Register,
) {
n := a.newNode(instruction, operandTypesMemoryToRegister)
n.dstReg = dstReg
n.srcReg = srcBaseReg
n.srcReg2 = srcOffsetReg
}
// CompileRegisterToMemoryWithRegisterOffset implements Assembler.CompileRegisterToMemoryWithRegisterOffset
func (a *AssemblerImpl) CompileRegisterToMemoryWithRegisterOffset(
instruction asm.Instruction,
srcReg, dstBaseReg, dstOffsetReg asm.Register,
) {
n := a.newNode(instruction, operandTypesRegisterToMemory)
n.srcReg = srcReg
n.dstReg = dstBaseReg
n.dstReg2 = dstOffsetReg
}
// CompileTwoRegistersToRegister implements Assembler.CompileTwoRegistersToRegister
func (a *AssemblerImpl) CompileTwoRegistersToRegister(instruction asm.Instruction, src1, src2, dst asm.Register) {
n := a.newNode(instruction, operandTypesTwoRegistersToRegister)
n.srcReg = src1
n.srcReg2 = src2
n.dstReg = dst
}
// CompileThreeRegistersToRegister implements Assembler.CompileThreeRegistersToRegister
func (a *AssemblerImpl) CompileThreeRegistersToRegister(
instruction asm.Instruction,
src1, src2, src3, dst asm.Register,
) {
n := a.newNode(instruction, operandTypesThreeRegistersToRegister)
n.srcReg = src1
n.srcReg2 = src2
n.dstReg = src3 // To minimize the size of nodeImpl struct, we reuse dstReg for the third source operand.
n.dstReg2 = dst
}
// CompileTwoRegistersToNone implements Assembler.CompileTwoRegistersToNone
func (a *AssemblerImpl) CompileTwoRegistersToNone(instruction asm.Instruction, src1, src2 asm.Register) {
n := a.newNode(instruction, operandTypesTwoRegistersToNone)
n.srcReg = src1
n.srcReg2 = src2
}
// CompileRegisterAndConstToNone implements Assembler.CompileRegisterAndConstToNone
func (a *AssemblerImpl) CompileRegisterAndConstToNone(
instruction asm.Instruction,
src asm.Register,
srcConst asm.ConstantValue,
) {
n := a.newNode(instruction, operandTypesRegisterAndConstToNone)
n.srcReg = src
n.srcConst = srcConst
}
// CompileRegisterAndConstToRegister implements Assembler.CompileRegisterAndConstToRegister
func (a *AssemblerImpl) CompileRegisterAndConstToRegister(
instruction asm.Instruction,
src asm.Register,
srcConst asm.ConstantValue,
dst asm.Register,
) {
n := a.newNode(instruction, operandTypesRegisterAndConstToRegister)
n.srcReg = src
n.srcConst = srcConst
n.dstReg = dst
}
// CompileLeftShiftedRegisterToRegister implements Assembler.CompileLeftShiftedRegisterToRegister
func (a *AssemblerImpl) CompileLeftShiftedRegisterToRegister(
instruction asm.Instruction,
shiftedSourceReg asm.Register,
shiftNum asm.ConstantValue,
srcReg, dstReg asm.Register,
) {
n := a.newNode(instruction, operandTypesLeftShiftedRegisterToRegister)
n.srcReg = srcReg
n.srcReg2 = shiftedSourceReg
n.srcConst = shiftNum
n.dstReg = dstReg
}
// CompileConditionalRegisterSet implements Assembler.CompileConditionalRegisterSet
func (a *AssemblerImpl) CompileConditionalRegisterSet(cond asm.ConditionalRegisterState, dstReg asm.Register) {
n := a.newNode(CSET, operandTypesRegisterToRegister)
n.srcReg = conditionalRegisterStateToRegister(cond)
n.dstReg = dstReg
}
// CompileMemoryToVectorRegister implements Assembler.CompileMemoryToVectorRegister
func (a *AssemblerImpl) CompileMemoryToVectorRegister(
instruction asm.Instruction, srcBaseReg asm.Register, dstOffset asm.ConstantValue, dstReg asm.Register, arrangement VectorArrangement,
) {
n := a.newNode(instruction, operandTypesMemoryToVectorRegister)
n.srcReg = srcBaseReg
n.srcConst = dstOffset
n.dstReg = dstReg
n.vectorArrangement = arrangement
}
// CompileMemoryWithRegisterOffsetToVectorRegister implements Assembler.CompileMemoryWithRegisterOffsetToVectorRegister
func (a *AssemblerImpl) CompileMemoryWithRegisterOffsetToVectorRegister(instruction asm.Instruction,
srcBaseReg, srcOffsetRegister asm.Register, dstReg asm.Register, arrangement VectorArrangement,
) {
n := a.newNode(instruction, operandTypesMemoryToVectorRegister)
n.srcReg = srcBaseReg
n.srcReg2 = srcOffsetRegister
n.dstReg = dstReg
n.vectorArrangement = arrangement
}
// CompileVectorRegisterToMemory implements Assembler.CompileVectorRegisterToMemory
func (a *AssemblerImpl) CompileVectorRegisterToMemory(
instruction asm.Instruction, srcReg, dstBaseReg asm.Register, dstOffset asm.ConstantValue, arrangement VectorArrangement,
) {
n := a.newNode(instruction, operandTypesVectorRegisterToMemory)
n.srcReg = srcReg
n.dstReg = dstBaseReg
n.dstConst = dstOffset
n.vectorArrangement = arrangement
}
// CompileVectorRegisterToMemoryWithRegisterOffset implements Assembler.CompileVectorRegisterToMemoryWithRegisterOffset
func (a *AssemblerImpl) CompileVectorRegisterToMemoryWithRegisterOffset(instruction asm.Instruction,
srcReg, dstBaseReg, dstOffsetRegister asm.Register, arrangement VectorArrangement,
) {
n := a.newNode(instruction, operandTypesVectorRegisterToMemory)
n.srcReg = srcReg
n.dstReg = dstBaseReg
n.dstReg2 = dstOffsetRegister
n.vectorArrangement = arrangement
}
// CompileRegisterToVectorRegister implements Assembler.CompileRegisterToVectorRegister
func (a *AssemblerImpl) CompileRegisterToVectorRegister(
instruction asm.Instruction, srcReg, dstReg asm.Register, arrangement VectorArrangement, index VectorIndex,
) {
n := a.newNode(instruction, operandTypesRegisterToVectorRegister)
n.srcReg = srcReg
n.dstReg = dstReg
n.vectorArrangement = arrangement
n.dstVectorIndex = index
}
// CompileVectorRegisterToRegister implements Assembler.CompileVectorRegisterToRegister
func (a *AssemblerImpl) CompileVectorRegisterToRegister(instruction asm.Instruction, srcReg, dstReg asm.Register,
arrangement VectorArrangement, index VectorIndex,
) {
n := a.newNode(instruction, operandTypesVectorRegisterToRegister)
n.srcReg = srcReg
n.dstReg = dstReg
n.vectorArrangement = arrangement
n.srcVectorIndex = index
}
// CompileVectorRegisterToVectorRegister implements Assembler.CompileVectorRegisterToVectorRegister
func (a *AssemblerImpl) CompileVectorRegisterToVectorRegister(
instruction asm.Instruction, srcReg, dstReg asm.Register, arrangement VectorArrangement, srcIndex, dstIndex VectorIndex,
) {
n := a.newNode(instruction, operandTypesVectorRegisterToVectorRegister)
n.srcReg = srcReg
n.dstReg = dstReg
n.vectorArrangement = arrangement
n.srcVectorIndex = srcIndex
n.dstVectorIndex = dstIndex
}
// CompileVectorRegisterToVectorRegisterWithConst implements Assembler.CompileVectorRegisterToVectorRegisterWithConst
func (a *AssemblerImpl) CompileVectorRegisterToVectorRegisterWithConst(instruction asm.Instruction,
srcReg, dstReg asm.Register, arrangement VectorArrangement, c asm.ConstantValue,
) {
n := a.newNode(instruction, operandTypesVectorRegisterToVectorRegister)
n.srcReg = srcReg
n.srcConst = c
n.dstReg = dstReg
n.vectorArrangement = arrangement
}
// CompileStaticConstToRegister implements Assembler.CompileStaticConstToVectorRegister
func (a *AssemblerImpl) CompileStaticConstToRegister(instruction asm.Instruction, c *asm.StaticConst, dstReg asm.Register) {
n := a.newNode(instruction, operandTypesMemoryToRegister)
n.staticConst = c
n.dstReg = dstReg
}
// CompileStaticConstToVectorRegister implements Assembler.CompileStaticConstToVectorRegister
func (a *AssemblerImpl) CompileStaticConstToVectorRegister(instruction asm.Instruction,
c *asm.StaticConst, dstReg asm.Register, arrangement VectorArrangement,
) {
n := a.newNode(instruction, operandTypesStaticConstToVectorRegister)
n.staticConst = c
n.dstReg = dstReg
n.vectorArrangement = arrangement
}
// CompileTwoVectorRegistersToVectorRegister implements Assembler.CompileTwoVectorRegistersToVectorRegister.
func (a *AssemblerImpl) CompileTwoVectorRegistersToVectorRegister(instruction asm.Instruction, srcReg, srcReg2, dstReg asm.Register,
arrangement VectorArrangement,
) {
n := a.newNode(instruction, operandTypesTwoVectorRegistersToVectorRegister)
n.srcReg = srcReg
n.srcReg2 = srcReg2
n.dstReg = dstReg
n.vectorArrangement = arrangement
}
// CompileTwoVectorRegistersToVectorRegisterWithConst implements Assembler.CompileTwoVectorRegistersToVectorRegisterWithConst.
func (a *AssemblerImpl) CompileTwoVectorRegistersToVectorRegisterWithConst(instruction asm.Instruction,
srcReg, srcReg2, dstReg asm.Register, arrangement VectorArrangement, c asm.ConstantValue,
) {
n := a.newNode(instruction, operandTypesTwoVectorRegistersToVectorRegister)
n.srcReg = srcReg
n.srcReg2 = srcReg2
n.srcConst = c
n.dstReg = dstReg
n.vectorArrangement = arrangement
}
func errorEncodingUnsupported(n *nodeImpl) error {
return fmt.Errorf("%s is unsupported for %s type", InstructionName(n.instruction), n.types)
}
func (a *AssemblerImpl) encodeNoneToNone(buf asm.Buffer, n *nodeImpl) error {
switch n.instruction {
case UDF:
buf.Append4Bytes(0, 0, 0, 0)
return nil
case NOP:
return nil
default:
return errorEncodingUnsupported(n)
}
}
func (a *AssemblerImpl) encodeJumpToRegister(buf asm.Buffer, n *nodeImpl) error {
// "Unconditional branch (register)" in https://developer.arm.com/documentation/ddi0596/2021-12/Index-by-Encoding/Branches--Exception-Generating-and-System-instructions
var opc byte
switch n.instruction {
case RET:
opc = 0b0010
case B:
opc = 0b0000
default:
return errorEncodingUnsupported(n)
}
regBits, err := intRegisterBits(n.dstReg)
if err != nil {
return fmt.Errorf("invalid destination register: %w", err)
}
buf.Append4Bytes(
0x00|(regBits<<5),
0x00|(regBits>>3),
0b000_11111|(opc<<5),
0b1101011_0|(opc>>3),
)
return err
}
func (a *AssemblerImpl) relativeBranchFinalize(code []byte, n *nodeImpl) error {
var condBits byte
const condBitsUnconditional = 0xff // Indicates this is not conditional jump.
// https://developer.arm.com/documentation/den0024/a/CHDEEABE
switch n.instruction {
case B:
condBits = condBitsUnconditional
case BCONDEQ:
condBits = 0b0000
case BCONDGE:
condBits = 0b1010
case BCONDGT:
condBits = 0b1100
case BCONDHI:
condBits = 0b1000
case BCONDHS:
condBits = 0b0010
case BCONDLE:
condBits = 0b1101
case BCONDLO:
condBits = 0b0011
case BCONDLS:
condBits = 0b1001
case BCONDLT:
condBits = 0b1011
case BCONDMI:
condBits = 0b0100
case BCONDPL:
condBits = 0b0101
case BCONDNE:
condBits = 0b0001
case BCONDVS:
condBits = 0b0110
case BCONDVC:
condBits = 0b0111
}
branchInstOffset := int64(n.OffsetInBinary())
offset := int64(n.jumpTarget.OffsetInBinary()) - branchInstOffset
if offset%4 != 0 {
return errors.New("BUG: relative jump offset must be 4 bytes aligned")
}
branchInst := code[branchInstOffset : branchInstOffset+4]
if condBits == condBitsUnconditional {
imm26 := offset >> 2 // divide by 4.
if imm26 < minSignedInt26 || imm26 > maxSignedInt26 {
// In theory this could happen if a Wasm binary has a huge single label (more than 128MB for a single block),
// and in that case, we use load the offset into a register and do the register jump, but to avoid the complexity,
// we impose this limit for now as that would be *unlikely* happen in practice.
return fmt.Errorf("relative jump offset %d/4 must be within %d and %d", offset, minSignedInt26, maxSignedInt26)
}
// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/B--Branch-?lang=en
branchInst[0] = byte(imm26)
branchInst[1] = byte(imm26 >> 8)
branchInst[2] = byte(imm26 >> 16)
branchInst[3] = (byte(imm26 >> 24 & 0b000000_11)) | 0b000101_00
} else {
imm19 := offset >> 2 // divide by 4.
if imm19 < minSignedInt19 || imm19 > maxSignedInt19 {
// This should be a bug in our compiler as the conditional jumps are only used in the small offsets (~a few bytes),
// and if ever happens, compiler can be fixed.
return fmt.Errorf("BUG: relative jump offset %d/4(=%d) must be within %d and %d", offset, imm19, minSignedInt19, maxSignedInt19)
}
// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/B-cond--Branch-conditionally-?lang=en
branchInst[0] = (byte(imm19<<5) & 0b111_0_0000) | condBits
branchInst[1] = byte(imm19 >> 3)
branchInst[2] = byte(imm19 >> 11)
branchInst[3] = 0b01010100
}
return nil
}
func (a *AssemblerImpl) encodeRelativeBranch(buf asm.Buffer, n *nodeImpl) error {
switch n.instruction {
case B, BCONDEQ, BCONDGE, BCONDGT, BCONDHI, BCONDHS, BCONDLE, BCONDLO, BCONDLS, BCONDLT, BCONDMI, BCONDNE, BCONDVS, BCONDVC, BCONDPL:
default:
return errorEncodingUnsupported(n)
}
if n.jumpTarget == nil {
return fmt.Errorf("branch target must be set for %s", InstructionName(n.instruction))
}
// At this point, we don't yet know that target's branch, so emit the placeholder (4 bytes).
buf.Append4Bytes(0, 0, 0, 0)
a.relativeJumpNodes = append(a.relativeJumpNodes, n)
return nil
}
func checkRegisterToRegisterType(src, dst asm.Register, requireSrcInt, requireDstInt bool) (err error) {
isSrcInt, isDstInt := isIntRegister(src), isIntRegister(dst)
if isSrcInt && !requireSrcInt {
err = fmt.Errorf("src requires float register but got %s", RegisterName(src))
} else if !isSrcInt && requireSrcInt {
err = fmt.Errorf("src requires int register but got %s", RegisterName(src))
} else if isDstInt && !requireDstInt {
err = fmt.Errorf("dst requires float register but got %s", RegisterName(dst))
} else if !isDstInt && requireDstInt {
err = fmt.Errorf("dst requires int register but got %s", RegisterName(dst))
}
return
}
func (a *AssemblerImpl) encodeRegisterToRegister(buf asm.Buffer, n *nodeImpl) (err error) {
switch inst := n.instruction; inst {
case ADD, ADDW, SUB:
if err = checkRegisterToRegisterType(n.srcReg, n.dstReg, true, true); err != nil {
return
}
// https://developer.arm.com/documentation/ddi0596/2021-12/Index-by-Encoding/Data-Processing----Register?lang=en#addsub_shift
var sfops byte
switch inst {
case ADD:
sfops = 0b100
case ADDW:
case SUB:
sfops = 0b110
}
srcRegBits, dstRegBits := registerBits(n.srcReg), registerBits(n.dstReg)
buf.Append4Bytes(
(dstRegBits<<5)|dstRegBits,
dstRegBits>>3,
srcRegBits,
(sfops<<5)|0b01011,
)
case CLZ, CLZW, RBIT, RBITW:
if err = checkRegisterToRegisterType(n.srcReg, n.dstReg, true, true); err != nil {
return
}
var sf, opcode byte
switch inst {
case CLZ:
// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/CLZ--Count-Leading-Zeros-?lang=en
sf, opcode = 0b1, 0b000_100
case CLZW:
// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/CLZ--Count-Leading-Zeros-?lang=en
sf, opcode = 0b0, 0b000_100
case RBIT:
// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/RBIT--Reverse-Bits-?lang=en
sf, opcode = 0b1, 0b000_000
case RBITW:
// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/RBIT--Reverse-Bits-?lang=en
sf, opcode = 0b0, 0b000_000
}
if inst == CLZ {
sf = 1
}
srcRegBits, dstRegBits := registerBits(n.srcReg), registerBits(n.dstReg)
buf.Append4Bytes(
(srcRegBits<<5)|dstRegBits,
opcode<<2|(srcRegBits>>3),
0b110_00000,
(sf<<7)|0b0_1011010,
)
case CSET:
if !isConditionalRegister(n.srcReg) {
return fmt.Errorf("CSET requires conditional register but got %s", RegisterName(n.srcReg))
}
dstRegBits, err := intRegisterBits(n.dstReg)
if err != nil {
return err
}
// CSET encodes the conditional bits with its least significant bit inverted.
// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/CSET--Conditional-Set--an-alias-of-CSINC-?lang=en
//
// https://developer.arm.com/documentation/den0024/a/CHDEEABE
var conditionalBits byte
switch n.srcReg {
case RegCondEQ:
conditionalBits = 0b0001
case RegCondNE:
conditionalBits = 0b0000
case RegCondHS:
conditionalBits = 0b0011
case RegCondLO:
conditionalBits = 0b0010
case RegCondMI:
conditionalBits = 0b0101
case RegCondPL:
conditionalBits = 0b0100
case RegCondVS:
conditionalBits = 0b0111
case RegCondVC:
conditionalBits = 0b0110
case RegCondHI:
conditionalBits = 0b1001
case RegCondLS:
conditionalBits = 0b1000
case RegCondGE:
conditionalBits = 0b1011
case RegCondLT:
conditionalBits = 0b1010
case RegCondGT:
conditionalBits = 0b1101
case RegCondLE:
conditionalBits = 0b1100
case RegCondAL:
conditionalBits = 0b1111
case RegCondNV:
conditionalBits = 0b1110
}
// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/CSET--Conditional-Set--an-alias-of-CSINC-?lang=en
buf.Append4Bytes(
0b111_00000|dstRegBits,
(conditionalBits<<4)|0b0000_0111,
0b100_11111,
0b10011010,
)
case FABSD, FABSS, FNEGD, FNEGS, FSQRTD, FSQRTS, FCVTSD, FCVTDS, FRINTMD, FRINTMS,
FRINTND, FRINTNS, FRINTPD, FRINTPS, FRINTZD, FRINTZS:
if err = checkRegisterToRegisterType(n.srcReg, n.dstReg, false, false); err != nil {
return
}
srcRegBits, dstRegBits := registerBits(n.srcReg), registerBits(n.dstReg)
// https://developer.arm.com/documentation/ddi0596/2021-12/Index-by-Encoding/Data-Processing----Scalar-Floating-Point-and-Advanced-SIMD?lang=en#floatdp1
var tp, opcode byte
switch inst {
case FABSD:
opcode, tp = 0b000001, 0b01
case FABSS:
opcode, tp = 0b000001, 0b00
case FNEGD:
opcode, tp = 0b000010, 0b01
case FNEGS:
opcode, tp = 0b000010, 0b00
case FSQRTD:
opcode, tp = 0b000011, 0b01
case FSQRTS:
opcode, tp = 0b000011, 0b00
case FCVTSD:
opcode, tp = 0b000101, 0b00
case FCVTDS:
opcode, tp = 0b000100, 0b01
case FRINTMD:
opcode, tp = 0b001010, 0b01
case FRINTMS:
opcode, tp = 0b001010, 0b00
case FRINTND:
opcode, tp = 0b001000, 0b01
case FRINTNS:
opcode, tp = 0b001000, 0b00
case FRINTPD:
opcode, tp = 0b001001, 0b01
case FRINTPS:
opcode, tp = 0b001001, 0b00
case FRINTZD:
opcode, tp = 0b001011, 0b01
case FRINTZS:
opcode, tp = 0b001011, 0b00
}
buf.Append4Bytes(
(srcRegBits<<5)|dstRegBits,
(opcode<<7)|0b0_10000_00|(srcRegBits>>3),
tp<<6|0b00_1_00000|opcode>>1,
0b0_00_11110,
)
case FADDD, FADDS, FDIVS, FDIVD, FMAXD, FMAXS, FMIND, FMINS, FMULS, FMULD:
if err = checkRegisterToRegisterType(n.srcReg, n.dstReg, false, false); err != nil {
return
}
srcRegBits, dstRegBits := registerBits(n.srcReg), registerBits(n.dstReg)
// "Floating-point data-processing (2 source)" in
// https://developer.arm.com/documentation/ddi0596/2021-12/Index-by-Encoding/Data-Processing----Scalar-Floating-Point-and-Advanced-SIMD?lang=en#floatdp1
var tp, opcode byte
switch inst {
case FADDD:
opcode, tp = 0b0010, 0b01
case FADDS:
opcode, tp = 0b0010, 0b00
case FDIVD:
opcode, tp = 0b0001, 0b01
case FDIVS:
opcode, tp = 0b0001, 0b00
case FMAXD:
opcode, tp = 0b0100, 0b01
case FMAXS:
opcode, tp = 0b0100, 0b00
case FMIND:
opcode, tp = 0b0101, 0b01
case FMINS:
opcode, tp = 0b0101, 0b00
case FMULS:
opcode, tp = 0b0000, 0b00
case FMULD:
opcode, tp = 0b0000, 0b01
}
buf.Append4Bytes(
(dstRegBits<<5)|dstRegBits,
opcode<<4|0b0000_10_00|(dstRegBits>>3),
tp<<6|0b00_1_00000|srcRegBits,
0b0001_1110,
)
case FCVTZSD, FCVTZSDW, FCVTZSS, FCVTZSSW, FCVTZUD, FCVTZUDW, FCVTZUS, FCVTZUSW:
if err = checkRegisterToRegisterType(n.srcReg, n.dstReg, false, true); err != nil {
return
}
srcRegBits, dstRegBits := registerBits(n.srcReg), registerBits(n.dstReg)
// "Conversion between floating-point and integer" in
// https://developer.arm.com/documentation/ddi0596/2021-12/Index-by-Encoding/Data-Processing----Scalar-Floating-Point-and-Advanced-SIMD?lang=en#floatdp1
var sf, tp, opcode byte
switch inst {
case FCVTZSD: // Double to signed 64-bit
sf, tp, opcode = 0b1, 0b01, 0b000
case FCVTZSDW: // Double to signed 32-bit.
sf, tp, opcode = 0b0, 0b01, 0b000
case FCVTZSS: // Single to signed 64-bit.
sf, tp, opcode = 0b1, 0b00, 0b000
case FCVTZSSW: // Single to signed 32-bit.
sf, tp, opcode = 0b0, 0b00, 0b000
case FCVTZUD: // Double to unsigned 64-bit.
sf, tp, opcode = 0b1, 0b01, 0b001
case FCVTZUDW: // Double to unsigned 32-bit.
sf, tp, opcode = 0b0, 0b01, 0b001
case FCVTZUS: // Single to unsigned 64-bit.
sf, tp, opcode = 0b1, 0b00, 0b001
case FCVTZUSW: // Single to unsigned 32-bit.
sf, tp, opcode = 0b0, 0b00, 0b001
}
buf.Append4Bytes(
(srcRegBits<<5)|dstRegBits,
0|(srcRegBits>>3),
tp<<6|0b00_1_11_000|opcode,
sf<<7|0b0_0_0_11110,
)
case FMOVD, FMOVS:
isSrcInt, isDstInt := isIntRegister(n.srcReg), isIntRegister(n.dstReg)
if isSrcInt && isDstInt {
return errors.New("FMOV needs at least one of operands to be integer")
}
srcRegBits, dstRegBits := registerBits(n.srcReg), registerBits(n.dstReg)
// https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/FMOV--register---Floating-point-Move-register-without-conversion-?lang=en
if !isSrcInt && !isDstInt { // Float to float.
var tp byte
if inst == FMOVD {
tp = 0b01
}
buf.Append4Bytes(
(srcRegBits<<5)|dstRegBits,
0b0_10000_00|(srcRegBits>>3),
tp<<6|0b00_1_00000,
0b000_11110,
)
} else if isSrcInt && !isDstInt { // Int to float.
var tp, sf byte
if inst == FMOVD {
tp, sf = 0b01, 0b1
}
buf.Append4Bytes(
(srcRegBits<<5)|dstRegBits,
srcRegBits>>3,
tp<<6|0b00_1_00_111,
sf<<7|0b0_00_11110,
)
} else { // Float to int.
var tp, sf byte
if inst == FMOVD {
tp, sf = 0b01, 0b1
}
buf.Append4Bytes(
(srcRegBits<<5)|dstRegBits,
srcRegBits>>3,
tp<<6|0b00_1_00_110,
sf<<7|0b0_00_11110,
)
}
case MOVD, MOVW:
if err = checkRegisterToRegisterType(n.srcReg, n.dstReg, true, true); err != nil {
return
}
srcRegBits, dstRegBits := registerBits(n.srcReg), registerBits(n.dstReg)
if n.srcReg == RegSP || n.dstReg == RegSP {
// Moving between stack pointers.
// https://developer.arm.com/documentation/ddi0602/2021-12/Base-Instructions/MOV--to-from-SP---Move-between-register-and-stack-pointer--an-alias-of-ADD--immediate--
buf.Append4Bytes(
(srcRegBits<<5)|dstRegBits,
srcRegBits>>3,
0x0,
0b1001_0001,
)
return
}
if n.srcReg == RegRZR && inst == MOVD {
// If this is 64-bit mov from zero register, then we encode this as MOVK.
// See "Move wide (immediate)" in
// https://developer.arm.com/documentation/ddi0602/2021-06/Index-by-Encoding/Data-Processing----Immediate
buf.Append4Bytes(
dstRegBits,
0x0,
0b1000_0000,
0b1_10_10010,
)
} else {
// MOV can be encoded as ORR (shifted register): "ORR Wd, WZR, Wm".
// https://developer.arm.com/documentation/100069/0609/A64-General-Instructions/MOV--register-
var sf byte
if inst == MOVD {
sf = 0b1
}
buf.Append4Bytes(
(zeroRegisterBits<<5)|dstRegBits,
zeroRegisterBits>>3,
0b000_00000|srcRegBits,
sf<<7|0b0_01_01010,
)
}
case MRS:
if n.srcReg != RegFPSR {
return fmt.Errorf("MRS has only support for FPSR register as a src but got %s", RegisterName(n.srcReg))
}
// For how to specify FPSR register, see "Accessing FPSR" in:
// https://developer.arm.com/documentation/ddi0595/2021-12/AArch64-Registers/FPSR--Floating-point-Status-Register?lang=en
dstRegBits := registerBits(n.dstReg)
buf.Append4Bytes(
0b001<<5|dstRegBits,
0b0100<<4|0b0100,
0b0011_0000|0b11<<3|0b011,
0b1101_0101,
)
case MSR:
if n.dstReg != RegFPSR {
return fmt.Errorf("MSR has only support for FPSR register as a dst but got %s", RegisterName(n.srcReg))
}
// For how to specify FPSR register, see "Accessing FPSR" in:
// https://developer.arm.com/documentation/ddi0595/2021-12/AArch64-Registers/FPSR--Floating-point-Status-Register?lang=en
srcRegBits := registerBits(n.srcReg)
buf.Append4Bytes(
0b001<<5|srcRegBits,
0b0100<<4|0b0100,
0b0001_0000|0b11<<3|0b011,
0b1101_0101,
)
case MUL, MULW:
// Multiplications are encoded as MADD (zero register, src, dst), dst = zero + (src * dst) = src * dst.
// See "Data-processing (3 source)" in
// https://developer.arm.com/documentation/ddi0602/2021-06/Index-by-Encoding/Data-Processing----Register?lang=en
if err = checkRegisterToRegisterType(n.srcReg, n.dstReg, true, true); err != nil {
return
}
var sf byte
if inst == MUL {
sf = 0b1
}
srcRegBits, dstRegBits := registerBits(n.srcReg), registerBits(n.dstReg)
buf.Append4Bytes(
dstRegBits<<5|dstRegBits,
zeroRegisterBits<<2|dstRegBits>>3,
srcRegBits,
sf<<7|0b11011,
)
case NEG, NEGW:
srcRegBits, dstRegBits := registerBits(n.srcReg), registerBits(n.dstReg)
if err = checkRegisterToRegisterType(n.srcReg, n.dstReg, true, true); err != nil {
return
}
// NEG is encoded as "SUB dst, XZR, src" = "dst = 0 - src"
// https://developer.arm.com/documentation/ddi0596/2021-12/Index-by-Encoding/Data-Processing----Register?lang=en#addsub_shift
var sf byte
if inst == NEG {
sf = 0b1
}
buf.Append4Bytes(
(zeroRegisterBits<<5)|dstRegBits,
zeroRegisterBits>>3,
srcRegBits,
sf<<7|0b0_10_00000|0b0_00_01011,
)
case SDIV, SDIVW, UDIV, UDIVW:
srcRegBits, dstRegBits := registerBits(n.srcReg), registerBits(n.dstReg)
if err = checkRegisterToRegisterType(n.srcReg, n.dstReg, true, true); err != nil {
return
}
// See "Data-processing (2 source)" in
// https://developer.arm.com/documentation/ddi0602/2021-06/Index-by-Encoding/Data-Processing----Register?lang=en
var sf, opcode byte
switch inst {
case SDIV:
sf, opcode = 0b1, 0b000011
case SDIVW:
sf, opcode = 0b0, 0b000011
case UDIV:
sf, opcode = 0b1, 0b000010
case UDIVW:
sf, opcode = 0b0, 0b000010
}
buf.Append4Bytes(
(dstRegBits<<5)|dstRegBits,
opcode<<2|(dstRegBits>>3),
0b110_00000|srcRegBits,
sf<<7|0b0_00_11010,
)
case SCVTFD, SCVTFWD, SCVTFS, SCVTFWS, UCVTFD, UCVTFS, UCVTFWD, UCVTFWS:
srcRegBits, dstRegBits := registerBits(n.srcReg), registerBits(n.dstReg)
if err = checkRegisterToRegisterType(n.srcReg, n.dstReg, true, false); err != nil {
return
}
// "Conversion between floating-point and integer" in
// https://developer.arm.com/documentation/ddi0596/2021-12/Index-by-Encoding/Data-Processing----Scalar-Floating-Point-and-Advanced-SIMD?lang=en#floatdp1
var sf, tp, opcode byte
switch inst {
case SCVTFD: // 64-bit integer to double
sf, tp, opcode = 0b1, 0b01, 0b010
case SCVTFWD: // 32-bit integer to double
sf, tp, opcode = 0b0, 0b01, 0b010
case SCVTFS: // 64-bit integer to single
sf, tp, opcode = 0b1, 0b00, 0b010
case SCVTFWS: // 32-bit integer to single
sf, tp, opcode = 0b0, 0b00, 0b010
case UCVTFD: // 64-bit to double
sf, tp, opcode = 0b1, 0b01, 0b011
case UCVTFWD: // 32-bit to double
sf, tp, opcode = 0b0, 0b01, 0b011
case UCVTFS: // 64-bit to single
sf, tp, opcode = 0b1, 0b00, 0b011
case UCVTFWS: // 32-bit to single
sf, tp, opcode = 0b0, 0b00, 0b011
}
buf.Append4Bytes(
(srcRegBits<<5)|dstRegBits,
srcRegBits>>3,
tp<<6|0b00_1_00_000|opcode,
sf<<7|0b0_0_0_11110,
)
case SXTB, SXTBW, SXTH, SXTHW, SXTW:
if err = checkRegisterToRegisterType(n.srcReg, n.dstReg, true, true); err != nil {
return
}
srcRegBits, dstRegBits := registerBits(n.srcReg), registerBits(n.dstReg)
if n.srcReg == RegRZR {
// If the source is zero register, we encode as MOV dst, zero.
var sf byte
if inst == MOVD {
sf = 0b1
}
buf.Append4Bytes(
(zeroRegisterBits<<5)|dstRegBits,
zeroRegisterBits>>3,
0b000_00000|srcRegBits,
sf<<7|0b0_01_01010,
)
return
}
// SXTB is encoded as "SBFM Wd, Wn, #0, #7"
// https://developer.arm.com/documentation/dui0801/g/A64-General-Instructions/SXTB
// SXTH is encoded as "SBFM Wd, Wn, #0, #15"
// https://developer.arm.com/documentation/dui0801/g/A64-General-Instructions/SXTH
// SXTW is encoded as "SBFM Xd, Xn, #0, #31"
// https://developer.arm.com/documentation/dui0802/b/A64-General-Instructions/SXTW
var n, sf, imms, opc byte
switch inst {
case SXTB:
n, sf, imms = 0b1, 0b1, 0x7
case SXTBW:
n, sf, imms = 0b0, 0b0, 0x7
case SXTH:
n, sf, imms = 0b1, 0b1, 0xf
case SXTHW:
n, sf, imms = 0b0, 0b0, 0xf
case SXTW:
n, sf, imms = 0b1, 0b1, 0x1f
}
buf.Append4Bytes(
(srcRegBits<<5)|dstRegBits,
imms<<2|(srcRegBits>>3),
n<<6,
sf<<7|opc<<5|0b10011,
)
default:
return errorEncodingUnsupported(n)
}
return
}
func (a *AssemblerImpl) encodeLeftShiftedRegisterToRegister(buf asm.Buffer, n *nodeImpl) error {
baseRegBits, err := intRegisterBits(n.srcReg)
if err != nil {
return err
}
shiftTargetRegBits, err := intRegisterBits(n.srcReg2)
if err != nil {
return err
}
dstRegBits, err := intRegisterBits(n.dstReg)
if err != nil {
return err
}
switch n.instruction {
case ADD:
// https://developer.arm.com/documentation/ddi0596/2021-12/Index-by-Encoding/Data-Processing----Register?lang=en#addsub_shift
const logicalLeftShiftBits = 0b00
if n.srcConst < 0 || n.srcConst > 64 {
return fmt.Errorf("shift amount must fit in unsigned 6-bit integer (0-64) but got %d", n.srcConst)
}
shiftByte := byte(n.srcConst)
buf.Append4Bytes(
(baseRegBits<<5)|dstRegBits,
(shiftByte<<2)|(baseRegBits>>3),
(logicalLeftShiftBits<<6)|shiftTargetRegBits,
0b1000_1011,
)
return err
default:
return errorEncodingUnsupported(n)
}
}
func (a *AssemblerImpl) encodeTwoRegistersToRegister(buf asm.Buffer, n *nodeImpl) (err error) {
switch inst := n.instruction; inst {
case AND, ANDW, ORR, ORRW, EOR, EORW:
// See "Logical (shifted register)" in
// https://developer.arm.com/documentation/ddi0596/2021-12/Index-by-Encoding/Data-Processing----Register?lang=en
srcRegBits, srcReg2Bits, dstRegBits := registerBits(n.srcReg), registerBits(n.srcReg2), registerBits(n.dstReg)
var sf, opc byte
switch inst {
case AND:
sf, opc = 0b1, 0b00
case ANDW:
sf, opc = 0b0, 0b00
case ORR:
sf, opc = 0b1, 0b01
case ORRW:
sf, opc = 0b0, 0b01
case EOR:
sf, opc = 0b1, 0b10
case EORW:
sf, opc = 0b0, 0b10
}
buf.Append4Bytes(
(srcReg2Bits<<5)|dstRegBits,
srcReg2Bits>>3,
srcRegBits,
sf<<7|opc<<5|0b01010,
)
case ASR, ASRW, LSL, LSLW, LSR, LSRW, ROR, RORW:
// See "Data-processing (2 source)" in
// https://developer.arm.com/documentation/ddi0596/2021-12/Index-by-Encoding/Data-Processing----Register?lang=en
srcRegBits, srcReg2Bits, dstRegBits := registerBits(n.srcReg), registerBits(n.srcReg2), registerBits(n.dstReg)
var sf, opcode byte
switch inst {
case ASR:
sf, opcode = 0b1, 0b001010
case ASRW:
sf, opcode = 0b0, 0b001010
case LSL:
sf, opcode = 0b1, 0b001000
case LSLW:
sf, opcode = 0b0, 0b001000
case LSR:
sf, opcode = 0b1, 0b001001
case LSRW:
sf, opcode = 0b0, 0b001001
case ROR:
sf, opcode = 0b1, 0b001011
case RORW:
sf, opcode = 0b0, 0b001011
}
buf.Append4Bytes(
(srcReg2Bits<<5)|dstRegBits,
opcode<<2|(srcReg2Bits>>3),
0b110_00000|srcRegBits,
sf<<7|0b0_00_11010,
)
case SDIV, SDIVW, UDIV, UDIVW:
srcRegBits, srcReg2Bits, dstRegBits := registerBits(n.srcReg), registerBits(n.srcReg2), registerBits(n.dstReg)
// See "Data-processing (2 source)" in
// https://developer.arm.com/documentation/ddi0602/2021-06/Index-by-Encoding/Data-Processing----Register?lang=en
var sf, opcode byte
switch inst {
case SDIV:
sf, opcode = 0b1, 0b000011
case SDIVW:
sf, opcode = 0b0, 0b000011
case UDIV:
sf, opcode = 0b1, 0b000010
case UDIVW:
sf, opcode = 0b0, 0b000010
}
buf.Append4Bytes(
(srcReg2Bits<<5)|dstRegBits,
opcode<<2|(srcReg2Bits>>3),
0b110_00000|srcRegBits,
sf<<7|0b0_00_11010,
)
case SUB, SUBW:
srcRegBits, srcReg2Bits, dstRegBits := registerBits(n.srcReg), registerBits(n.srcReg2), registerBits(n.dstReg)
// See "Add/subtract (shifted register)" in
// https://developer.arm.com/documentation/ddi0596/2021-12/Index-by-Encoding/Data-Processing----Register?lang=en
var sf byte
if inst == SUB {
sf = 0b1
}
buf.Append4Bytes(
(srcReg2Bits<<5)|dstRegBits,
srcReg2Bits>>3,
srcRegBits,
sf<<7|0b0_10_01011,
)
case FSUBD, FSUBS:
srcRegBits, srcReg2Bits, dstRegBits := registerBits(n.srcReg), registerBits(n.srcReg2), registerBits(n.dstReg)
// See "Floating-point data-processing (2 source)" in
// https://developer.arm.com/documentation/ddi0596/2021-12/Index-by-Encoding/Data-Processing----Scalar-Floating-Point-and-Advanced-SIMD?lang=en
var tp byte
if inst == FSUBD {
tp = 0b01
}
buf.Append4Bytes(
(srcReg2Bits<<5)|dstRegBits,
0b0011_10_00|(srcReg2Bits>>3),
tp<<6|0b00_1_00000|srcRegBits,
0b0_00_11110,
)
default:
return errorEncodingUnsupported(n)
}
return
}
func (a *AssemblerImpl) encodeThreeRegistersToRegister(buf asm.Buffer, n *nodeImpl) error {
switch n.instruction {
case MSUB, MSUBW:
// Dst = Src2 - (Src1 * Src3)
// "Data-processing (3 source)" in:
// https://developer.arm.com/documentation/ddi0596/2021-12/Index-by-Encoding/Data-Processing----Register?lang=en
src1RegBits, err := intRegisterBits(n.srcReg)
if err != nil {
return err
}
src2RegBits, err := intRegisterBits(n.srcReg2)
if err != nil {
return err
}
src3RegBits, err := intRegisterBits(n.dstReg)
if err != nil {
return err
}
dstRegBits, err := intRegisterBits(n.dstReg2)
if err != nil {
return err
}
var sf byte // is zero for MSUBW (32-bit MSUB).
if n.instruction == MSUB {
sf = 0b1
}
buf.Append4Bytes(
(src3RegBits<<5)|dstRegBits,
0b1_0000000|(src2RegBits<<2)|(src3RegBits>>3),
src1RegBits,
sf<<7|0b00_11011,
)
return nil
default:
return errorEncodingUnsupported(n)
}
}
func (a *AssemblerImpl) encodeTwoRegistersToNone(buf asm.Buffer, n *nodeImpl) error {
switch n.instruction {
case CMPW, CMP:
// Compare on two registers is an alias for "SUBS (src1, src2) ZERO"
// which can be encoded as SUBS (shifted registers) with zero shifting.
// https://developer.arm.com/documentation/ddi0596/2021-12/Index-by-Encoding/Data-Processing----Register?lang=en#addsub_shift
src1RegBits, err := intRegisterBits(n.srcReg)
if err != nil {
return err
}
src2RegBits, err := intRegisterBits(n.srcReg2)
if err != nil {
return err
}
var op byte
if n.instruction == CMP {
op = 0b111
} else {
op = 0b011
}
buf.Append4Bytes(
(src2RegBits<<5)|zeroRegisterBits,
src2RegBits>>3,
src1RegBits,
0b01011|(op<<5),
)
return nil
case FCMPS, FCMPD:
// "Floating-point compare" section in:
// https://developer.arm.com/documentation/ddi0596/2021-12/Index-by-Encoding/Data-Processing----Scalar-Floating-Point-and-Advanced-SIMD?lang=en
src1RegBits, err := vectorRegisterBits(n.srcReg)
if err != nil {
return err
}
src2RegBits, err := vectorRegisterBits(n.srcReg2)
if err != nil {
return err
}
var ftype byte // is zero for FCMPS (single precision float compare).
if n.instruction == FCMPD {
ftype = 0b01
}
buf.Append4Bytes(
src2RegBits<<5,
0b001000_00|(src2RegBits>>3),
ftype<<6|0b1_00000|src1RegBits,
0b000_11110,
)
return nil
default:
return errorEncodingUnsupported(n)
}
}
func (a *AssemblerImpl) encodeRegisterAndConstToNone(buf asm.Buffer, n *nodeImpl) error {
if n.instruction != CMP {
return errorEncodingUnsupported(n)
}
// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/CMP--immediate---Compare--immediate---an-alias-of-SUBS--immediate--?lang=en
if n.srcConst < 0 || n.srcConst > 4095 {
return fmt.Errorf("immediate for CMP must fit in 0 to 4095 but got %d", n.srcConst)
} else if n.srcReg == RegRZR {
return errors.New("zero register is not supported for CMP (immediate)")
}
srcRegBits, err := intRegisterBits(n.srcReg)
if err != nil {
return err
}
buf.Append4Bytes(
(srcRegBits<<5)|zeroRegisterBits,
(byte(n.srcConst)<<2)|(srcRegBits>>3),
byte(n.srcConst>>6),
0b111_10001,
)
return nil
}
func fitInSigned9Bits(v int64) bool {
return v >= -256 && v <= 255
}
func (a *AssemblerImpl) encodeLoadOrStoreWithRegisterOffset(
buf asm.Buffer, baseRegBits, offsetRegBits, targetRegBits byte, opcode, size, v byte,
) {
// See "Load/store register (register offset)".
// https://developer.arm.com/documentation/ddi0596/2021-12/Index-by-Encoding/Loads-and-Stores?lang=en#ldst_regoff
buf.Append4Bytes(
(baseRegBits<<5)|targetRegBits,
0b011_010_00|(baseRegBits>>3),
opcode<<6|0b00_1_00000|offsetRegBits,
size<<6|v<<2|0b00_111_0_00,
)
}
// validateMemoryOffset validates the memory offset if the given offset can be encoded in the assembler.
// In theory, offset can be any, but for simplicity of our homemade assembler, we limit the offset range
// that can be encoded enough for supporting compiler.
func validateMemoryOffset(offset int64) error {
if offset > 255 && offset%4 != 0 {
// This is because we only have large offsets for load/store with Wasm value stack or reading type IDs, and its offset
// is always multiplied by 4 or 8 (== the size of uint32 or uint64 == the type of wasm.FunctionTypeID or value stack in Go)
return fmt.Errorf("large memory offset (>255) must be a multiple of 4 but got %d", offset)
} else if offset < -256 { // 9-bit signed integer's minimum = 2^8.
return fmt.Errorf("negative memory offset must be larget than or equal -256 but got %d", offset)
} else if offset > 1<<31-1 {
return fmt.Errorf("large memory offset must be less than %d but got %d", 1<<31-1, offset)
} else {
return nil
}
}
// encodeLoadOrStoreWithConstOffset encodes load/store instructions with the constant offset.
//
// Note: Encoding strategy intentionally matches the Go assembler: https://go.dev/doc/asm
func (a *AssemblerImpl) encodeLoadOrStoreWithConstOffset(
buf asm.Buffer,
baseRegBits, targetRegBits byte,
offset int64,
opcode, size, v byte,
datasize, datasizeLog2 int64,
) (err error) {
if err = validateMemoryOffset(offset); err != nil {
return
}
if fitInSigned9Bits(offset) {
// See "LDAPR/STLR (unscaled immediate)"
// https://developer.arm.com/documentation/ddi0596/2021-12/Index-by-Encoding/Loads-and-Stores?lang=en#ldapstl_unscaled
if offset < 0 || offset%datasize != 0 {
// This case is encoded as one "unscaled signed store".
buf.Append4Bytes(
(baseRegBits<<5)|targetRegBits,
byte(offset<<4)|(baseRegBits>>3),
opcode<<6|(0b00_00_11111&byte(offset>>4)),
size<<6|v<<2|0b00_1_11_0_00,
)
return
}
}
// At this point we have the assumption that offset is positive.
// Plus if it is a multiple of datasize, then it can be encoded as a single "unsigned immediate".
if offset%datasize == 0 &&
offset < (1<<12)<<datasizeLog2 {
m := offset / datasize
buf.Append4Bytes(
(baseRegBits<<5)|targetRegBits,
(byte(m<<2))|(baseRegBits>>3),
opcode<<6|0b00_111111&byte(m>>6),
size<<6|v<<2|0b00_1_11_0_01,
)
return
}
// Otherwise, we need multiple instructions.
tmpRegBits := registerBits(a.temporaryRegister)
offset32 := int32(offset)
// Go's assembler adds a const into the const pool at this point,
// regardless of its usage; e.g. if we enter the then block of the following if statement,
// the const is not used but it is added into the const pool.
c := asm.NewStaticConst(make([]byte, 4))
binary.LittleEndian.PutUint32(c.Raw, uint32(offset))
a.pool.AddConst(c, uint64(buf.Len()))
// https://github.com/golang/go/blob/release-branch.go1.15/src/cmd/internal/obj/arm64/asm7.go#L3529-L3532
// If the offset is within 24-bits, we can load it with two ADD instructions.
hi := offset32 - (offset32 & (0xfff << uint(datasizeLog2)))
if hi&^0xfff000 == 0 {
var sfops byte = 0b100
m := ((offset32 - hi) >> datasizeLog2) & 0xfff
hi >>= 12
// https://github.com/golang/go/blob/release-branch.go1.15/src/cmd/internal/obj/arm64/asm7.go#L3534-L3535
buf.Append4Bytes(
(baseRegBits<<5)|tmpRegBits,
(byte(hi)<<2)|(baseRegBits>>3),
0b01<<6 /* shift by 12 */ |byte(hi>>6),
sfops<<5|0b10001,
)
buf.Append4Bytes(
(tmpRegBits<<5)|targetRegBits,
(byte(m<<2))|(tmpRegBits>>3),
opcode<<6|0b00_111111&byte(m>>6),
size<<6|v<<2|0b00_1_11_0_01,
)
} else {
// This case we load the const via ldr(literal) into tem register,
// and the target const is placed after this instruction below.
loadLiteralOffsetInBinary := uint64(buf.Len())
// First we emit the ldr(literal) with offset zero as we don't yet know the const's placement in the binary.
// https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/LDR--literal---Load-Register--literal--
buf.Append4Bytes(tmpRegBits, 0x0, 0x0, 0b00_011_0_00)
// Set the callback for the constant, and we set properly the offset in the callback.
c.AddOffsetFinalizedCallback(func(offsetOfConst uint64) {
// ldr(literal) encodes offset divided by 4.
offset := (int(offsetOfConst) - int(loadLiteralOffsetInBinary)) / 4
bin := buf.Bytes()
bin[loadLiteralOffsetInBinary] |= byte(offset << 5)
bin[loadLiteralOffsetInBinary+1] |= byte(offset >> 3)
bin[loadLiteralOffsetInBinary+2] |= byte(offset >> 11)
})
// Then, load the constant with the register offset.
// https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/LDR--register---Load-Register--register--
buf.Append4Bytes(
(baseRegBits<<5)|targetRegBits,
0b011_010_00|(baseRegBits>>3),
opcode<<6|0b00_1_00000|tmpRegBits,
size<<6|v<<2|0b00_111_0_00,
)
}
return
}
func (a *AssemblerImpl) encodeRegisterToMemory(buf asm.Buffer, n *nodeImpl) (err error) {
// https://developer.arm.com/documentation/ddi0596/2021-12/Index-by-Encoding/Loads-and-Stores?lang=en#ldst_regoff
var (
size, v byte
datasize, datasizeLog2 int64
isTargetFloat bool
)
switch n.instruction {
case STRD:
size, v, datasize, datasizeLog2 = 0b11, 0x0, 8, 3
case STRW:
size, v, datasize, datasizeLog2 = 0b10, 0x0, 4, 2
case STRH:
size, v, datasize, datasizeLog2 = 0b01, 0x0, 2, 1
case STRB:
size, v, datasize, datasizeLog2 = 0b00, 0x0, 1, 0
case FSTRD:
size, v, datasize, datasizeLog2, isTargetFloat = 0b11, 0x1, 8, 3, true
case FSTRS:
size, v, datasize, datasizeLog2, isTargetFloat = 0b10, 0x1, 4, 2, true
default:
return errorEncodingUnsupported(n)
}
var srcRegBits byte
if isTargetFloat {
srcRegBits, err = vectorRegisterBits(n.srcReg)
} else {
srcRegBits, err = intRegisterBits(n.srcReg)
}
if err != nil {
return
}
baseRegBits, err := intRegisterBits(n.dstReg)
if err != nil {
return err
}
const opcode = 0x00 // opcode for store instructions.
if n.dstReg2 != asm.NilRegister {
offsetRegBits, err := intRegisterBits(n.dstReg2)
if err != nil {
return err
}
a.encodeLoadOrStoreWithRegisterOffset(buf, baseRegBits, offsetRegBits, srcRegBits, opcode, size, v)
} else {
err = a.encodeLoadOrStoreWithConstOffset(buf, baseRegBits, srcRegBits, n.dstConst, opcode, size, v, datasize, datasizeLog2)
}
return
}
func (a *AssemblerImpl) encodeADR(buf asm.Buffer, n *nodeImpl) (err error) {
dstRegBits, err := intRegisterBits(n.dstReg)
if err != nil {
return err
}
adrInstructionOffsetInBinary := uint64(buf.Len())
// At this point, we don't yet know the target offset to read from,
// so we emit the ADR instruction with 0 offset, and replace later in the callback.
// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/ADR--Form-PC-relative-address-?lang=en
buf.Append4Bytes(dstRegBits, 0x0, 0x0, 0b10000)
// This case, the ADR's target offset is for the staticConst's initial address.
if sc := n.staticConst; sc != nil {
a.pool.AddConst(sc, adrInstructionOffsetInBinary)
sc.AddOffsetFinalizedCallback(func(offsetOfConst uint64) {
adrInstructionBytes := buf.Bytes()[adrInstructionOffsetInBinary : adrInstructionOffsetInBinary+4]
offset := int(offsetOfConst) - int(adrInstructionOffsetInBinary)
// See https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/ADR--Form-PC-relative-address-?lang=en
adrInstructionBytes[3] |= byte(offset & 0b00000011 << 5)
offset >>= 2
adrInstructionBytes[0] |= byte(offset << 5)
offset >>= 3
adrInstructionBytes[1] |= byte(offset)
offset >>= 8
adrInstructionBytes[2] |= byte(offset)
})
return
} else {
a.adrInstructionNodes = append(a.adrInstructionNodes, n)
}
return
}
func (a *AssemblerImpl) finalizeADRInstructionNode(code []byte, n *nodeImpl) (err error) {
// Find the target instruction node.
targetNode := n
for ; targetNode != nil; targetNode = targetNode.next {
if targetNode.instruction == n.readInstructionAddressBeforeTargetInstruction {
targetNode = targetNode.next
break
}
}
if targetNode == nil {
return fmt.Errorf("BUG: target instruction %s not found for ADR", InstructionName(n.readInstructionAddressBeforeTargetInstruction))
}
offset := targetNode.OffsetInBinary() - n.OffsetInBinary()
if i64 := int64(offset); i64 >= 1<<20 || i64 < -1<<20 {
// We could support offset over 20-bit range by special casing them here,
// but 20-bit range should be enough for our impl. If the necessity comes up,
// we could add the special casing here to support arbitrary large offset.
return fmt.Errorf("BUG: too large offset for ADR: %#x", offset)
}
adrInstructionBytes := code[n.OffsetInBinary() : n.OffsetInBinary()+4]
// According to the binary format of ADR instruction:
// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/ADR--Form-PC-relative-address-?lang=en
adrInstructionBytes[3] |= byte(offset & 0b00000011 << 5)
offset >>= 2
adrInstructionBytes[0] |= byte(offset << 5)
offset >>= 3
adrInstructionBytes[1] |= byte(offset)
offset >>= 8
adrInstructionBytes[2] |= byte(offset)
return nil
}
func (a *AssemblerImpl) encodeMemoryToRegister(buf asm.Buffer, n *nodeImpl) (err error) {
// https://developer.arm.com/documentation/ddi0596/2021-12/Index-by-Encoding/Loads-and-Stores?lang=en#ldst_regoff
var (
size, v, opcode byte
datasize, datasizeLog2 int64
isTargetFloat bool
)
switch n.instruction {
case ADR:
return a.encodeADR(buf, n)
case FLDRD:
size, v, datasize, datasizeLog2, opcode, isTargetFloat = 0b11, 0x1, 8, 3, 0b01, true
case FLDRS:
size, v, datasize, datasizeLog2, opcode, isTargetFloat = 0b10, 0x1, 4, 2, 0b01, true
case LDRD:
size, v, datasize, datasizeLog2, opcode = 0b11, 0x0, 8, 3, 0b01
case LDRW:
size, v, datasize, datasizeLog2, opcode = 0b10, 0x0, 4, 2, 0b01
case LDRSHD:
size, v, datasize, datasizeLog2, opcode = 0b01, 0x0, 2, 1, 0b10
case LDRSHW:
size, v, datasize, datasizeLog2, opcode = 0b01, 0x0, 2, 1, 0b11
case LDRH:
size, v, datasize, datasizeLog2, opcode = 0b01, 0x0, 2, 1, 0b01
case LDRSBD:
size, v, datasize, datasizeLog2, opcode = 0b00, 0x0, 1, 0, 0b10
case LDRSBW:
size, v, datasize, datasizeLog2, opcode = 0b00, 0x0, 1, 0, 0b11
case LDRB:
size, v, datasize, datasizeLog2, opcode = 0b00, 0x0, 1, 0, 0b01
case LDRSW:
size, v, datasize, datasizeLog2, opcode = 0b10, 0x0, 4, 2, 0b10
default:
return errorEncodingUnsupported(n)
}
var dstRegBits byte
if isTargetFloat {
dstRegBits, err = vectorRegisterBits(n.dstReg)
} else {
dstRegBits, err = intRegisterBits(n.dstReg)
}
if err != nil {
return
}
baseRegBits, err := intRegisterBits(n.srcReg)
if err != nil {
return err
}
if n.srcReg2 != asm.NilRegister {
offsetRegBits, err := intRegisterBits(n.srcReg2)
if err != nil {
return err
}
a.encodeLoadOrStoreWithRegisterOffset(buf, baseRegBits, offsetRegBits, dstRegBits, opcode,
size, v)
} else {
err = a.encodeLoadOrStoreWithConstOffset(buf, baseRegBits, dstRegBits, n.srcConst, opcode,
size, v, datasize, datasizeLog2)
}
return
}
// const16bitAligned check if the value is on the 16-bit alignment.
// If so, returns the shift num divided by 16, and otherwise -1.
func const16bitAligned(v int64) (ret int) {
ret = -1
for s := 0; s < 64; s += 16 {
if (uint64(v) &^ (uint64(0xffff) << uint(s))) == 0 {
ret = s / 16
break
}
}
return
}
// isBitMaskImmediate determines if the value can be encoded as "bitmask immediate".
//
// Such an immediate is a 32-bit or 64-bit pattern viewed as a vector of identical elements of size e = 2, 4, 8, 16, 32, or 64 bits.
// Each element contains the same sub-pattern: a single run of 1 to e-1 non-zero bits, rotated by 0 to e-1 bits.
//
// See https://developer.arm.com/documentation/dui0802/b/A64-General-Instructions/MOV--bitmask-immediate-
func isBitMaskImmediate(x uint64) bool {
// All zeros and ones are not "bitmask immediate" by defainition.
if x == 0 || x == 0xffff_ffff_ffff_ffff {
return false
}
switch {
case x != x>>32|x<<32:
// e = 64
case x != x>>16|x<<48:
// e = 32 (x == x>>32|x<<32).
// e.g. 0x00ff_ff00_00ff_ff00
x = uint64(int32(x))
case x != x>>8|x<<56:
// e = 16 (x == x>>16|x<<48).
// e.g. 0x00ff_00ff_00ff_00ff
x = uint64(int16(x))
case x != x>>4|x<<60:
// e = 8 (x == x>>8|x<<56).
// e.g. 0x0f0f_0f0f_0f0f_0f0f
x = uint64(int8(x))
default:
// e = 4 or 2.
return true
}
return sequenceOfSetbits(x) || sequenceOfSetbits(^x)
}
// sequenceOfSetbits returns true if the number's binary representation is the sequence set bit (1).
// For example: 0b1110 -> true, 0b1010 -> false
func sequenceOfSetbits(x uint64) bool {
y := getLowestBit(x)
// If x is a sequence of set bit, this should results in the number
// with only one set bit (i.e. power of two).
y += x
return (y-1)&y == 0
}
func getLowestBit(x uint64) uint64 {
// See https://stackoverflow.com/questions/12247186/find-the-lowest-set-bit
return x & (^x + 1)
}
func (a *AssemblerImpl) addOrSub64BitRegisters(buf asm.Buffer, sfops byte, sp bool, dstRegBits, src1RegBits, src2RegBits byte) {
// src1Reg = src1Reg +/- src2Reg
if sp {
// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/ADD--extended-register---Add--extended-register--?lang=en
buf.Append4Bytes(
(src1RegBits<<5)|dstRegBits,
0b011<<5|src1RegBits>>3,
1<<5|src2RegBits,
sfops<<5|0b01011,
)
} else {
buf.Append4Bytes(
(src1RegBits<<5)|dstRegBits,
src1RegBits>>3,
src2RegBits,
sfops<<5|0b01011,
)
}
}
func bitmaskImmediate(c uint64, is64bit bool) (immr, imms, N byte) {
var size uint32
switch {
case c != c>>32|c<<32:
size = 64
case c != c>>16|c<<48:
size = 32
c = uint64(int32(c))
case c != c>>8|c<<56:
size = 16
c = uint64(int16(c))
case c != c>>4|c<<60:
size = 8
c = uint64(int8(c))
case c != c>>2|c<<62:
size = 4
c = uint64(int64(c<<60) >> 60)
default:
size = 2
c = uint64(int64(c<<62) >> 62)
}
neg := false
if int64(c) < 0 {
c = ^c
neg = true
}
onesSize, nonZeroPos := getOnesSequenceSize(c)
if neg {
nonZeroPos = onesSize + nonZeroPos
onesSize = size - onesSize
}
var mode byte = 32
if is64bit {
N, mode = 0b1, 64
}
immr = byte((size - nonZeroPos) & (size - 1) & uint32(mode-1))
imms = byte((onesSize - 1) | 63&^(size<<1-1))
return
}
func (a *AssemblerImpl) encodeConstToRegister(buf asm.Buffer, n *nodeImpl) (err error) {
// Alias for readability.
c := n.srcConst
dstRegBits, err := intRegisterBits(n.dstReg)
if err != nil {
return err
}
// See "Logical (immediate)" in
// https://developer.arm.com/documentation/ddi0596/2021-12/Index-by-Encoding/Data-Processing----Immediate
switch n.instruction {
case ANDIMM32:
var sf, opc byte = 0b0, 0b00
if !isBitMaskImmediate(uint64(c)) {
err = fmt.Errorf("const %d must be valid bitmask immediate for %s", c, InstructionName(ANDIMM64))
return
}
immr, imms, N := bitmaskImmediate(uint64(c), false)
buf.Append4Bytes(
(dstRegBits<<5)|dstRegBits,
imms<<2|dstRegBits>>3,
N<<6|immr,
sf<<7|opc<<5|0b10010,
)
return
case ANDIMM64:
var sf, opc byte = 0b1, 0b00
if !isBitMaskImmediate(uint64(c)) {
err = fmt.Errorf("const %d must be valid bitmask immediate for %s", c, InstructionName(ANDIMM64))
return
}
immr, imms, N := bitmaskImmediate(uint64(c), true)
buf.Append4Bytes(
(dstRegBits<<5)|dstRegBits,
imms<<2|dstRegBits>>3,
N<<6|immr,
sf<<7|opc<<5|0b10010,
)
return
}
switch inst := n.instruction; inst {
case ADD, ADDS, SUB, SUBS:
srcRegBits := dstRegBits
if n.srcReg != asm.NilRegister {
srcRegBits, err = intRegisterBits(n.srcReg)
if err != nil {
return err
}
}
var sfops byte
if inst == ADD {
sfops = 0b100
} else if inst == ADDS {
sfops = 0b101
} else if inst == SUB {
sfops = 0b110
} else if inst == SUBS {
sfops = 0b111
}
isSP := n.srcReg == RegSP || n.dstReg == RegSP
if c == 0 {
// If the constant equals zero, we encode it as ADD (register) with zero register.
a.addOrSub64BitRegisters(buf, sfops, isSP, dstRegBits, srcRegBits, zeroRegisterBits)
return
}
if c >= 0 && (c <= 0xfff || (c&0xfff) == 0 && (uint64(c>>12) <= 0xfff)) {
// If the const can be represented as "imm12" or "imm12 << 12": one instruction
// https://github.com/golang/go/blob/release-branch.go1.15/src/cmd/internal/obj/arm64/asm7.go#L2992
if c <= 0xfff {
buf.Append4Bytes(
(srcRegBits<<5)|dstRegBits,
(byte(c)<<2)|(srcRegBits>>3),
byte(c>>6),
sfops<<5|0b10001,
)
} else {
c >>= 12
buf.Append4Bytes(
(srcRegBits<<5)|dstRegBits,
(byte(c)<<2)|(srcRegBits>>3),
0b01<<6 /* shift by 12 */ |byte(c>>6),
sfops<<5|0b10001,
)
}
return
}
if t := const16bitAligned(c); t >= 0 {
// If the const can fit within 16-bit alignment, for example, 0xffff, 0xffff_0000 or 0xffff_0000_0000_0000
// We could load it into temporary with movk.
// https://github.com/golang/go/blob/release-branch.go1.15/src/cmd/internal/obj/arm64/asm7.go#L4029
tmpRegBits := registerBits(a.temporaryRegister)
// MOVZ $c, tmpReg with shifting.
a.load16bitAlignedConst(buf, c>>(16*t), byte(t), tmpRegBits, false, true)
// ADD/SUB tmpReg, dstReg
a.addOrSub64BitRegisters(buf, sfops, isSP, dstRegBits, srcRegBits, tmpRegBits)
return
} else if t := const16bitAligned(^c); t >= 0 {
// Also if the reverse of the const can fit within 16-bit range, do the same ^^.
// https://github.com/golang/go/blob/release-branch.go1.15/src/cmd/internal/obj/arm64/asm7.go#L4029
tmpRegBits := registerBits(a.temporaryRegister)
// MOVN $c, tmpReg with shifting.
a.load16bitAlignedConst(buf, ^c>>(16*t), byte(t), tmpRegBits, true, true)
// ADD/SUB tmpReg, dstReg
a.addOrSub64BitRegisters(buf, sfops, isSP, dstRegBits, srcRegBits, tmpRegBits)
return
}
if uc := uint64(c); isBitMaskImmediate(uc) {
// If the const can be represented as "bitmask immediate", we load it via ORR into temp register.
// https://github.com/golang/go/blob/release-branch.go1.15/src/cmd/internal/obj/arm64/asm7.go#L6570-L6583
tmpRegBits := registerBits(a.temporaryRegister)
// OOR $c, tmpReg
a.loadConstViaBitMaskImmediate(buf, uc, tmpRegBits, true)
// ADD/SUB tmpReg, dstReg
a.addOrSub64BitRegisters(buf, sfops, isSP, dstRegBits, srcRegBits, tmpRegBits)
return
}
// If the value fits within 24-bit, then we emit two add instructions
if 0 <= c && c <= 0xffffff && inst != SUBS && inst != ADDS {
// https://github.com/golang/go/blob/release-branch.go1.15/src/cmd/internal/obj/arm64/asm7.go#L3849-L3862
buf.Append4Bytes(
(dstRegBits<<5)|dstRegBits,
(byte(c)<<2)|(dstRegBits>>3),
byte(c&0xfff>>6),
sfops<<5|0b10001,
)
c = c >> 12
buf.Append4Bytes(
(dstRegBits<<5)|dstRegBits,
(byte(c)<<2)|(dstRegBits>>3),
0b01_000000 /* shift by 12 */ |byte(c>>6),
sfops<<5|0b10001,
)
return
}
// https://github.com/golang/go/blob/release-branch.go1.15/src/cmd/internal/obj/arm64/asm7.go#L3163-L3203
// Otherwise we use MOVZ and MOVNs for loading const into tmpRegister.
tmpRegBits := registerBits(a.temporaryRegister)
a.load64bitConst(buf, c, tmpRegBits)
a.addOrSub64BitRegisters(buf, sfops, isSP, dstRegBits, srcRegBits, tmpRegBits)
case MOVW:
if c == 0 {
buf.Append4Bytes(
(zeroRegisterBits<<5)|dstRegBits,
zeroRegisterBits>>3,
0b000_00000|zeroRegisterBits,
0b0_01_01010,
)
return
}
// Following the logic here:
// https://github.com/golang/go/blob/release-branch.go1.15/src/cmd/internal/obj/arm64/asm7.go#L1637
c32 := uint32(c)
ic := int64(c32)
if ic >= 0 && (ic <= 0xfff || (ic&0xfff) == 0 && (uint64(ic>>12) <= 0xfff)) {
if isBitMaskImmediate(uint64(c)) {
a.loadConstViaBitMaskImmediate(buf, uint64(c), dstRegBits, false)
return
}
}
if t := const16bitAligned(int64(c32)); t >= 0 {
// If the const can fit within 16-bit alignment, for example, 0xffff, 0xffff_0000 or 0xffff_0000_0000_0000
// We could load it into temporary with movk.
a.load16bitAlignedConst(buf, int64(c32)>>(16*t), byte(t), dstRegBits, false, false)
} else if t := const16bitAligned(int64(^c32)); t >= 0 {
// Also, if the reverse of the const can fit within 16-bit range, do the same ^^.
a.load16bitAlignedConst(buf, int64(^c32)>>(16*t), byte(t), dstRegBits, true, false)
} else if isBitMaskImmediate(uint64(c)) {
a.loadConstViaBitMaskImmediate(buf, uint64(c), dstRegBits, false)
} else {
// Otherwise, we use MOVZ and MOVK to load it.
// https://github.com/golang/go/blob/release-branch.go1.15/src/cmd/internal/obj/arm64/asm7.go#L6623-L6630
c16 := uint16(c32)
// MOVZ: https://developer.arm.com/documentation/dui0802/a/A64-General-Instructions/MOVZ
buf.Append4Bytes(
(byte(c16)<<5)|dstRegBits,
byte(c16>>3),
1<<7|byte(c16>>11),
0b0_10_10010,
)
// MOVK: https://developer.arm.com/documentation/dui0802/a/A64-General-Instructions/MOVK
c16 = uint16(c32 >> 16)
if c16 != 0 {
buf.Append4Bytes(
(byte(c16)<<5)|dstRegBits,
byte(c16>>3),
1<<7|0b0_01_00000 /* shift by 16 */ |byte(c16>>11),
0b0_11_10010,
)
}
}
case MOVD:
// Following the logic here:
// https://github.com/golang/go/blob/release-branch.go1.15/src/cmd/internal/obj/arm64/asm7.go#L1798-L1852
if c >= 0 && (c <= 0xfff || (c&0xfff) == 0 && (uint64(c>>12) <= 0xfff)) {
if isBitMaskImmediate(uint64(c)) {
a.loadConstViaBitMaskImmediate(buf, uint64(c), dstRegBits, true)
return
}
}
if t := const16bitAligned(c); t >= 0 {
// If the const can fit within 16-bit alignment, for example, 0xffff, 0xffff_0000 or 0xffff_0000_0000_0000
// We could load it into temporary with movk.
a.load16bitAlignedConst(buf, c>>(16*t), byte(t), dstRegBits, false, true)
} else if t := const16bitAligned(^c); t >= 0 {
// Also, if the reverse of the const can fit within 16-bit range, do the same ^^.
a.load16bitAlignedConst(buf, (^c)>>(16*t), byte(t), dstRegBits, true, true)
} else if isBitMaskImmediate(uint64(c)) {
a.loadConstViaBitMaskImmediate(buf, uint64(c), dstRegBits, true)
} else {
a.load64bitConst(buf, c, dstRegBits)
}
case LSR:
if c == 0 {
err = errors.New("LSR with zero constant should be optimized out")
return
} else if c < 0 || c > 63 {
err = fmt.Errorf("LSR requires immediate to be within 0 to 63, but got %d", c)
return
}
// LSR(immediate) is an alias of UBFM
// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LSR--immediate---Logical-Shift-Right--immediate---an-alias-of-UBFM-?lang=en
buf.Append4Bytes(
(dstRegBits<<5)|dstRegBits,
0b111111_00|dstRegBits>>3,
0b01_000000|byte(c),
0b110_10011,
)
case LSL:
if c == 0 {
err = errors.New("LSL with zero constant should be optimized out")
return
} else if c < 0 || c > 63 {
err = fmt.Errorf("LSL requires immediate to be within 0 to 63, but got %d", c)
return
}
// LSL(immediate) is an alias of UBFM
// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LSL--immediate---Logical-Shift-Left--immediate---an-alias-of-UBFM-
cb := byte(c)
buf.Append4Bytes(
(dstRegBits<<5)|dstRegBits,
(0b111111-cb)<<2|dstRegBits>>3,
0b01_000000|(64-cb),
0b110_10011,
)
default:
return errorEncodingUnsupported(n)
}
return
}
func (a *AssemblerImpl) movk(buf asm.Buffer, v uint64, shfitNum int, dstRegBits byte) {
// https://developer.arm.com/documentation/dui0802/a/A64-General-Instructions/MOVK
buf.Append4Bytes(
(byte(v)<<5)|dstRegBits,
byte(v>>3),
1<<7|byte(shfitNum)<<5|(0b000_11111&byte(v>>11)),
0b1_11_10010,
)
}
func (a *AssemblerImpl) movz(buf asm.Buffer, v uint64, shfitNum int, dstRegBits byte) {
// https://developer.arm.com/documentation/dui0802/a/A64-General-Instructions/MOVZ
buf.Append4Bytes(
(byte(v)<<5)|dstRegBits,
byte(v>>3),
1<<7|byte(shfitNum)<<5|(0b000_11111&byte(v>>11)),
0b1_10_10010,
)
}
func (a *AssemblerImpl) movn(buf asm.Buffer, v uint64, shfitNum int, dstRegBits byte) {
// https://developer.arm.com/documentation/dui0802/a/A64-General-Instructions/MOVZ
buf.Append4Bytes(
(byte(v)<<5)|dstRegBits,
byte(v>>3),
1<<7|byte(shfitNum)<<5|(0b000_11111&byte(v>>11)),
0b1_00_10010,
)
}
// load64bitConst loads a 64-bit constant into the register, following the same logic to decide how to load large 64-bit
// consts as in the Go assembler.
//
// See https://github.com/golang/go/blob/release-branch.go1.15/src/cmd/internal/obj/arm64/asm7.go#L6632-L6759
func (a *AssemblerImpl) load64bitConst(buf asm.Buffer, c int64, dstRegBits byte) {
var bits [4]uint64
var zeros, negs int
for i := 0; i < 4; i++ {
bits[i] = uint64((c >> uint(i*16)) & 0xffff)
if v := bits[i]; v == 0 {
zeros++
} else if v == 0xffff {
negs++
}
}
if zeros == 3 {
// one MOVZ instruction.
for i, v := range bits {
if v != 0 {
a.movz(buf, v, i, dstRegBits)
}
}
} else if negs == 3 {
// one MOVN instruction.
for i, v := range bits {
if v != 0xffff {
v = ^v
a.movn(buf, v, i, dstRegBits)
}
}
} else if zeros == 2 {
// one MOVZ then one OVK.
var movz bool
for i, v := range bits {
if !movz && v != 0 { // MOVZ.
// https://developer.arm.com/documentation/dui0802/a/A64-General-Instructions/MOVZ
a.movz(buf, v, i, dstRegBits)
movz = true
} else if v != 0 {
a.movk(buf, v, i, dstRegBits)
}
}
} else if negs == 2 {
// one MOVN then one or two MOVK.
var movn bool
for i, v := range bits { // Emit MOVN.
if !movn && v != 0xffff {
v = ^v
// https://developer.arm.com/documentation/dui0802/a/A64-General-Instructions/MOVN
a.movn(buf, v, i, dstRegBits)
movn = true
} else if v != 0xffff {
a.movk(buf, v, i, dstRegBits)
}
}
} else if zeros == 1 {
// one MOVZ then two MOVK.
var movz bool
for i, v := range bits {
if !movz && v != 0 { // MOVZ.
// https://developer.arm.com/documentation/dui0802/a/A64-General-Instructions/MOVZ
a.movz(buf, v, i, dstRegBits)
movz = true
} else if v != 0 {
a.movk(buf, v, i, dstRegBits)
}
}
} else if negs == 1 {
// one MOVN then two MOVK.
var movn bool
for i, v := range bits { // Emit MOVN.
if !movn && v != 0xffff {
v = ^v
// https://developer.arm.com/documentation/dui0802/a/A64-General-Instructions/MOVN
a.movn(buf, v, i, dstRegBits)
movn = true
} else if v != 0xffff {
a.movk(buf, v, i, dstRegBits)
}
}
} else {
// one MOVZ then tree MOVK.
var movz bool
for i, v := range bits {
if !movz && v != 0 { // MOVZ.
// https://developer.arm.com/documentation/dui0802/a/A64-General-Instructions/MOVZ
a.movz(buf, v, i, dstRegBits)
movz = true
} else if v != 0 {
a.movk(buf, v, i, dstRegBits)
}
}
}
}
func (a *AssemblerImpl) load16bitAlignedConst(buf asm.Buffer, c int64, shiftNum byte, regBits byte, reverse bool, dst64bit bool) {
var lastByte byte
if reverse {
// MOVN: https://developer.arm.com/documentation/dui0802/a/A64-General-Instructions/MOVZ
lastByte = 0b0_00_10010
} else {
// MOVZ: https://developer.arm.com/documentation/dui0802/a/A64-General-Instructions/MOVN
lastByte = 0b0_10_10010
}
if dst64bit {
lastByte |= 0b1 << 7
}
buf.Append4Bytes(
(byte(c)<<5)|regBits,
byte(c>>3),
1<<7|(shiftNum<<5)|byte(c>>11),
lastByte,
)
}
// loadConstViaBitMaskImmediate loads the constant with ORR (bitmask immediate).
// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/ORR--immediate---Bitwise-OR--immediate--?lang=en
func (a *AssemblerImpl) loadConstViaBitMaskImmediate(buf asm.Buffer, c uint64, regBits byte, dst64bit bool) {
var size uint32
switch {
case c != c>>32|c<<32:
size = 64
case c != c>>16|c<<48:
size = 32
c = uint64(int32(c))
case c != c>>8|c<<56:
size = 16
c = uint64(int16(c))
case c != c>>4|c<<60:
size = 8
c = uint64(int8(c))
case c != c>>2|c<<62:
size = 4
c = uint64(int64(c<<60) >> 60)
default:
size = 2
c = uint64(int64(c<<62) >> 62)
}
neg := false
if int64(c) < 0 {
c = ^c
neg = true
}
onesSize, nonZeroPos := getOnesSequenceSize(c)
if neg {
nonZeroPos = onesSize + nonZeroPos
onesSize = size - onesSize
}
// See the following article for understanding the encoding.
// https://dinfuehr.github.io/blog/encoding-of-immediate-values-on-aarch64/
var n byte
mode := 32
if dst64bit && size == 64 {
n = 0b1
mode = 64
}
r := byte((size - nonZeroPos) & (size - 1) & uint32(mode-1))
s := byte((onesSize - 1) | 63&^(size<<1-1))
var sf byte
if dst64bit {
sf = 0b1
}
buf.Append4Bytes(
(zeroRegisterBits<<5)|regBits,
s<<2|(zeroRegisterBits>>3),
n<<6|r,
sf<<7|0b0_01_10010,
)
}
func getOnesSequenceSize(x uint64) (size, nonZeroPos uint32) {
// Take 0b00111000 for example:
y := getLowestBit(x) // = 0b0000100
nonZeroPos = setBitPos(y) // = 2
size = setBitPos(x+y) - nonZeroPos // = setBitPos(0b0100000) - 2 = 5 - 2 = 3
return
}
func setBitPos(x uint64) (ret uint32) {
for ; ; ret++ {
if x == 0b1 {
break
}
x = x >> 1
}
return
}
func checkArrangementIndexPair(arr VectorArrangement, index VectorIndex) (err error) {
if arr == VectorArrangementNone {
return nil
}
var valid bool
switch arr {
case VectorArrangement8B:
valid = index < 8
case VectorArrangement16B:
valid = index < 16
case VectorArrangement4H:
valid = index < 4
case VectorArrangement8H:
valid = index < 8
case VectorArrangement2S:
valid = index < 2
case VectorArrangement4S:
valid = index < 4
case VectorArrangement1D:
valid = index < 1
case VectorArrangement2D:
valid = index < 2
case VectorArrangementB:
valid = index < 16
case VectorArrangementH:
valid = index < 8
case VectorArrangementS:
valid = index < 4
case VectorArrangementD:
valid = index < 2
}
if !valid {
err = fmt.Errorf("invalid arrangement and index pair: %s[%d]", arr, index)
}
return
}
func (a *AssemblerImpl) encodeMemoryToVectorRegister(buf asm.Buffer, n *nodeImpl) (err error) {
srcBaseRegBits, err := intRegisterBits(n.srcReg)
if err != nil {
return err
}
dstVectorRegBits, err := vectorRegisterBits(n.dstReg)
if err != nil {
return err
}
switch n.instruction {
case VMOV: // translated as LDR(immediate,SIMD&FP)
// https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/LDR--immediate--SIMD-FP---Load-SIMD-FP-Register--immediate-offset--?lang=en
var size, opcode byte
var dataSize, dataSizeLog2 int64
switch n.vectorArrangement {
case VectorArrangementB:
size, opcode, dataSize, dataSizeLog2 = 0b00, 0b01, 1, 0
case VectorArrangementH:
size, opcode, dataSize, dataSizeLog2 = 0b01, 0b01, 2, 1
case VectorArrangementS:
size, opcode, dataSize, dataSizeLog2 = 0b10, 0b01, 4, 2
case VectorArrangementD:
size, opcode, dataSize, dataSizeLog2 = 0b11, 0b01, 8, 3
case VectorArrangementQ:
size, opcode, dataSize, dataSizeLog2 = 0b00, 0b11, 16, 4
}
const v = 1 // v as in https://developer.arm.com/documentation/ddi0596/2021-12/Index-by-Encoding/Loads-and-Stores?lang=en#ldst_pos
if n.srcReg2 != asm.NilRegister {
offsetRegBits, err := intRegisterBits(n.srcReg2)
if err != nil {
return err
}
a.encodeLoadOrStoreWithRegisterOffset(buf, srcBaseRegBits, offsetRegBits, dstVectorRegBits, opcode, size, v)
} else {
err = a.encodeLoadOrStoreWithConstOffset(buf, srcBaseRegBits, dstVectorRegBits,
n.srcConst, opcode, size, v, dataSize, dataSizeLog2)
}
case LD1R:
if n.srcReg2 != asm.NilRegister || n.srcConst != 0 {
return fmt.Errorf("offset for %s is not implemented", InstructionName(LD1R))
}
var size, q byte
switch n.vectorArrangement {
case VectorArrangement8B:
size, q = 0b00, 0b0
case VectorArrangement16B:
size, q = 0b00, 0b1
case VectorArrangement4H:
size, q = 0b01, 0b0
case VectorArrangement8H:
size, q = 0b01, 0b1
case VectorArrangement2S:
size, q = 0b10, 0b0
case VectorArrangement4S:
size, q = 0b10, 0b1
case VectorArrangement1D:
size, q = 0b11, 0b0
case VectorArrangement2D:
size, q = 0b11, 0b1
}
// No offset encoding.
// https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/LD1R--Load-one-single-element-structure-and-Replicate-to-all-lanes--of-one-register--?lang=en#iclass_as_post_index
buf.Append4Bytes(
(srcBaseRegBits<<5)|dstVectorRegBits,
0b11_000000|size<<2|srcBaseRegBits>>3,
0b01_000000,
q<<6|0b1101,
)
default:
return errorEncodingUnsupported(n)
}
return
}
func arrangementSizeQ(arr VectorArrangement) (size, q byte) {
switch arr {
case VectorArrangement8B:
size, q = 0b00, 0
case VectorArrangement16B:
size, q = 0b00, 1
case VectorArrangement4H:
size, q = 0b01, 0
case VectorArrangement8H:
size, q = 0b01, 1
case VectorArrangement2S:
size, q = 0b10, 0
case VectorArrangement4S:
size, q = 0b10, 1
case VectorArrangement1D:
size, q = 0b11, 0
case VectorArrangement2D:
size, q = 0b11, 1
}
return
}
func (a *AssemblerImpl) encodeVectorRegisterToMemory(buf asm.Buffer, n *nodeImpl) (err error) {
srcVectorRegBits, err := vectorRegisterBits(n.srcReg)
if err != nil {
return err
}
dstBaseRegBits, err := intRegisterBits(n.dstReg)
if err != nil {
return err
}
switch n.instruction {
case VMOV: // translated as STR(immediate,SIMD&FP)
// https://developer.arm.com/documentation/ddi0596/2020-12/SIMD-FP-Instructions/STR--immediate--SIMD-FP---Store-SIMD-FP-register--immediate-offset--
var size, opcode byte
var dataSize, dataSizeLog2 int64
switch n.vectorArrangement {
case VectorArrangementB:
size, opcode, dataSize, dataSizeLog2 = 0b00, 0b00, 1, 0
case VectorArrangementH:
size, opcode, dataSize, dataSizeLog2 = 0b01, 0b00, 2, 1
case VectorArrangementS:
size, opcode, dataSize, dataSizeLog2 = 0b10, 0b00, 4, 2
case VectorArrangementD:
size, opcode, dataSize, dataSizeLog2 = 0b11, 0b00, 8, 3
case VectorArrangementQ:
size, opcode, dataSize, dataSizeLog2 = 0b00, 0b10, 16, 4
}
const v = 1 // v as in https://developer.arm.com/documentation/ddi0596/2021-12/Index-by-Encoding/Loads-and-Stores?lang=en#ldst_pos
if n.dstReg2 != asm.NilRegister {
offsetRegBits, err := intRegisterBits(n.dstReg2)
if err != nil {
return err
}
a.encodeLoadOrStoreWithRegisterOffset(buf, dstBaseRegBits, offsetRegBits, srcVectorRegBits, opcode, size, v)
} else {
err = a.encodeLoadOrStoreWithConstOffset(buf, dstBaseRegBits, srcVectorRegBits,
n.dstConst, opcode, size, v, dataSize, dataSizeLog2)
}
default:
return errorEncodingUnsupported(n)
}
return
}
func (a *AssemblerImpl) encodeStaticConstToVectorRegister(buf asm.Buffer, n *nodeImpl) (err error) {
if n.instruction != VMOV {
return errorEncodingUnsupported(n)
}
dstRegBits, err := vectorRegisterBits(n.dstReg)
if err != nil {
return err
}
// LDR (literal, SIMD&FP)
// https://developer.arm.com/documentation/ddi0596/2020-12/SIMD-FP-Instructions/LDR--literal--SIMD-FP---Load-SIMD-FP-Register--PC-relative-literal--
var opc byte
var constLength int
switch n.vectorArrangement {
case VectorArrangementS:
opc, constLength = 0b00, 4
case VectorArrangementD:
opc, constLength = 0b01, 8
case VectorArrangementQ:
opc, constLength = 0b10, 16
}
loadLiteralOffsetInBinary := uint64(buf.Len())
a.pool.AddConst(n.staticConst, loadLiteralOffsetInBinary)
if len(n.staticConst.Raw) != constLength {
return fmt.Errorf("invalid const length for %s: want %d but was %d",
n.vectorArrangement, constLength, len(n.staticConst.Raw))
}
buf.Append4Bytes(dstRegBits, 0x0, 0x0, opc<<6|0b11100)
n.staticConst.AddOffsetFinalizedCallback(func(offsetOfConst uint64) {
// LDR (literal, SIMD&FP) encodes offset divided by 4.
offset := (int(offsetOfConst) - int(loadLiteralOffsetInBinary)) / 4
bin := buf.Bytes()
bin[loadLiteralOffsetInBinary] |= byte(offset << 5)
bin[loadLiteralOffsetInBinary+1] |= byte(offset >> 3)
bin[loadLiteralOffsetInBinary+2] |= byte(offset >> 11)
})
return
}
// advancedSIMDTwoRegisterMisc holds information to encode instructions as "Advanced SIMD two-register miscellaneous" in
// https://developer.arm.com/documentation/ddi0596/2021-12/Index-by-Encoding/Data-Processing----Scalar-Floating-Point-and-Advanced-SIMD?lang=en
var advancedSIMDTwoRegisterMisc = map[asm.Instruction]struct {
qAndSize map[VectorArrangement]qAndSize
u, opcode byte
}{
// https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/NOT--Bitwise-NOT--vector--?lang=en
NOT: {
u: 0b1, opcode: 0b00101,
qAndSize: map[VectorArrangement]qAndSize{
VectorArrangement16B: {size: 0b00, q: 0b1},
VectorArrangement8B: {size: 0b00, q: 0b0},
},
},
// https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/FNEG--vector---Floating-point-Negate--vector--?lang=en
VFNEG: {
u: 0b1, opcode: 0b01111,
qAndSize: map[VectorArrangement]qAndSize{
VectorArrangement4S: {size: 0b10, q: 0b1},
VectorArrangement2S: {size: 0b10, q: 0b0},
VectorArrangement2D: {size: 0b11, q: 0b1},
},
},
// https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/FABS--vector---Floating-point-Absolute-value--vector--?lang=en
VFABS: {u: 0, opcode: 0b01111, qAndSize: map[VectorArrangement]qAndSize{
VectorArrangement2D: {size: 0b11, q: 0b1},
VectorArrangement4S: {size: 0b10, q: 0b1},
VectorArrangement2S: {size: 0b10, q: 0b0},
}},
// https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/FSQRT--vector---Floating-point-Square-Root--vector--?lang=en
VFSQRT: {u: 1, opcode: 0b11111, qAndSize: map[VectorArrangement]qAndSize{
VectorArrangement2D: {size: 0b11, q: 0b1},
VectorArrangement4S: {size: 0b10, q: 0b1},
VectorArrangement2S: {size: 0b10, q: 0b0},
}},
// https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/FRINTM--vector---Floating-point-Round-to-Integral--toward-Minus-infinity--vector--?lang=en
VFRINTM: {u: 0, opcode: 0b11001, qAndSize: map[VectorArrangement]qAndSize{
VectorArrangement2D: {size: 0b01, q: 0b1},
VectorArrangement4S: {size: 0b00, q: 0b1},
VectorArrangement2S: {size: 0b00, q: 0b0},
}},
// https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/FRINTN--vector---Floating-point-Round-to-Integral--to-nearest-with-ties-to-even--vector--?lang=en
VFRINTN: {u: 0, opcode: 0b11000, qAndSize: map[VectorArrangement]qAndSize{
VectorArrangement2D: {size: 0b01, q: 0b1},
VectorArrangement4S: {size: 0b00, q: 0b1},
VectorArrangement2S: {size: 0b00, q: 0b0},
}},
// https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/FRINTP--vector---Floating-point-Round-to-Integral--toward-Plus-infinity--vector--?lang=en
VFRINTP: {u: 0, opcode: 0b11000, qAndSize: map[VectorArrangement]qAndSize{
VectorArrangement2D: {size: 0b11, q: 0b1},
VectorArrangement4S: {size: 0b10, q: 0b1},
VectorArrangement2S: {size: 0b10, q: 0b0},
}},
// https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/FRINTZ--vector---Floating-point-Round-to-Integral--toward-Zero--vector--?lang=en
VFRINTZ: {u: 0, opcode: 0b11001, qAndSize: map[VectorArrangement]qAndSize{
VectorArrangement2D: {size: 0b11, q: 0b1},
VectorArrangement4S: {size: 0b10, q: 0b1},
VectorArrangement2S: {size: 0b10, q: 0b0},
}},
// https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/CNT--Population-Count-per-byte-?lang=en
VCNT: {u: 0b0, opcode: 0b00101, qAndSize: map[VectorArrangement]qAndSize{
VectorArrangement8B: {size: 0b00, q: 0b0},
VectorArrangement16B: {size: 0b00, q: 0b1},
}},
// https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/NEG--vector---Negate--vector--?lang=en
VNEG: {u: 0b1, opcode: 0b01011, qAndSize: defaultQAndSize},
// https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/ABS--Absolute-value--vector--?lang=en
VABS: {u: 0b0, opcode: 0b01011, qAndSize: defaultQAndSize},
// https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/REV64--Reverse-elements-in-64-bit-doublewords--vector--?lang=en
REV64: {u: 0b0, opcode: 0b00000, qAndSize: defaultQAndSize},
// https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/XTN--XTN2--Extract-Narrow-?lang=en
XTN: {u: 0b0, opcode: 0b10010, qAndSize: map[VectorArrangement]qAndSize{
VectorArrangement2D: {q: 0, size: 0b10},
VectorArrangement4S: {q: 0, size: 0b01},
VectorArrangement8H: {q: 0, size: 0b00},
}},
SHLL: {u: 0b1, opcode: 0b10011, qAndSize: map[VectorArrangement]qAndSize{
VectorArrangement8B: {q: 0b00, size: 0b00},
VectorArrangement4H: {q: 0b00, size: 0b01},
VectorArrangement2S: {q: 0b00, size: 0b10},
}},
// https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/CMEQ--zero---Compare-bitwise-Equal-to-zero--vector--?lang=en
CMEQZERO: {u: 0b0, opcode: 0b01001, qAndSize: defaultQAndSize},
// https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/SADDLP--Signed-Add-Long-Pairwise-?lang=en
SADDLP: {u: 0b0, opcode: 0b00010, qAndSize: defaultQAndSize},
// https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/UADDLP--Unsigned-Add-Long-Pairwise-?lang=en
UADDLP: {u: 0b1, opcode: 0b00010, qAndSize: defaultQAndSize},
// https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/FCVTZS--vector--integer---Floating-point-Convert-to-Signed-integer--rounding-toward-Zero--vector--?lang=en
VFCVTZS: {u: 0b0, opcode: 0b11011, qAndSize: map[VectorArrangement]qAndSize{
VectorArrangement4S: {size: 0b10, q: 0b1},
VectorArrangement2S: {size: 0b10, q: 0b0},
VectorArrangement2D: {size: 0b11, q: 0b1},
}},
// https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/FCVTZU--vector--integer---Floating-point-Convert-to-Unsigned-integer--rounding-toward-Zero--vector--?lang=en
VFCVTZU: {u: 0b1, opcode: 0b11011, qAndSize: map[VectorArrangement]qAndSize{
VectorArrangement4S: {size: 0b10, q: 0b1},
VectorArrangement2S: {size: 0b10, q: 0b0},
VectorArrangement2D: {size: 0b11, q: 0b1},
}},
// https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/SQXTN--SQXTN2--Signed-saturating-extract-Narrow-?lang=en
SQXTN: {u: 0b0, opcode: 0b10100, qAndSize: map[VectorArrangement]qAndSize{
VectorArrangement8B: {q: 0b0, size: 0b00},
VectorArrangement4H: {q: 0b0, size: 0b01},
VectorArrangement2S: {q: 0b0, size: 0b10},
}},
// https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/SQXTN--SQXTN2--Signed-saturating-extract-Narrow-?lang=en
SQXTN2: {u: 0b0, opcode: 0b10100, qAndSize: map[VectorArrangement]qAndSize{
VectorArrangement16B: {q: 0b1, size: 0b00},
VectorArrangement8H: {q: 0b1, size: 0b01},
VectorArrangement4S: {q: 0b1, size: 0b10},
}},
// https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/UQXTN--UQXTN2--Unsigned-saturating-extract-Narrow-?lang=en
UQXTN: {u: 0b1, opcode: 0b10100, qAndSize: defaultQAndSize},
// https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/SQXTUN--SQXTUN2--Signed-saturating-extract-Unsigned-Narrow-?lang=en
SQXTUN: {u: 0b1, opcode: 0b10010, qAndSize: map[VectorArrangement]qAndSize{
VectorArrangement8B: {q: 0b0, size: 0b00},
VectorArrangement4H: {q: 0b0, size: 0b01},
VectorArrangement2S: {q: 0b0, size: 0b10},
}},
// https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/SQXTUN--SQXTUN2--Signed-saturating-extract-Unsigned-Narrow-?lang=en
SQXTUN2: {u: 0b1, opcode: 0b10010, qAndSize: map[VectorArrangement]qAndSize{
VectorArrangement16B: {q: 0b1, size: 0b00},
VectorArrangement8H: {q: 0b1, size: 0b01},
VectorArrangement4S: {q: 0b1, size: 0b10},
}},
// https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/SCVTF--vector--integer---Signed-integer-Convert-to-Floating-point--vector--?lang=en
VSCVTF: {u: 0b0, opcode: 0b11101, qAndSize: map[VectorArrangement]qAndSize{
VectorArrangement2D: {q: 0b1, size: 0b01},
VectorArrangement4S: {q: 0b1, size: 0b00},
VectorArrangement2S: {q: 0b0, size: 0b00},
}},
// https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/UCVTF--vector--integer---Unsigned-integer-Convert-to-Floating-point--vector--?lang=en
VUCVTF: {u: 0b1, opcode: 0b11101, qAndSize: map[VectorArrangement]qAndSize{
VectorArrangement2D: {q: 0b1, size: 0b01},
VectorArrangement4S: {q: 0b1, size: 0b00},
VectorArrangement2S: {q: 0b0, size: 0b00},
}},
// https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/FCVTL--FCVTL2--Floating-point-Convert-to-higher-precision-Long--vector--?lang=en
FCVTL: {u: 0b0, opcode: 0b10111, qAndSize: map[VectorArrangement]qAndSize{
VectorArrangement2S: {size: 0b01, q: 0b0},
VectorArrangement4H: {size: 0b00, q: 0b0},
}},
// https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/FCVTN--FCVTN2--Floating-point-Convert-to-lower-precision-Narrow--vector--?lang=en
FCVTN: {u: 0b0, opcode: 0b10110, qAndSize: map[VectorArrangement]qAndSize{
VectorArrangement2S: {size: 0b01, q: 0b0},
VectorArrangement4H: {size: 0b00, q: 0b0},
}},
}
// advancedSIMDThreeDifferent holds information to encode instructions as "Advanced SIMD three different" in
// https://developer.arm.com/documentation/ddi0596/2021-12/Index-by-Encoding/Data-Processing----Scalar-Floating-Point-and-Advanced-SIMD?lang=en
var advancedSIMDThreeDifferent = map[asm.Instruction]struct {
qAndSize map[VectorArrangement]qAndSize
u, opcode byte
}{
// https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/UMLAL--UMLAL2--vector---Unsigned-Multiply-Add-Long--vector--?lang=en
VUMLAL: {u: 0b1, opcode: 0b1000, qAndSize: map[VectorArrangement]qAndSize{
VectorArrangement2S: {q: 0b0, size: 0b10},
VectorArrangement4H: {q: 0b0, size: 0b01},
VectorArrangement8B: {q: 0b0, size: 0b00},
}},
// https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/SMULL--SMULL2--vector---Signed-Multiply-Long--vector--?lang=en
SMULL: {u: 0b0, opcode: 0b1100, qAndSize: map[VectorArrangement]qAndSize{
VectorArrangement8B: {q: 0b0, size: 0b00},
VectorArrangement4H: {q: 0b0, size: 0b01},
VectorArrangement2S: {q: 0b0, size: 0b10},
}},
// https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/SMULL--SMULL2--vector---Signed-Multiply-Long--vector--?lang=en
SMULL2: {u: 0b0, opcode: 0b1100, qAndSize: map[VectorArrangement]qAndSize{
VectorArrangement16B: {q: 0b1, size: 0b00},
VectorArrangement8H: {q: 0b1, size: 0b01},
VectorArrangement4S: {q: 0b1, size: 0b10},
}},
// https://developer.arm.com/documentation/ddi0596/2021-12/Index-by-Encoding/Data-Processing----Scalar-Floating-Point-and-Advanced-SIMD?lang=en
UMULL: {u: 0b1, opcode: 0b1100, qAndSize: map[VectorArrangement]qAndSize{
VectorArrangement8B: {q: 0b0, size: 0b00},
VectorArrangement4H: {q: 0b0, size: 0b01},
VectorArrangement2S: {q: 0b0, size: 0b10},
}},
// https://developer.arm.com/documentation/ddi0596/2021-12/Index-by-Encoding/Data-Processing----Scalar-Floating-Point-and-Advanced-SIMD?lang=en
UMULL2: {u: 0b1, opcode: 0b1100, qAndSize: map[VectorArrangement]qAndSize{
VectorArrangement16B: {q: 0b1, size: 0b00},
VectorArrangement8H: {q: 0b1, size: 0b01},
VectorArrangement4S: {q: 0b1, size: 0b10},
}},
}
// advancedSIMDThreeSame holds information to encode instructions as "Advanced SIMD three same" in
// https://developer.arm.com/documentation/ddi0596/2021-12/Index-by-Encoding/Data-Processing----Scalar-Floating-Point-and-Advanced-SIMD?lang=en
var advancedSIMDThreeSame = map[asm.Instruction]struct {
qAndSize map[VectorArrangement]qAndSize
u, opcode byte
}{
// https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/AND--vector---Bitwise-AND--vector--?lang=en
VAND: {
u: 0b0, opcode: 0b00011,
qAndSize: map[VectorArrangement]qAndSize{
VectorArrangement16B: {size: 0b00, q: 0b1},
VectorArrangement8B: {size: 0b00, q: 0b0},
},
},
// https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/BSL--Bitwise-Select-?lang=en
BSL: {
u: 0b1, opcode: 0b00011,
qAndSize: map[VectorArrangement]qAndSize{
VectorArrangement16B: {size: 0b01, q: 0b1},
VectorArrangement8B: {size: 0b01, q: 0b0},
},
},
// https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/EOR--vector---Bitwise-Exclusive-OR--vector--?lang=en
EOR: {
u: 0b1, opcode: 0b00011,
qAndSize: map[VectorArrangement]qAndSize{
VectorArrangement16B: {size: 0b00, q: 0b1},
VectorArrangement8B: {size: 0b00, q: 0b0},
},
},
// https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/ORR--vector--register---Bitwise-inclusive-OR--vector--register--?lang=en
VORR: {
u: 0b0, opcode: 0b00011,
qAndSize: map[VectorArrangement]qAndSize{
VectorArrangement16B: {size: 0b10, q: 0b1},
VectorArrangement8B: {size: 0b10, q: 0b0},
},
},
// https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/BIC--vector--register---Bitwise-bit-Clear--vector--register--?lang=en
BIC: {
u: 0b0, opcode: 0b00011,
qAndSize: map[VectorArrangement]qAndSize{
VectorArrangement16B: {size: 0b01, q: 0b1},
VectorArrangement8B: {size: 0b01, q: 0b0},
},
},
// https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/FADD--vector---Floating-point-Add--vector--?lang=en
VFADDS: {
u: 0b0, opcode: 0b11010,
qAndSize: map[VectorArrangement]qAndSize{
VectorArrangement4S: {size: 0b00, q: 0b1},
VectorArrangement2S: {size: 0b00, q: 0b0},
},
},
// https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/FADD--vector---Floating-point-Add--vector--?lang=en
VFADDD: {
u: 0b0, opcode: 0b11010,
qAndSize: map[VectorArrangement]qAndSize{
VectorArrangement2D: {size: 0b01, q: 0b1},
},
},
// https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/FSUB--vector---Floating-point-Subtract--vector--?lang=en
VFSUBS: {
u: 0b0, opcode: 0b11010,
qAndSize: map[VectorArrangement]qAndSize{
VectorArrangement4S: {size: 0b10, q: 0b1},
VectorArrangement2S: {size: 0b10, q: 0b0},
},
},
// https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/FSUB--vector---Floating-point-Subtract--vector--?lang=en
VFSUBD: {
u: 0b0, opcode: 0b11010,
qAndSize: map[VectorArrangement]qAndSize{
VectorArrangement2D: {size: 0b11, q: 0b1},
},
},
// https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/UMAXP--Unsigned-Maximum-Pairwise-?lang=en
UMAXP: {u: 0b1, opcode: 0b10100, qAndSize: defaultQAndSize},
// https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/CMEQ--register---Compare-bitwise-Equal--vector--?lang=en
CMEQ: {u: 0b1, opcode: 0b10001, qAndSize: defaultQAndSize},
// https://developer.arm.com/documentation/dui0801/g/A64-SIMD-Vector-Instructions/ADDP--vector-
VADDP: {u: 0b0, opcode: 0b10111, qAndSize: defaultQAndSize},
// https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/ADD--vector---Add--vector--?lang=en
VADD: {u: 0, opcode: 0b10000, qAndSize: defaultQAndSize},
// https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/SUB--vector---Subtract--vector--?lang=en
VSUB: {u: 1, opcode: 0b10000, qAndSize: defaultQAndSize},
// https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/SSHL--Signed-Shift-Left--register--?lang=en
SSHL: {u: 0, opcode: 0b01000, qAndSize: defaultQAndSize},
// https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/SSHL--Signed-Shift-Left--register--?lang=en
USHL: {u: 0b1, opcode: 0b01000, qAndSize: defaultQAndSize},
// https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/CMGT--register---Compare-signed-Greater-than--vector--?lang=en
CMGT: {u: 0b0, opcode: 0b00110, qAndSize: defaultQAndSize},
// https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/CMHI--register---Compare-unsigned-Higher--vector--?lang=en
CMHI: {u: 0b1, opcode: 0b00110, qAndSize: defaultQAndSize},
// https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/CMGE--register---Compare-signed-Greater-than-or-Equal--vector--?lang=en
CMGE: {u: 0b0, opcode: 0b00111, qAndSize: defaultQAndSize},
// https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/CMHS--register---Compare-unsigned-Higher-or-Same--vector--?lang=en
CMHS: {u: 0b1, opcode: 0b00111, qAndSize: defaultQAndSize},
// https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/FCMEQ--register---Floating-point-Compare-Equal--vector--?lang=en
FCMEQ: {
u: 0b0, opcode: 0b11100,
qAndSize: map[VectorArrangement]qAndSize{
VectorArrangement4S: {size: 0b00, q: 0b1},
VectorArrangement2S: {size: 0b00, q: 0b0},
VectorArrangement2D: {size: 0b01, q: 0b1},
},
},
// https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/FCMGT--register---Floating-point-Compare-Greater-than--vector--?lang=en
FCMGT: {
u: 0b1, opcode: 0b11100,
qAndSize: map[VectorArrangement]qAndSize{
VectorArrangement4S: {size: 0b10, q: 0b1},
VectorArrangement2S: {size: 0b10, q: 0b0},
VectorArrangement2D: {size: 0b11, q: 0b1},
},
},
// https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/FCMGE--register---Floating-point-Compare-Greater-than-or-Equal--vector--?lang=en
FCMGE: {
u: 0b1, opcode: 0b11100,
qAndSize: map[VectorArrangement]qAndSize{
VectorArrangement4S: {size: 0b00, q: 0b1},
VectorArrangement2S: {size: 0b00, q: 0b0},
VectorArrangement2D: {size: 0b01, q: 0b1},
},
},
// https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/FMIN--vector---Floating-point-minimum--vector--?lang=en
VFMIN: {
u: 0b0, opcode: 0b11110,
qAndSize: map[VectorArrangement]qAndSize{
VectorArrangement4S: {size: 0b10, q: 0b1},
VectorArrangement2S: {size: 0b10, q: 0b0},
VectorArrangement2D: {size: 0b11, q: 0b1},
},
},
// https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/FMAX--vector---Floating-point-Maximum--vector--?lang=en
VFMAX: {
u: 0b0, opcode: 0b11110,
qAndSize: map[VectorArrangement]qAndSize{
VectorArrangement4S: {size: 0b00, q: 0b1},
VectorArrangement2S: {size: 0b00, q: 0b0},
VectorArrangement2D: {size: 0b01, q: 0b1},
},
},
// https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/FMUL--vector---Floating-point-Multiply--vector--?lang=en
VFMUL: {
u: 0b1, opcode: 0b11011,
qAndSize: map[VectorArrangement]qAndSize{
VectorArrangement4S: {size: 0b00, q: 0b1},
VectorArrangement2S: {size: 0b00, q: 0b0},
VectorArrangement2D: {size: 0b01, q: 0b1},
},
},
// https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/FDIV--vector---Floating-point-Divide--vector--?lang=en
VFDIV: {
u: 0b1, opcode: 0b11111,
qAndSize: map[VectorArrangement]qAndSize{
VectorArrangement4S: {size: 0b00, q: 0b1},
VectorArrangement2S: {size: 0b00, q: 0b0},
VectorArrangement2D: {size: 0b01, q: 0b1},
},
},
// https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/MUL--vector---Multiply--vector--?lang=en
VMUL: {u: 0b0, opcode: 0b10011, qAndSize: defaultQAndSize},
// https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/SQADD--Signed-saturating-Add-?lang=en
VSQADD: {u: 0b0, opcode: 0b00001, qAndSize: defaultQAndSize},
// https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/UQADD--Unsigned-saturating-Add-?lang=en
VUQADD: {u: 0b1, opcode: 0b00001, qAndSize: defaultQAndSize},
// https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/SMIN--Signed-Minimum--vector--?lang=en
SMIN: {u: 0b0, opcode: 0b01101, qAndSize: defaultQAndSize},
// https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/SMAX--Signed-Maximum--vector--?lang=en
SMAX: {u: 0b0, opcode: 0b01100, qAndSize: defaultQAndSize},
// https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/UMIN--Unsigned-Minimum--vector--?lang=en
UMIN: {u: 0b1, opcode: 0b01101, qAndSize: defaultQAndSize},
// https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/UMAX--Unsigned-Maximum--vector--?lang=en
UMAX: {u: 0b1, opcode: 0b01100, qAndSize: defaultQAndSize},
// https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/URHADD--Unsigned-Rounding-Halving-Add-?lang=en
URHADD: {u: 0b1, opcode: 0b00010, qAndSize: defaultQAndSize},
// https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/SQSUB--Signed-saturating-Subtract-?lang=en
VSQSUB: {u: 0b0, opcode: 0b00101, qAndSize: defaultQAndSize},
// https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/UQSUB--Unsigned-saturating-Subtract-?lang=en
VUQSUB: {u: 0b1, opcode: 0b00101, qAndSize: defaultQAndSize},
// https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/BIT--Bitwise-Insert-if-True-?lang=en
VBIT: {u: 0b1, opcode: 0b00011, qAndSize: map[VectorArrangement]qAndSize{
VectorArrangement8B: {q: 0b0, size: 0b10},
VectorArrangement16B: {q: 0b1, size: 0b10},
}},
SQRDMULH: {u: 0b1, opcode: 0b10110, qAndSize: map[VectorArrangement]qAndSize{
VectorArrangement4H: {q: 0b0, size: 0b01},
VectorArrangement8H: {q: 0b1, size: 0b01},
VectorArrangement2S: {q: 0b0, size: 0b10},
VectorArrangement4S: {q: 0b1, size: 0b10},
}},
}
// aAndSize is a pair of "Q" and "size" that appear in https://developer.arm.com/documentation/ddi0596/2021-12/Index-by-Encoding/Data-Processing----Scalar-Floating-Point-and-Advanced-SIMD?lang=en
type qAndSize struct{ q, size byte }
// defaultQAndSize maps a vector arrangement to the default qAndSize which is encoded by many instructions.
var defaultQAndSize = map[VectorArrangement]qAndSize{
VectorArrangement8B: {size: 0b00, q: 0b0},
VectorArrangement16B: {size: 0b00, q: 0b1},
VectorArrangement4H: {size: 0b01, q: 0b0},
VectorArrangement8H: {size: 0b01, q: 0b1},
VectorArrangement2S: {size: 0b10, q: 0b0},
VectorArrangement4S: {size: 0b10, q: 0b1},
VectorArrangement1D: {size: 0b11, q: 0b0},
VectorArrangement2D: {size: 0b11, q: 0b1},
}
// advancedSIMDAcrossLanes holds information to encode instructions as "Advanced SIMD across lanes" in
// https://developer.arm.com/documentation/ddi0596/2021-12/Index-by-Encoding/Data-Processing----Scalar-Floating-Point-and-Advanced-SIMD?lang=en
var advancedSIMDAcrossLanes = map[asm.Instruction]struct {
qAndSize map[VectorArrangement]qAndSize
u, opcode byte
}{
// https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/ADDV--Add-across-Vector-?lang=en
ADDV: {
u: 0b0, opcode: 0b11011,
qAndSize: map[VectorArrangement]qAndSize{
VectorArrangement16B: {size: 0b00, q: 0b1},
VectorArrangement8B: {size: 0b00, q: 0b0},
VectorArrangement8H: {size: 0b01, q: 0b1},
VectorArrangement4H: {size: 0b01, q: 0b0},
VectorArrangement4S: {size: 0b10, q: 0b1},
},
},
// https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/UMINV--Unsigned-Minimum-across-Vector-?lang=en
UMINV: {
u: 0b1, opcode: 0b11010,
qAndSize: map[VectorArrangement]qAndSize{
VectorArrangement16B: {size: 0b00, q: 0b1},
VectorArrangement8B: {size: 0b00, q: 0b0},
VectorArrangement8H: {size: 0b01, q: 0b1},
VectorArrangement4H: {size: 0b01, q: 0b0},
VectorArrangement4S: {size: 0b10, q: 0b1},
},
},
UADDLV: {u: 0b1, opcode: 0b00011, qAndSize: map[VectorArrangement]qAndSize{
VectorArrangement16B: {size: 0b00, q: 0b1},
VectorArrangement8B: {size: 0b00, q: 0b0},
VectorArrangement8H: {size: 0b01, q: 0b1},
VectorArrangement4H: {size: 0b01, q: 0b0},
VectorArrangement4S: {size: 0b10, q: 0b1},
}},
}
// advancedSIMDScalarPairwise holds information to encode instructions as "Advanced SIMD scalar pairwise" in
// https://developer.arm.com/documentation/ddi0596/2021-12/Index-by-Encoding/Data-Processing----Scalar-Floating-Point-and-Advanced-SIMD?lang=en
var advancedSIMDScalarPairwise = map[asm.Instruction]struct {
size map[VectorArrangement]byte
u, opcode byte
}{
// https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/ADDP--scalar---Add-Pair-of-elements--scalar--?lang=en
ADDP: {u: 0b0, opcode: 0b11011, size: map[VectorArrangement]byte{VectorArrangement2D: 0b11}},
}
// advancedSIMDCopy holds information to encode instructions as "Advanced SIMD copy" in
// https://developer.arm.com/documentation/ddi0596/2021-12/Index-by-Encoding/Data-Processing----Scalar-Floating-Point-and-Advanced-SIMD?lang=en
var advancedSIMDCopy = map[asm.Instruction]struct {
// TODO: extract common implementation of resolver.
resolver func(srcIndex, dstIndex VectorIndex, arr VectorArrangement) (imm5, imm4, q byte, err error)
op byte
}{
// https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/DUP--element---Duplicate-vector-element-to-vector-or-scalar-?lang=en
DUPELEM: {op: 0, resolver: func(srcIndex, dstIndex VectorIndex, arr VectorArrangement) (imm5, imm4, q byte, err error) {
imm4 = 0b0000
q = 0b1
switch arr {
case VectorArrangementB:
imm5 |= 0b1
imm5 |= byte(srcIndex) << 1
case VectorArrangementH:
imm5 |= 0b10
imm5 |= byte(srcIndex) << 2
case VectorArrangementS:
imm5 |= 0b100
imm5 |= byte(srcIndex) << 3
case VectorArrangementD:
imm5 |= 0b1000
imm5 |= byte(srcIndex) << 4
default:
err = fmt.Errorf("unsupported arrangement for DUPELEM: %d", arr)
}
return
}},
// https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/DUP--general---Duplicate-general-purpose-register-to-vector-?lang=en
DUPGEN: {op: 0b0, resolver: func(srcIndex, dstIndex VectorIndex, arr VectorArrangement) (imm5, imm4, q byte, err error) {
imm4 = 0b0001
switch arr {
case VectorArrangement8B:
imm5 = 0b1
case VectorArrangement16B:
imm5 = 0b1
q = 0b1
case VectorArrangement4H:
imm5 = 0b10
case VectorArrangement8H:
imm5 = 0b10
q = 0b1
case VectorArrangement2S:
imm5 = 0b100
case VectorArrangement4S:
imm5 = 0b100
q = 0b1
case VectorArrangement2D:
imm5 = 0b1000
q = 0b1
default:
err = fmt.Errorf("unsupported arrangement for DUPGEN: %s", arr)
}
return
}},
// https://developer.arm.com/documentation/ddi0596/2020-12/SIMD-FP-Instructions/INS--general---Insert-vector-element-from-general-purpose-register-?lang=en
INSGEN: {op: 0b0, resolver: func(srcIndex, dstIndex VectorIndex, arr VectorArrangement) (imm5, imm4, q byte, err error) {
imm4, q = 0b0011, 0b1
switch arr {
case VectorArrangementB:
imm5 |= 0b1
imm5 |= byte(dstIndex) << 1
case VectorArrangementH:
imm5 |= 0b10
imm5 |= byte(dstIndex) << 2
case VectorArrangementS:
imm5 |= 0b100
imm5 |= byte(dstIndex) << 3
case VectorArrangementD:
imm5 |= 0b1000
imm5 |= byte(dstIndex) << 4
default:
err = fmt.Errorf("unsupported arrangement for INSGEN: %s", arr)
}
return
}},
// https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/UMOV--Unsigned-Move-vector-element-to-general-purpose-register-?lang=en
UMOV: {op: 0b0, resolver: func(srcIndex, dstIndex VectorIndex, arr VectorArrangement) (imm5, imm4, q byte, err error) {
imm4 = 0b0111
switch arr {
case VectorArrangementB:
imm5 |= 0b1
imm5 |= byte(srcIndex) << 1
case VectorArrangementH:
imm5 |= 0b10
imm5 |= byte(srcIndex) << 2
case VectorArrangementS:
imm5 |= 0b100
imm5 |= byte(srcIndex) << 3
case VectorArrangementD:
imm5 |= 0b1000
imm5 |= byte(srcIndex) << 4
q = 0b1
default:
err = fmt.Errorf("unsupported arrangement for UMOV: %s", arr)
}
return
}},
// https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/SMOV--Signed-Move-vector-element-to-general-purpose-register-?lang=en
SMOV32: {op: 0b0, resolver: func(srcIndex, dstIndex VectorIndex, arr VectorArrangement) (imm5, imm4, q byte, err error) {
imm4 = 0b0101
switch arr {
case VectorArrangementB:
imm5 |= 0b1
imm5 |= byte(srcIndex) << 1
case VectorArrangementH:
imm5 |= 0b10
imm5 |= byte(srcIndex) << 2
default:
err = fmt.Errorf("unsupported arrangement for SMOV32: %s", arr)
}
return
}},
// https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/INS--element---Insert-vector-element-from-another-vector-element-?lang=en
INSELEM: {op: 0b1, resolver: func(srcIndex, dstIndex VectorIndex, arr VectorArrangement) (imm5, imm4, q byte, err error) {
q = 0b1
switch arr {
case VectorArrangementB:
imm5 |= 0b1
imm5 |= byte(dstIndex) << 1
imm4 = byte(srcIndex)
case VectorArrangementH:
imm5 |= 0b10
imm5 |= byte(dstIndex) << 2
imm4 = byte(srcIndex) << 1
case VectorArrangementS:
imm5 |= 0b100
imm5 |= byte(dstIndex) << 3
imm4 = byte(srcIndex) << 2
case VectorArrangementD:
imm5 |= 0b1000
imm5 |= byte(dstIndex) << 4
imm4 = byte(srcIndex) << 3
default:
err = fmt.Errorf("unsupported arrangement for INSELEM: %d", arr)
}
return
}},
}
// advancedSIMDTableLookup holds information to encode instructions as "Advanced SIMD table lookup" in
// https://developer.arm.com/documentation/ddi0596/2021-12/Index-by-Encoding/Data-Processing----Scalar-Floating-Point-and-Advanced-SIMD?lang=en
var advancedSIMDTableLookup = map[asm.Instruction]struct {
q map[VectorArrangement]byte
op, op2, Len byte
}{
TBL1: {op: 0, op2: 0, Len: 0b00, q: map[VectorArrangement]byte{VectorArrangement16B: 0b1, VectorArrangement8B: 0b0}},
TBL2: {op: 0, op2: 0, Len: 0b01, q: map[VectorArrangement]byte{VectorArrangement16B: 0b1, VectorArrangement8B: 0b0}},
}
// advancedSIMDShiftByImmediate holds information to encode instructions as "Advanced SIMD shift by immediate" in
// https://developer.arm.com/documentation/ddi0596/2021-12/Index-by-Encoding/Data-Processing----Scalar-Floating-Point-and-Advanced-SIMD?lang=en
var advancedSIMDShiftByImmediate = map[asm.Instruction]struct {
q map[VectorArrangement]byte
immResolver func(shiftAmount int64, arr VectorArrangement) (immh, immb byte, err error)
U, opcode byte
}{
// https://developer.arm.com/documentation/ddi0596/2020-12/SIMD-FP-Instructions/SSHLL--SSHLL2--Signed-Shift-Left-Long--immediate--
SSHLL: {
U: 0b0, opcode: 0b10100,
q: map[VectorArrangement]byte{VectorArrangement8B: 0b0, VectorArrangement4H: 0b0, VectorArrangement2S: 0b0},
immResolver: immResolverForSIMDSiftLeftByImmediate,
},
// https://developer.arm.com/documentation/ddi0596/2020-12/SIMD-FP-Instructions/SSHLL--SSHLL2--Signed-Shift-Left-Long--immediate--
SSHLL2: {
U: 0b0, opcode: 0b10100,
q: map[VectorArrangement]byte{VectorArrangement16B: 0b1, VectorArrangement8H: 0b1, VectorArrangement4S: 0b1},
immResolver: immResolverForSIMDSiftLeftByImmediate,
},
// https://developer.arm.com/documentation/ddi0596/2020-12/SIMD-FP-Instructions/USHLL--USHLL2--Unsigned-Shift-Left-Long--immediate--
USHLL: {
U: 0b1, opcode: 0b10100,
q: map[VectorArrangement]byte{VectorArrangement8B: 0b0, VectorArrangement4H: 0b0, VectorArrangement2S: 0b0},
immResolver: immResolverForSIMDSiftLeftByImmediate,
},
// https://developer.arm.com/documentation/ddi0596/2020-12/SIMD-FP-Instructions/USHLL--USHLL2--Unsigned-Shift-Left-Long--immediate--
USHLL2: {
U: 0b1, opcode: 0b10100,
q: map[VectorArrangement]byte{VectorArrangement16B: 0b1, VectorArrangement8H: 0b1, VectorArrangement4S: 0b1},
immResolver: immResolverForSIMDSiftLeftByImmediate,
},
// https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/SSHR--Signed-Shift-Right--immediate--?lang=en
SSHR: {
U: 0b0, opcode: 0b00000,
q: map[VectorArrangement]byte{
VectorArrangement16B: 0b1, VectorArrangement8H: 0b1, VectorArrangement4S: 0b1, VectorArrangement2D: 0b1,
VectorArrangement8B: 0b0, VectorArrangement4H: 0b0, VectorArrangement2S: 0b0,
},
immResolver: func(shiftAmount int64, arr VectorArrangement) (immh, immb byte, err error) {
switch arr {
case VectorArrangement16B, VectorArrangement8B:
immh = 0b0001
immb = 8 - byte(shiftAmount&0b111)
case VectorArrangement8H, VectorArrangement4H:
v := 16 - byte(shiftAmount&0b1111)
immb = v & 0b111
immh = 0b0010 | (v >> 3)
case VectorArrangement4S, VectorArrangement2S:
v := 32 - byte(shiftAmount&0b11111)
immb = v & 0b111
immh = 0b0100 | (v >> 3)
case VectorArrangement2D:
v := 64 - byte(shiftAmount&0b111111)
immb = v & 0b111
immh = 0b1000 | (v >> 3)
default:
err = fmt.Errorf("unsupported arrangement %s", arr)
}
return
},
},
}
// advancedSIMDPermute holds information to encode instructions as "Advanced SIMD permute" in
// https://developer.arm.com/documentation/ddi0596/2021-12/Index-by-Encoding/Data-Processing----Scalar-Floating-Point-and-Advanced-SIMD?lang=en
var advancedSIMDPermute = map[asm.Instruction]struct {
opcode byte
}{
ZIP1: {opcode: 0b011},
}
func immResolverForSIMDSiftLeftByImmediate(shiftAmount int64, arr VectorArrangement) (immh, immb byte, err error) {
switch arr {
case VectorArrangement16B, VectorArrangement8B:
immb = byte(shiftAmount)
immh = 0b0001
case VectorArrangement8H, VectorArrangement4H:
immb = byte(shiftAmount) & 0b111
immh = 0b0010 | byte(shiftAmount>>3)
case VectorArrangement4S, VectorArrangement2S:
immb = byte(shiftAmount) & 0b111
immh = 0b0100 | byte(shiftAmount>>3)
default:
err = fmt.Errorf("unsupported arrangement %s", arr)
}
return
}
// encodeAdvancedSIMDCopy encodes instruction as "Advanced SIMD copy" in
// https://developer.arm.com/documentation/ddi0596/2021-12/Index-by-Encoding/Data-Processing----Scalar-Floating-Point-and-Advanced-SIMD?lang=en
func (a *AssemblerImpl) encodeAdvancedSIMDCopy(buf asm.Buffer, srcRegBits, dstRegBits, op, imm5, imm4, q byte) {
buf.Append4Bytes(
(srcRegBits<<5)|dstRegBits,
imm4<<3|0b1<<2|srcRegBits>>3,
imm5,
q<<6|op<<5|0b1110,
)
}
// encodeAdvancedSIMDThreeSame encodes instruction as "Advanced SIMD three same" in
// https://developer.arm.com/documentation/ddi0596/2021-12/Index-by-Encoding/Data-Processing----Scalar-Floating-Point-and-Advanced-SIMD?lang=en
func (a *AssemblerImpl) encodeAdvancedSIMDThreeSame(buf asm.Buffer, src1, src2, dst, opcode, size, q, u byte) {
buf.Append4Bytes(
(src2<<5)|dst,
opcode<<3|1<<2|src2>>3,
size<<6|0b1<<5|src1,
q<<6|u<<5|0b1110,
)
}
// encodeAdvancedSIMDThreeDifferent encodes instruction as "Advanced SIMD three different" in
// https://developer.arm.com/documentation/ddi0596/2021-12/Index-by-Encoding/Data-Processing----Scalar-Floating-Point-and-Advanced-SIMD?lang=en
func (a *AssemblerImpl) encodeAdvancedSIMDThreeDifferent(buf asm.Buffer, src1, src2, dst, opcode, size, q, u byte) {
buf.Append4Bytes(
(src2<<5)|dst,
opcode<<4|src2>>3,
size<<6|0b1<<5|src1,
q<<6|u<<5|0b1110,
)
}
// encodeAdvancedSIMDPermute encodes instruction as "Advanced SIMD permute" in
// https://developer.arm.com/documentation/ddi0596/2021-12/Index-by-Encoding/Data-Processing----Scalar-Floating-Point-and-Advanced-SIMD?lang=en
func (a *AssemblerImpl) encodeAdvancedSIMDPermute(buf asm.Buffer, src1, src2, dst, opcode, size, q byte) {
buf.Append4Bytes(
(src2<<5)|dst,
opcode<<4|0b1<<3|src2>>3,
size<<6|src1,
q<<6|0b1110,
)
}
func (a *AssemblerImpl) encodeVectorRegisterToVectorRegister(buf asm.Buffer, n *nodeImpl) (err error) {
var srcVectorRegBits byte
if n.srcReg != RegRZR {
srcVectorRegBits, err = vectorRegisterBits(n.srcReg)
} else if n.instruction == CMEQZERO {
// CMEQZERO has RegRZR as the src, and we apply the instruction to the same register as the destination.
srcVectorRegBits, err = vectorRegisterBits(n.dstReg)
}
if err != nil {
return err
}
dstVectorRegBits, err := vectorRegisterBits(n.dstReg)
if err != nil {
return err
}
if simdCopy, ok := advancedSIMDCopy[n.instruction]; ok {
imm5, imm4, q, err := simdCopy.resolver(n.srcVectorIndex, n.dstVectorIndex, n.vectorArrangement)
if err != nil {
return err
}
a.encodeAdvancedSIMDCopy(buf, srcVectorRegBits, dstVectorRegBits, simdCopy.op, imm5, imm4, q)
return nil
}
if scalarPairwise, ok := advancedSIMDScalarPairwise[n.instruction]; ok {
// See "Advanced SIMD scalar pairwise" in
// https://developer.arm.com/documentation/ddi0596/2021-12/Index-by-Encoding/Data-Processing----Scalar-Floating-Point-and-Advanced-SIMD?lang=en
size, ok := scalarPairwise.size[n.vectorArrangement]
if !ok {
return fmt.Errorf("unsupported vector arrangement %s for %s", n.vectorArrangement, InstructionName(n.instruction))
}
buf.Append4Bytes(
(srcVectorRegBits<<5)|dstVectorRegBits,
scalarPairwise.opcode<<4|1<<3|srcVectorRegBits>>3,
size<<6|0b11<<4|scalarPairwise.opcode>>4,
0b1<<6|scalarPairwise.u<<5|0b11110,
)
return
}
if twoRegMisc, ok := advancedSIMDTwoRegisterMisc[n.instruction]; ok {
// See "Advanced SIMD two-register miscellaneous" in
// https://developer.arm.com/documentation/ddi0596/2021-12/Index-by-Encoding/Data-Processing----Scalar-Floating-Point-and-Advanced-SIMD?lang=en
qs, ok := twoRegMisc.qAndSize[n.vectorArrangement]
if !ok {
return fmt.Errorf("unsupported vector arrangement %s for %s", n.vectorArrangement, InstructionName(n.instruction))
}
buf.Append4Bytes(
(srcVectorRegBits<<5)|dstVectorRegBits,
twoRegMisc.opcode<<4|0b1<<3|srcVectorRegBits>>3,
qs.size<<6|0b1<<5|twoRegMisc.opcode>>4,
qs.q<<6|twoRegMisc.u<<5|0b01110,
)
return nil
}
if threeSame, ok := advancedSIMDThreeSame[n.instruction]; ok {
qs, ok := threeSame.qAndSize[n.vectorArrangement]
if !ok {
return fmt.Errorf("unsupported vector arrangement %s for %s", n.vectorArrangement, InstructionName(n.instruction))
}
a.encodeAdvancedSIMDThreeSame(buf, srcVectorRegBits, dstVectorRegBits, dstVectorRegBits, threeSame.opcode, qs.size, qs.q, threeSame.u)
return nil
}
if threeDifferent, ok := advancedSIMDThreeDifferent[n.instruction]; ok {
qs, ok := threeDifferent.qAndSize[n.vectorArrangement]
if !ok {
return fmt.Errorf("unsupported vector arrangement %s for %s", n.vectorArrangement, InstructionName(n.instruction))
}
a.encodeAdvancedSIMDThreeDifferent(buf, srcVectorRegBits, dstVectorRegBits, dstVectorRegBits, threeDifferent.opcode, qs.size, qs.q, threeDifferent.u)
return nil
}
if acrossLanes, ok := advancedSIMDAcrossLanes[n.instruction]; ok {
// See "Advanced SIMD across lanes" in
// https://developer.arm.com/documentation/ddi0596/2021-12/Index-by-Encoding/Data-Processing----Scalar-Floating-Point-and-Advanced-SIMD?lang=en
qs, ok := acrossLanes.qAndSize[n.vectorArrangement]
if !ok {
return fmt.Errorf("unsupported vector arrangement %s for %s", n.vectorArrangement, InstructionName(n.instruction))
}
buf.Append4Bytes(
(srcVectorRegBits<<5)|dstVectorRegBits,
acrossLanes.opcode<<4|0b1<<3|srcVectorRegBits>>3,
qs.size<<6|0b11000<<1|acrossLanes.opcode>>4,
qs.q<<6|acrossLanes.u<<5|0b01110,
)
return nil
}
if lookup, ok := advancedSIMDTableLookup[n.instruction]; ok {
q, ok := lookup.q[n.vectorArrangement]
if !ok {
return fmt.Errorf("unsupported vector arrangement %s for %s", n.vectorArrangement, InstructionName(n.instruction))
}
buf.Append4Bytes(
(srcVectorRegBits<<5)|dstVectorRegBits,
lookup.Len<<5|lookup.op<<4|srcVectorRegBits>>3,
lookup.op2<<6|dstVectorRegBits,
q<<6|0b1110,
)
return
}
if shiftByImmediate, ok := advancedSIMDShiftByImmediate[n.instruction]; ok {
immh, immb, err := shiftByImmediate.immResolver(n.srcConst, n.vectorArrangement)
if err != nil {
return err
}
q, ok := shiftByImmediate.q[n.vectorArrangement]
if !ok {
return fmt.Errorf("unsupported vector arrangement %s for %s", n.vectorArrangement, InstructionName(n.instruction))
}
buf.Append4Bytes(
(srcVectorRegBits<<5)|dstVectorRegBits,
shiftByImmediate.opcode<<3|0b1<<2|srcVectorRegBits>>3,
immh<<3|immb,
q<<6|shiftByImmediate.U<<5|0b1111,
)
return nil
}
if permute, ok := advancedSIMDPermute[n.instruction]; ok {
size, q := arrangementSizeQ(n.vectorArrangement)
a.encodeAdvancedSIMDPermute(buf, srcVectorRegBits, dstVectorRegBits, dstVectorRegBits, permute.opcode, size, q)
return
}
return errorEncodingUnsupported(n)
}
func (a *AssemblerImpl) encodeTwoVectorRegistersToVectorRegister(buf asm.Buffer, n *nodeImpl) (err error) {
var srcRegBits, srcRegBits2, dstRegBits byte
srcRegBits, err = vectorRegisterBits(n.srcReg)
if err != nil {
return err
}
srcRegBits2, err = vectorRegisterBits(n.srcReg2)
if err != nil {
return err
}
dstRegBits, err = vectorRegisterBits(n.dstReg)
if err != nil {
return err
}
if threeSame, ok := advancedSIMDThreeSame[n.instruction]; ok {
qs, ok := threeSame.qAndSize[n.vectorArrangement]
if !ok {
return fmt.Errorf("unsupported vector arrangement %s for %s", n.vectorArrangement, InstructionName(n.instruction))
}
a.encodeAdvancedSIMDThreeSame(buf, srcRegBits, srcRegBits2, dstRegBits, threeSame.opcode, qs.size, qs.q, threeSame.u)
return nil
}
if threeDifferent, ok := advancedSIMDThreeDifferent[n.instruction]; ok {
qs, ok := threeDifferent.qAndSize[n.vectorArrangement]
if !ok {
return fmt.Errorf("unsupported vector arrangement %s for %s", n.vectorArrangement, InstructionName(n.instruction))
}
a.encodeAdvancedSIMDThreeDifferent(buf, srcRegBits, srcRegBits2, dstRegBits, threeDifferent.opcode, qs.size, qs.q, threeDifferent.u)
return nil
}
if permute, ok := advancedSIMDPermute[n.instruction]; ok {
size, q := arrangementSizeQ(n.vectorArrangement)
a.encodeAdvancedSIMDPermute(buf, srcRegBits, srcRegBits2, dstRegBits, permute.opcode, size, q)
return
}
if n.instruction == EXT {
// EXT is the only instruction in "Advanced SIMD extract", so inline the encoding here.
// https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/EXT--Extract-vector-from-pair-of-vectors-?lang=en
var q, imm4 byte
switch n.vectorArrangement {
case VectorArrangement16B:
imm4 = 0b1111 & byte(n.srcConst)
q = 0b1
case VectorArrangement8B:
imm4 = 0b111 & byte(n.srcConst)
default:
return fmt.Errorf("invalid arrangement %s for EXT", n.vectorArrangement)
}
buf.Append4Bytes(
(srcRegBits2<<5)|dstRegBits,
imm4<<3|srcRegBits2>>3,
srcRegBits,
q<<6|0b101110,
)
return
}
return
}
func (a *AssemblerImpl) encodeVectorRegisterToRegister(buf asm.Buffer, n *nodeImpl) (err error) {
if err = checkArrangementIndexPair(n.vectorArrangement, n.srcVectorIndex); err != nil {
return
}
srcVecRegBits, err := vectorRegisterBits(n.srcReg)
if err != nil {
return err
}
dstRegBits, err := intRegisterBits(n.dstReg)
if err != nil {
return err
}
if simdCopy, ok := advancedSIMDCopy[n.instruction]; ok {
imm5, imm4, q, err := simdCopy.resolver(n.srcVectorIndex, n.dstVectorIndex, n.vectorArrangement)
if err != nil {
return err
}
a.encodeAdvancedSIMDCopy(buf, srcVecRegBits, dstRegBits, simdCopy.op, imm5, imm4, q)
return nil
}
return errorEncodingUnsupported(n)
}
func (a *AssemblerImpl) encodeRegisterToVectorRegister(buf asm.Buffer, n *nodeImpl) (err error) {
srcRegBits, err := intRegisterBits(n.srcReg)
if err != nil {
return err
}
dstVectorRegBits, err := vectorRegisterBits(n.dstReg)
if err != nil {
return err
}
if simdCopy, ok := advancedSIMDCopy[n.instruction]; ok {
imm5, imm4, q, err := simdCopy.resolver(n.srcVectorIndex, n.dstVectorIndex, n.vectorArrangement)
if err != nil {
return err
}
a.encodeAdvancedSIMDCopy(buf, srcRegBits, dstVectorRegBits, simdCopy.op, imm5, imm4, q)
return nil
}
return errorEncodingUnsupported(n)
}
var zeroRegisterBits byte = 0b11111
func isIntRegister(r asm.Register) bool {
return RegR0 <= r && r <= RegSP
}
func isVectorRegister(r asm.Register) bool {
return RegV0 <= r && r <= RegV31
}
func isConditionalRegister(r asm.Register) bool {
return RegCondEQ <= r && r <= RegCondNV
}
func intRegisterBits(r asm.Register) (ret byte, err error) {
if !isIntRegister(r) {
err = fmt.Errorf("%s is not integer", RegisterName(r))
} else if r == RegSP {
// SP has the same bit representations as RegRZR.
r = RegRZR
}
ret = byte(r - RegR0)
return
}
func vectorRegisterBits(r asm.Register) (ret byte, err error) {
if !isVectorRegister(r) {
err = fmt.Errorf("%s is not vector", RegisterName(r))
} else {
ret = byte(r - RegV0)
}
return
}
func registerBits(r asm.Register) (ret byte) {
if isIntRegister(r) {
if r == RegSP {
// SP has the same bit representations as RegRZR.
r = RegRZR
}
ret = byte(r - RegR0)
} else {
ret = byte(r - RegV0)
}
return
}