package arm64
|
|
|
|
import (
|
|
"fmt"
|
|
"math"
|
|
"strings"
|
|
|
|
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc"
|
|
"github.com/tetratelabs/wazero/internal/engine/wazevo/ssa"
|
|
)
|
|
|
|
type (
|
|
// instruction represents either a real instruction in arm64, or the meta instructions
|
|
// that are convenient for code generation. For example, inline constants are also treated
|
|
// as instructions.
|
|
//
|
|
// Basically, each instruction knows how to get encoded into machine code. Hence, the final output of compilation
|
|
// can be considered equivalent to the sequence of such instructions.
|
|
//
|
|
// Each field is interpreted depending on the kind.
|
|
//
|
|
// TODO: optimize the layout later once the impl settles.
|
|
instruction struct {
|
|
kind instructionKind
|
|
prev, next *instruction
|
|
u1, u2, u3 uint64
|
|
rd, rm, rn, ra operand
|
|
amode addressMode
|
|
abi *abiImpl
|
|
targets []uint32
|
|
addedBeforeRegAlloc bool
|
|
}
|
|
|
|
// instructionKind represents the kind of instruction.
|
|
// This controls how the instruction struct is interpreted.
|
|
instructionKind int
|
|
)
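// As a concrete illustration of how the fields above are interpreted per kind (a
// sketch based on the as* constructors below, not an exhaustive list):
//   - aluRRR: u1 holds the aluOp, rd/rn/rm hold the operands, u3 is the 64-bit flag.
//   - condBr: u1 holds the cond, u2 holds the target label, u3 is the 64-bit flag.
//   - movZ:   u1 holds the 16-bit immediate, u2 holds the shift amount divided by 16, u3 is the 64-bit flag.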
|
|
|
|
type defKind byte
|
|
|
|
const (
|
|
defKindNone defKind = iota + 1
|
|
defKindRD
|
|
defKindCall
|
|
)
|
|
|
|
var defKinds = [numInstructionKinds]defKind{
|
|
adr: defKindRD,
|
|
aluRRR: defKindRD,
|
|
aluRRRR: defKindRD,
|
|
aluRRImm12: defKindRD,
|
|
aluRRBitmaskImm: defKindRD,
|
|
aluRRRShift: defKindRD,
|
|
aluRRImmShift: defKindRD,
|
|
aluRRRExtend: defKindRD,
|
|
bitRR: defKindRD,
|
|
movZ: defKindRD,
|
|
movK: defKindRD,
|
|
movN: defKindRD,
|
|
mov32: defKindRD,
|
|
mov64: defKindRD,
|
|
fpuMov64: defKindRD,
|
|
fpuMov128: defKindRD,
|
|
fpuRR: defKindRD,
|
|
fpuRRR: defKindRD,
|
|
nop0: defKindNone,
|
|
call: defKindCall,
|
|
callInd: defKindCall,
|
|
ret: defKindNone,
|
|
store8: defKindNone,
|
|
store16: defKindNone,
|
|
store32: defKindNone,
|
|
store64: defKindNone,
|
|
exitSequence: defKindNone,
|
|
condBr: defKindNone,
|
|
br: defKindNone,
|
|
brTableSequence: defKindNone,
|
|
cSet: defKindRD,
|
|
extend: defKindRD,
|
|
fpuCmp: defKindNone,
|
|
uLoad8: defKindRD,
|
|
uLoad16: defKindRD,
|
|
uLoad32: defKindRD,
|
|
sLoad8: defKindRD,
|
|
sLoad16: defKindRD,
|
|
sLoad32: defKindRD,
|
|
uLoad64: defKindRD,
|
|
fpuLoad32: defKindRD,
|
|
fpuLoad64: defKindRD,
|
|
fpuLoad128: defKindRD,
|
|
vecLoad1R: defKindRD,
|
|
loadFpuConst32: defKindRD,
|
|
loadFpuConst64: defKindRD,
|
|
loadFpuConst128: defKindRD,
|
|
fpuStore32: defKindNone,
|
|
fpuStore64: defKindNone,
|
|
fpuStore128: defKindNone,
|
|
udf: defKindNone,
|
|
cSel: defKindRD,
|
|
fpuCSel: defKindRD,
|
|
movToVec: defKindRD,
|
|
movFromVec: defKindRD,
|
|
movFromVecSigned: defKindRD,
|
|
vecDup: defKindRD,
|
|
vecDupElement: defKindRD,
|
|
vecExtract: defKindRD,
|
|
vecMisc: defKindRD,
|
|
vecMovElement: defKindRD,
|
|
vecLanes: defKindRD,
|
|
vecShiftImm: defKindRD,
|
|
vecTbl: defKindRD,
|
|
vecTbl2: defKindRD,
|
|
vecPermute: defKindRD,
|
|
vecRRR: defKindRD,
|
|
fpuToInt: defKindRD,
|
|
intToFpu: defKindRD,
|
|
cCmpImm: defKindNone,
|
|
movToFPSR: defKindNone,
|
|
movFromFPSR: defKindRD,
|
|
emitSourceOffsetInfo: defKindNone,
|
|
}
|
|
|
|
// defs returns the list of regalloc.VReg that are defined by the instruction.
|
|
// In order to reduce the number of allocations, the caller can pass the slice to be used.
|
|
func (i *instruction) defs(regs []regalloc.VReg) []regalloc.VReg {
|
|
switch defKinds[i.kind] {
|
|
case defKindNone:
|
|
case defKindRD:
|
|
regs = append(regs, i.rd.nr())
|
|
case defKindCall:
|
|
regs = append(regs, i.abi.retRealRegs...)
|
|
default:
|
|
panic(fmt.Sprintf("defKind for %v not defined", i))
|
|
}
|
|
return regs
|
|
}
|
|
|
|
func (i *instruction) assignDef(reg regalloc.VReg) {
|
|
switch defKinds[i.kind] {
|
|
case defKindNone:
|
|
case defKindRD:
|
|
i.rd = i.rd.assignReg(reg)
|
|
case defKindCall:
|
|
panic("BUG: call instructions shouldn't be assigned")
|
|
default:
|
|
panic(fmt.Sprintf("defKind for %v not defined", i))
|
|
}
|
|
}
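// A minimal sketch of how a register allocator is expected to drive defs/uses and the
// assign* methods (the real driver lives in the regalloc package; head, realReg, and the
// iteration order here are hypothetical):
//
//	var tmp []regalloc.VReg
//	for cur := head; cur != nil; cur = cur.next {
//		tmp = cur.uses(tmp[:0])
//		for idx := range tmp {
//			cur.assignUse(idx, realReg) // replace the idx-th use with the chosen real register.
//		}
//		tmp = cur.defs(tmp[:0])
//		if len(tmp) == 1 { // defKindRD instructions define exactly one register.
//			cur.assignDef(realReg)
//		}
//	}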
|
|
|
|
type useKind byte
|
|
|
|
const (
|
|
useKindNone useKind = iota + 1
|
|
useKindRN
|
|
useKindRNRM
|
|
useKindRNRMRA
|
|
useKindRNRN1RM
|
|
useKindRet
|
|
useKindCall
|
|
useKindCallInd
|
|
useKindAMode
|
|
useKindRNAMode
|
|
useKindCond
|
|
)
|
|
|
|
var useKinds = [numInstructionKinds]useKind{
|
|
udf: useKindNone,
|
|
aluRRR: useKindRNRM,
|
|
aluRRRR: useKindRNRMRA,
|
|
aluRRImm12: useKindRN,
|
|
aluRRBitmaskImm: useKindRN,
|
|
aluRRRShift: useKindRNRM,
|
|
aluRRImmShift: useKindRN,
|
|
aluRRRExtend: useKindRNRM,
|
|
bitRR: useKindRN,
|
|
movZ: useKindNone,
|
|
movK: useKindNone,
|
|
movN: useKindNone,
|
|
mov32: useKindRN,
|
|
mov64: useKindRN,
|
|
fpuMov64: useKindRN,
|
|
fpuMov128: useKindRN,
|
|
fpuRR: useKindRN,
|
|
fpuRRR: useKindRNRM,
|
|
nop0: useKindNone,
|
|
call: useKindCall,
|
|
callInd: useKindCallInd,
|
|
ret: useKindRet,
|
|
store8: useKindRNAMode,
|
|
store16: useKindRNAMode,
|
|
store32: useKindRNAMode,
|
|
store64: useKindRNAMode,
|
|
exitSequence: useKindRN,
|
|
condBr: useKindCond,
|
|
br: useKindNone,
|
|
brTableSequence: useKindRN,
|
|
cSet: useKindNone,
|
|
extend: useKindRN,
|
|
fpuCmp: useKindRNRM,
|
|
uLoad8: useKindAMode,
|
|
uLoad16: useKindAMode,
|
|
uLoad32: useKindAMode,
|
|
sLoad8: useKindAMode,
|
|
sLoad16: useKindAMode,
|
|
sLoad32: useKindAMode,
|
|
uLoad64: useKindAMode,
|
|
fpuLoad32: useKindAMode,
|
|
fpuLoad64: useKindAMode,
|
|
fpuLoad128: useKindAMode,
|
|
fpuStore32: useKindRNAMode,
|
|
fpuStore64: useKindRNAMode,
|
|
fpuStore128: useKindRNAMode,
|
|
loadFpuConst32: useKindNone,
|
|
loadFpuConst64: useKindNone,
|
|
loadFpuConst128: useKindNone,
|
|
vecLoad1R: useKindRN,
|
|
cSel: useKindRNRM,
|
|
fpuCSel: useKindRNRM,
|
|
movToVec: useKindRN,
|
|
movFromVec: useKindRN,
|
|
movFromVecSigned: useKindRN,
|
|
vecDup: useKindRN,
|
|
vecDupElement: useKindRN,
|
|
vecExtract: useKindRNRM,
|
|
cCmpImm: useKindRN,
|
|
vecMisc: useKindRN,
|
|
vecMovElement: useKindRN,
|
|
vecLanes: useKindRN,
|
|
vecShiftImm: useKindRN,
|
|
vecTbl: useKindRNRM,
|
|
vecTbl2: useKindRNRN1RM,
|
|
vecRRR: useKindRNRM,
|
|
vecPermute: useKindRNRM,
|
|
fpuToInt: useKindRN,
|
|
intToFpu: useKindRN,
|
|
movToFPSR: useKindRN,
|
|
movFromFPSR: useKindNone,
|
|
adr: useKindNone,
|
|
emitSourceOffsetInfo: useKindNone,
|
|
}
|
|
|
|
// uses returns the list of regalloc.VReg that are used by the instruction.
|
|
// In order to reduce the number of allocations, the caller can pass the slice to be used.
|
|
func (i *instruction) uses(regs []regalloc.VReg) []regalloc.VReg {
|
|
switch useKinds[i.kind] {
|
|
case useKindNone:
|
|
case useKindRN:
|
|
if rn := i.rn.reg(); rn.Valid() {
|
|
regs = append(regs, rn)
|
|
}
|
|
case useKindRNRM:
|
|
if rn := i.rn.reg(); rn.Valid() {
|
|
regs = append(regs, rn)
|
|
}
|
|
if rm := i.rm.reg(); rm.Valid() {
|
|
regs = append(regs, rm)
|
|
}
|
|
case useKindRNRMRA:
|
|
if rn := i.rn.reg(); rn.Valid() {
|
|
regs = append(regs, rn)
|
|
}
|
|
if rm := i.rm.reg(); rm.Valid() {
|
|
regs = append(regs, rm)
|
|
}
|
|
if ra := i.ra.reg(); ra.Valid() {
|
|
regs = append(regs, ra)
|
|
}
|
|
case useKindRNRN1RM:
|
|
if rn := i.rn.reg(); rn.Valid() && rn.IsRealReg() {
|
|
rn1 := regalloc.FromRealReg(rn.RealReg()+1, rn.RegType())
|
|
regs = append(regs, rn, rn1)
|
|
}
|
|
if rm := i.rm.reg(); rm.Valid() {
|
|
regs = append(regs, rm)
|
|
}
|
|
case useKindRet:
|
|
regs = append(regs, i.abi.retRealRegs...)
|
|
case useKindAMode:
|
|
if amodeRN := i.amode.rn; amodeRN.Valid() {
|
|
regs = append(regs, amodeRN)
|
|
}
|
|
if amodeRM := i.amode.rm; amodeRM.Valid() {
|
|
regs = append(regs, amodeRM)
|
|
}
|
|
case useKindRNAMode:
|
|
regs = append(regs, i.rn.reg())
|
|
if amodeRN := i.amode.rn; amodeRN.Valid() {
|
|
regs = append(regs, amodeRN)
|
|
}
|
|
if amodeRM := i.amode.rm; amodeRM.Valid() {
|
|
regs = append(regs, amodeRM)
|
|
}
|
|
case useKindCond:
|
|
cnd := cond(i.u1)
|
|
if cnd.kind() != condKindCondFlagSet {
|
|
regs = append(regs, cnd.register())
|
|
}
|
|
case useKindCall:
|
|
regs = append(regs, i.abi.argRealRegs...)
|
|
case useKindCallInd:
|
|
regs = append(regs, i.rn.nr())
|
|
regs = append(regs, i.abi.argRealRegs...)
|
|
default:
|
|
panic(fmt.Sprintf("useKind for %v not defined", i))
|
|
}
|
|
return regs
|
|
}
|
|
|
|
func (i *instruction) assignUse(index int, reg regalloc.VReg) {
|
|
switch useKinds[i.kind] {
|
|
case useKindNone:
|
|
case useKindRN:
|
|
if rn := i.rn.reg(); rn.Valid() {
|
|
i.rn = i.rn.assignReg(reg)
|
|
}
|
|
case useKindRNRM:
|
|
if index == 0 {
|
|
if rn := i.rn.reg(); rn.Valid() {
|
|
i.rn = i.rn.assignReg(reg)
|
|
}
|
|
} else {
|
|
if rm := i.rm.reg(); rm.Valid() {
|
|
i.rm = i.rm.assignReg(reg)
|
|
}
|
|
}
|
|
case useKindRNRN1RM:
|
|
if index == 0 {
|
|
if rn := i.rn.reg(); rn.Valid() {
|
|
i.rn = i.rn.assignReg(reg)
|
|
}
|
|
if rn1 := i.rn.reg() + 1; rn1.Valid() {
|
|
i.rm = i.rm.assignReg(reg + 1)
|
|
}
|
|
} else {
|
|
if rm := i.rm.reg(); rm.Valid() {
|
|
i.rm = i.rm.assignReg(reg)
|
|
}
|
|
}
|
|
case useKindRNRMRA:
|
|
if index == 0 {
|
|
if rn := i.rn.reg(); rn.Valid() {
|
|
i.rn = i.rn.assignReg(reg)
|
|
}
|
|
} else if index == 1 {
|
|
if rm := i.rm.reg(); rm.Valid() {
|
|
i.rm = i.rm.assignReg(reg)
|
|
}
|
|
} else {
|
|
if ra := i.ra.reg(); ra.Valid() {
|
|
i.ra = i.ra.assignReg(reg)
|
|
}
|
|
}
|
|
case useKindRet:
|
|
panic("BUG: ret instructions shouldn't be assigned")
|
|
case useKindAMode:
|
|
if index == 0 {
|
|
if amodeRN := i.amode.rn; amodeRN.Valid() {
|
|
i.amode.rn = reg
|
|
}
|
|
} else {
|
|
if amodeRM := i.amode.rm; amodeRM.Valid() {
|
|
i.amode.rm = reg
|
|
}
|
|
}
|
|
case useKindRNAMode:
|
|
if index == 0 {
|
|
i.rn = i.rn.assignReg(reg)
|
|
} else if index == 1 {
|
|
if amodeRN := i.amode.rn; amodeRN.Valid() {
|
|
i.amode.rn = reg
|
|
} else {
|
|
panic("BUG")
|
|
}
|
|
} else {
|
|
if amodeRM := i.amode.rm; amodeRM.Valid() {
|
|
i.amode.rm = reg
|
|
} else {
|
|
panic("BUG")
|
|
}
|
|
}
|
|
case useKindCond:
|
|
c := cond(i.u1)
|
|
switch c.kind() {
|
|
case condKindRegisterZero:
|
|
i.u1 = uint64(registerAsRegZeroCond(reg))
|
|
case condKindRegisterNotZero:
|
|
i.u1 = uint64(registerAsRegNotZeroCond(reg))
|
|
}
|
|
case useKindCall:
|
|
panic("BUG: call instructions shouldn't be assigned")
|
|
case useKindCallInd:
|
|
i.rn = i.rn.assignReg(reg)
|
|
default:
|
|
panic(fmt.Sprintf("useKind for %v not defined", i))
|
|
}
|
|
}
|
|
|
|
func (i *instruction) asCall(ref ssa.FuncRef, abi *abiImpl) {
|
|
i.kind = call
|
|
i.u1 = uint64(ref)
|
|
i.abi = abi
|
|
}
|
|
|
|
func (i *instruction) asCallIndirect(ptr regalloc.VReg, abi *abiImpl) {
|
|
i.kind = callInd
|
|
i.rn = operandNR(ptr)
|
|
i.abi = abi
|
|
}
|
|
|
|
func (i *instruction) callFuncRef() ssa.FuncRef {
|
|
return ssa.FuncRef(i.u1)
|
|
}
|
|
|
|
// shift is the actual shift amount divided by 16; it must be in the range 0-3 (if dst64bit is true) or 0-1 (if dst64bit is false).
|
|
func (i *instruction) asMOVZ(dst regalloc.VReg, imm uint64, shift uint64, dst64bit bool) {
|
|
i.kind = movZ
|
|
i.rd = operandNR(dst)
|
|
i.u1 = imm
|
|
i.u2 = shift
|
|
if dst64bit {
|
|
i.u3 = 1
|
|
}
|
|
}
|
|
|
|
// shift is the actual shift amount divided by 16; it must be in the range 0-3 (if dst64bit is true) or 0-1 (if dst64bit is false).
|
|
func (i *instruction) asMOVK(dst regalloc.VReg, imm uint64, shift uint64, dst64bit bool) {
|
|
i.kind = movK
|
|
i.rd = operandNR(dst)
|
|
i.u1 = imm
|
|
i.u2 = shift
|
|
if dst64bit {
|
|
i.u3 = 1
|
|
}
|
|
}
|
|
|
|
// shift is the actual shift amount divided by 16; it must be in the range 0-3 (if dst64bit is true) or 0-1 (if dst64bit is false).
|
|
func (i *instruction) asMOVN(dst regalloc.VReg, imm uint64, shift uint64, dst64bit bool) {
|
|
i.kind = movN
|
|
i.rd = operandNR(dst)
|
|
i.u1 = imm
|
|
i.u2 = shift
|
|
if dst64bit {
|
|
i.u3 = 1
|
|
}
|
|
}
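// For example, materializing the 64-bit constant 0x1234_5678_9abc_def0 could be done with
// a movz followed by three movk instructions (a sketch; i0..i3 are freshly allocated
// instructions, dst a destination register, and the shift argument is the shift amount
// divided by 16 as noted above):
//
//	i0.asMOVZ(dst, 0xdef0, 0, true) // bits [15:0]
//	i1.asMOVK(dst, 0x9abc, 1, true) // bits [31:16]
//	i2.asMOVK(dst, 0x5678, 2, true) // bits [47:32]
//	i3.asMOVK(dst, 0x1234, 3, true) // bits [63:48]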
|
|
|
|
func (i *instruction) asNop0() *instruction {
|
|
i.kind = nop0
|
|
return i
|
|
}
|
|
|
|
func (i *instruction) asNop0WithLabel(l label) {
|
|
i.kind = nop0
|
|
i.u1 = uint64(l)
|
|
}
|
|
|
|
func (i *instruction) nop0Label() label {
|
|
return label(i.u1)
|
|
}
|
|
|
|
func (i *instruction) asRet(abi *abiImpl) {
|
|
i.kind = ret
|
|
i.abi = abi
|
|
}
|
|
|
|
func (i *instruction) asStorePair64(src1, src2 regalloc.VReg, amode addressMode) {
|
|
i.kind = storeP64
|
|
i.rn = operandNR(src1)
|
|
i.rm = operandNR(src2)
|
|
i.amode = amode
|
|
}
|
|
|
|
func (i *instruction) asLoadPair64(src1, src2 regalloc.VReg, amode addressMode) {
|
|
i.kind = loadP64
|
|
i.rn = operandNR(src1)
|
|
i.rm = operandNR(src2)
|
|
i.amode = amode
|
|
}
|
|
|
|
func (i *instruction) asStore(src operand, amode addressMode, sizeInBits byte) {
|
|
switch sizeInBits {
|
|
case 8:
|
|
i.kind = store8
|
|
case 16:
|
|
i.kind = store16
|
|
case 32:
|
|
if src.reg().RegType() == regalloc.RegTypeInt {
|
|
i.kind = store32
|
|
} else {
|
|
i.kind = fpuStore32
|
|
}
|
|
case 64:
|
|
if src.reg().RegType() == regalloc.RegTypeInt {
|
|
i.kind = store64
|
|
} else {
|
|
i.kind = fpuStore64
|
|
}
|
|
case 128:
|
|
i.kind = fpuStore128
|
|
}
|
|
i.rn = src
|
|
i.amode = amode
|
|
}
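// For example, asStore(operandNR(v), amode, 32) selects store32 when v is an integer
// register and fpuStore32 when it is a floating-point register; 8- and 16-bit stores are
// always integer stores.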
|
|
|
|
func (i *instruction) asSLoad(dst operand, amode addressMode, sizeInBits byte) {
|
|
switch sizeInBits {
|
|
case 8:
|
|
i.kind = sLoad8
|
|
case 16:
|
|
i.kind = sLoad16
|
|
case 32:
|
|
i.kind = sLoad32
|
|
default:
|
|
panic("BUG")
|
|
}
|
|
i.rd = dst
|
|
i.amode = amode
|
|
}
|
|
|
|
func (i *instruction) asULoad(dst operand, amode addressMode, sizeInBits byte) {
|
|
switch sizeInBits {
|
|
case 8:
|
|
i.kind = uLoad8
|
|
case 16:
|
|
i.kind = uLoad16
|
|
case 32:
|
|
i.kind = uLoad32
|
|
case 64:
|
|
i.kind = uLoad64
|
|
}
|
|
i.rd = dst
|
|
i.amode = amode
|
|
}
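// For example, asULoad(dst, amode, 32) selects uLoad32 (a zero-extending 32-bit load),
// whereas asSLoad(dst, amode, 32) above selects sLoad32, the sign-extending variant.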
|
|
|
|
func (i *instruction) asFpuLoad(dst operand, amode addressMode, sizeInBits byte) {
|
|
switch sizeInBits {
|
|
case 32:
|
|
i.kind = fpuLoad32
|
|
case 64:
|
|
i.kind = fpuLoad64
|
|
case 128:
|
|
i.kind = fpuLoad128
|
|
}
|
|
i.rd = dst
|
|
i.amode = amode
|
|
}
|
|
|
|
func (i *instruction) asVecLoad1R(rd, rn operand, arr vecArrangement) {
|
|
	// NOTE: currently only no-offset loads are supported; it is doubtful we would ever need
	// the offset form, which is only available with post-index addressing.
|
|
i.kind = vecLoad1R
|
|
i.rd = rd
|
|
i.rn = rn
|
|
i.u1 = uint64(arr)
|
|
}
|
|
|
|
func (i *instruction) asCSet(rd regalloc.VReg, c condFlag) {
|
|
i.kind = cSet
|
|
i.rd = operandNR(rd)
|
|
i.u1 = uint64(c)
|
|
}
|
|
|
|
func (i *instruction) asCSel(rd, rn, rm operand, c condFlag, _64bit bool) {
|
|
i.kind = cSel
|
|
i.rd = rd
|
|
i.rn = rn
|
|
i.rm = rm
|
|
i.u1 = uint64(c)
|
|
if _64bit {
|
|
i.u3 = 1
|
|
}
|
|
}
|
|
|
|
func (i *instruction) asFpuCSel(rd, rn, rm operand, c condFlag, _64bit bool) {
|
|
i.kind = fpuCSel
|
|
i.rd = rd
|
|
i.rn = rn
|
|
i.rm = rm
|
|
i.u1 = uint64(c)
|
|
if _64bit {
|
|
i.u3 = 1
|
|
}
|
|
}
|
|
|
|
func (i *instruction) asBr(target label) {
|
|
if target == returnLabel {
|
|
panic("BUG: call site should special case for returnLabel")
|
|
}
|
|
i.kind = br
|
|
i.u1 = uint64(target)
|
|
}
|
|
|
|
func (i *instruction) asBrTableSequence(indexReg regalloc.VReg, targets []uint32) {
|
|
i.kind = brTableSequence
|
|
i.rn = operandNR(indexReg)
|
|
i.targets = targets
|
|
}
|
|
|
|
func (i *instruction) brTableSequenceOffsetsResolved() {
|
|
i.u3 = 1 // indicate that the offsets are resolved, for debugging.
|
|
}
|
|
|
|
func (i *instruction) brLabel() label {
|
|
return label(i.u1)
|
|
}
|
|
|
|
// brOffsetResolved is called when the target label is resolved.
|
|
func (i *instruction) brOffsetResolved(offset int64) {
|
|
i.u2 = uint64(offset)
|
|
i.u3 = 1 // indicate that the offset is resolved, for debugging.
|
|
}
|
|
|
|
func (i *instruction) brOffset() int64 {
|
|
return int64(i.u2)
|
|
}
|
|
|
|
// asCondBr sets the instruction to a conditional branch. is64bit is only needed when cond is not a flag-set condition.
|
|
func (i *instruction) asCondBr(c cond, target label, is64bit bool) {
|
|
i.kind = condBr
|
|
i.u1 = c.asUint64()
|
|
i.u2 = uint64(target)
|
|
if is64bit {
|
|
i.u3 = 1
|
|
}
|
|
}
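// A sketch of constructing a compare-and-branch-on-zero, assuming registerAsRegZeroCond
// (as used in assignUse above) returns a cond, and v/targetLabel are a virtual register
// and a label chosen by the caller:
//
//	br := &instruction{}
//	br.asCondBr(registerAsRegZeroCond(v), targetLabel, true) // cbz v, targetLabel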
|
|
|
|
func (i *instruction) condBrLabel() label {
|
|
return label(i.u2)
|
|
}
|
|
|
|
// condBrOffsetResolve is called when the target label is resolved.
|
|
func (i *instruction) condBrOffsetResolve(offset int64) {
|
|
i.rd.data = uint64(offset)
|
|
i.rd.data2 = 1 // indicate that the offset is resolved, for debugging.
|
|
}
|
|
|
|
// condBrOffsetResolved returns true if condBrOffsetResolve has already been called.
|
|
func (i *instruction) condBrOffsetResolved() bool {
|
|
return i.rd.data2 == 1
|
|
}
|
|
|
|
func (i *instruction) condBrOffset() int64 {
|
|
return int64(i.rd.data)
|
|
}
|
|
|
|
func (i *instruction) condBrCond() cond {
|
|
return cond(i.u1)
|
|
}
|
|
|
|
func (i *instruction) condBr64bit() bool {
|
|
return i.u3 == 1
|
|
}
|
|
|
|
func (i *instruction) asLoadFpuConst32(rd regalloc.VReg, raw uint64) {
|
|
i.kind = loadFpuConst32
|
|
i.u1 = raw
|
|
i.rd = operandNR(rd)
|
|
}
|
|
|
|
func (i *instruction) asLoadFpuConst64(rd regalloc.VReg, raw uint64) {
|
|
i.kind = loadFpuConst64
|
|
i.u1 = raw
|
|
i.rd = operandNR(rd)
|
|
}
|
|
|
|
func (i *instruction) asLoadFpuConst128(rd regalloc.VReg, lo, hi uint64) {
|
|
i.kind = loadFpuConst128
|
|
i.u1 = lo
|
|
i.u2 = hi
|
|
i.rd = operandNR(rd)
|
|
}
|
|
|
|
func (i *instruction) asFpuCmp(rn, rm operand, is64bit bool) {
|
|
i.kind = fpuCmp
|
|
i.rn, i.rm = rn, rm
|
|
if is64bit {
|
|
i.u3 = 1
|
|
}
|
|
}
|
|
|
|
func (i *instruction) asCCmpImm(rn operand, imm uint64, c condFlag, flag byte, is64bit bool) {
|
|
i.kind = cCmpImm
|
|
i.rn = rn
|
|
i.rm.data = imm
|
|
i.u1 = uint64(c)
|
|
i.u2 = uint64(flag)
|
|
if is64bit {
|
|
i.u3 = 1
|
|
}
|
|
}
|
|
|
|
// asALU sets up a basic ALU instruction.
|
|
func (i *instruction) asALU(aluOp aluOp, rd, rn, rm operand, dst64bit bool) {
|
|
switch rm.kind {
|
|
case operandKindNR:
|
|
i.kind = aluRRR
|
|
case operandKindSR:
|
|
i.kind = aluRRRShift
|
|
case operandKindER:
|
|
i.kind = aluRRRExtend
|
|
case operandKindImm12:
|
|
i.kind = aluRRImm12
|
|
default:
|
|
panic("BUG")
|
|
}
|
|
i.u1 = uint64(aluOp)
|
|
i.rd, i.rn, i.rm = rd, rn, rm
|
|
if dst64bit {
|
|
i.u3 = 1
|
|
}
|
|
}
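// For example, asALU(aluOpAdd, rd, rn, operandNR(x), true) produces an aluRRR add, while
// an rm whose kind is operandKindImm12 produces an aluRRImm12 add; the operand kind of rm
// alone selects the instruction kind.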
|
|
|
|
// asALURRRR sets up an ALU instruction with three register sources and a register destination (e.g. madd/msub).
|
|
func (i *instruction) asALURRRR(aluOp aluOp, rd, rn, rm, ra operand, dst64bit bool) {
|
|
i.kind = aluRRRR
|
|
i.u1 = uint64(aluOp)
|
|
i.rd, i.rn, i.rm, i.ra = rd, rn, rm, ra
|
|
if dst64bit {
|
|
i.u3 = 1
|
|
}
|
|
}
|
|
|
|
// asALUShift sets up a shift-based ALU instruction.
|
|
func (i *instruction) asALUShift(aluOp aluOp, rd, rn, rm operand, dst64bit bool) {
|
|
switch rm.kind {
|
|
case operandKindNR:
|
|
i.kind = aluRRR // If the shift amount op is a register, then the instruction is encoded as a normal ALU instruction with two register operands.
|
|
case operandKindShiftImm:
|
|
i.kind = aluRRImmShift
|
|
default:
|
|
panic("BUG")
|
|
}
|
|
i.u1 = uint64(aluOp)
|
|
i.rd, i.rn, i.rm = rd, rn, rm
|
|
if dst64bit {
|
|
i.u3 = 1
|
|
}
|
|
}
|
|
|
|
func (i *instruction) asALUBitmaskImm(aluOp aluOp, rd, rn regalloc.VReg, imm uint64, dst64bit bool) {
|
|
i.kind = aluRRBitmaskImm
|
|
i.u1 = uint64(aluOp)
|
|
i.rn, i.rd = operandNR(rn), operandNR(rd)
|
|
i.u2 = imm
|
|
if dst64bit {
|
|
i.u3 = 1
|
|
}
|
|
}
|
|
|
|
func (i *instruction) asMovToFPSR(rn regalloc.VReg) {
|
|
i.kind = movToFPSR
|
|
i.rn = operandNR(rn)
|
|
}
|
|
|
|
func (i *instruction) asMovFromFPSR(rd regalloc.VReg) {
|
|
i.kind = movFromFPSR
|
|
i.rd = operandNR(rd)
|
|
}
|
|
|
|
func (i *instruction) asBitRR(bitOp bitOp, rd, rn regalloc.VReg, is64bit bool) {
|
|
i.kind = bitRR
|
|
i.rn, i.rd = operandNR(rn), operandNR(rd)
|
|
i.u1 = uint64(bitOp)
|
|
if is64bit {
|
|
i.u2 = 1
|
|
}
|
|
}
|
|
|
|
func (i *instruction) asFpuRRR(op fpuBinOp, rd, rn, rm operand, dst64bit bool) {
|
|
i.kind = fpuRRR
|
|
i.u1 = uint64(op)
|
|
i.rd, i.rn, i.rm = rd, rn, rm
|
|
if dst64bit {
|
|
i.u3 = 1
|
|
}
|
|
}
|
|
|
|
func (i *instruction) asFpuRR(op fpuUniOp, rd, rn operand, dst64bit bool) {
|
|
i.kind = fpuRR
|
|
i.u1 = uint64(op)
|
|
i.rd, i.rn = rd, rn
|
|
if dst64bit {
|
|
i.u3 = 1
|
|
}
|
|
}
|
|
|
|
func (i *instruction) asExtend(rd, rn regalloc.VReg, fromBits, toBits byte, signed bool) {
|
|
i.kind = extend
|
|
i.rn, i.rd = operandNR(rn), operandNR(rd)
|
|
i.u1 = uint64(fromBits)
|
|
i.u2 = uint64(toBits)
|
|
if signed {
|
|
i.u3 = 1
|
|
}
|
|
}
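// For example, asExtend(rd, rn, 8, 64, true) requests a signed extension from 8 to 64
// bits, which String() below renders as an sxtb, while signed=false would render uxtb.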
|
|
|
|
func (i *instruction) asMove32(rd, rn regalloc.VReg) {
|
|
i.kind = mov32
|
|
i.rn, i.rd = operandNR(rn), operandNR(rd)
|
|
}
|
|
|
|
func (i *instruction) asMove64(rd, rn regalloc.VReg) {
|
|
i.kind = mov64
|
|
i.rn, i.rd = operandNR(rn), operandNR(rd)
|
|
}
|
|
|
|
func (i *instruction) asFpuMov64(rd, rn regalloc.VReg) {
|
|
i.kind = fpuMov64
|
|
i.rn, i.rd = operandNR(rn), operandNR(rd)
|
|
}
|
|
|
|
func (i *instruction) asFpuMov128(rd, rn regalloc.VReg) {
|
|
i.kind = fpuMov128
|
|
i.rn, i.rd = operandNR(rn), operandNR(rd)
|
|
}
|
|
|
|
func (i *instruction) asMovToVec(rd, rn operand, arr vecArrangement, index vecIndex) {
|
|
i.kind = movToVec
|
|
i.rd = rd
|
|
i.rn = rn
|
|
i.u1, i.u2 = uint64(arr), uint64(index)
|
|
}
|
|
|
|
func (i *instruction) asMovFromVec(rd, rn operand, arr vecArrangement, index vecIndex, signed bool) {
|
|
if signed {
|
|
i.kind = movFromVecSigned
|
|
} else {
|
|
i.kind = movFromVec
|
|
}
|
|
i.rd = rd
|
|
i.rn = rn
|
|
i.u1, i.u2 = uint64(arr), uint64(index)
|
|
}
|
|
|
|
func (i *instruction) asVecDup(rd, rn operand, arr vecArrangement) {
|
|
i.kind = vecDup
|
|
i.u1 = uint64(arr)
|
|
i.rn, i.rd = rn, rd
|
|
}
|
|
|
|
func (i *instruction) asVecDupElement(rd, rn operand, arr vecArrangement, index vecIndex) {
|
|
i.kind = vecDupElement
|
|
i.u1 = uint64(arr)
|
|
i.rn, i.rd = rn, rd
|
|
i.u2 = uint64(index)
|
|
}
|
|
|
|
func (i *instruction) asVecExtract(rd, rn, rm operand, arr vecArrangement, index uint32) {
|
|
i.kind = vecExtract
|
|
i.u1 = uint64(arr)
|
|
i.rn, i.rm, i.rd = rn, rm, rd
|
|
i.u2 = uint64(index)
|
|
}
|
|
|
|
func (i *instruction) asVecMovElement(rd, rn operand, arr vecArrangement, rdIndex, rnIndex vecIndex) {
|
|
i.kind = vecMovElement
|
|
i.u1 = uint64(arr)
|
|
i.u2, i.u3 = uint64(rdIndex), uint64(rnIndex)
|
|
i.rn, i.rd = rn, rd
|
|
}
|
|
|
|
func (i *instruction) asVecMisc(op vecOp, rd, rn operand, arr vecArrangement) {
|
|
i.kind = vecMisc
|
|
i.u1 = uint64(op)
|
|
i.rn, i.rd = rn, rd
|
|
i.u2 = uint64(arr)
|
|
}
|
|
|
|
func (i *instruction) asVecLanes(op vecOp, rd, rn operand, arr vecArrangement) {
|
|
i.kind = vecLanes
|
|
i.u1 = uint64(op)
|
|
i.rn, i.rd = rn, rd
|
|
i.u2 = uint64(arr)
|
|
}
|
|
|
|
func (i *instruction) asVecShiftImm(op vecOp, rd, rn, rm operand, arr vecArrangement) {
|
|
i.kind = vecShiftImm
|
|
i.u1 = uint64(op)
|
|
i.rn, i.rm, i.rd = rn, rm, rd
|
|
i.u2 = uint64(arr)
|
|
}
|
|
|
|
func (i *instruction) asVecTbl(nregs byte, rd, rn, rm operand, arr vecArrangement) {
|
|
switch nregs {
|
|
case 0, 1:
|
|
i.kind = vecTbl
|
|
case 2:
|
|
i.kind = vecTbl2
|
|
if !rn.reg().IsRealReg() {
|
|
panic("rn is not a RealReg")
|
|
}
|
|
if rn.realReg() == v31 {
|
|
panic("rn cannot be v31")
|
|
}
|
|
default:
|
|
panic(fmt.Sprintf("unsupported number of registers %d", nregs))
|
|
}
|
|
i.rn, i.rm, i.rd = rn, rm, rd
|
|
i.u2 = uint64(arr)
|
|
}
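// Note that for the two-register table form (vecTbl2), rn and the implicit rn+1 must be
// consecutive real registers: uses() above reports both rn and rn+1, which is why asVecTbl
// rejects a non-real rn as well as v31, which has no successor register.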
|
|
|
|
func (i *instruction) asVecPermute(op vecOp, rd, rn, rm operand, arr vecArrangement) {
|
|
i.kind = vecPermute
|
|
i.u1 = uint64(op)
|
|
i.rn, i.rm, i.rd = rn, rm, rd
|
|
i.u2 = uint64(arr)
|
|
}
|
|
|
|
func (i *instruction) asVecRRR(op vecOp, rd, rn, rm operand, arr vecArrangement) {
|
|
i.kind = vecRRR
|
|
i.u1 = uint64(op)
|
|
i.rn, i.rd, i.rm = rn, rd, rm
|
|
i.u2 = uint64(arr)
|
|
}
|
|
|
|
func (i *instruction) isCopy() bool {
|
|
op := i.kind
|
|
	// We do not include mov32 here since it is not a copy in the strict sense: it does not preserve
	// the upper 32 bits, and it is only used when lowering IReduce rather than for actual copies.
|
|
return op == mov64 || op == fpuMov64 || op == fpuMov128
|
|
}
|
|
|
|
// String implements fmt.Stringer.
|
|
func (i *instruction) String() (str string) {
|
|
is64SizeBitToSize := func(u3 uint64) byte {
|
|
if u3 == 0 {
|
|
return 32
|
|
}
|
|
return 64
|
|
}
|
|
|
|
switch i.kind {
|
|
case nop0:
|
|
if i.u1 != 0 {
|
|
l := label(i.u1)
|
|
str = fmt.Sprintf("%s:", l)
|
|
} else {
|
|
str = "nop0"
|
|
}
|
|
case aluRRR:
|
|
size := is64SizeBitToSize(i.u3)
|
|
str = fmt.Sprintf("%s %s, %s, %s", aluOp(i.u1).String(),
|
|
formatVRegSized(i.rd.nr(), size), formatVRegSized(i.rn.nr(), size),
|
|
i.rm.format(size))
|
|
case aluRRRR:
|
|
size := is64SizeBitToSize(i.u3)
|
|
str = fmt.Sprintf("%s %s, %s, %s, %s", aluOp(i.u1).String(),
|
|
formatVRegSized(i.rd.nr(), size), formatVRegSized(i.rn.nr(), size), formatVRegSized(i.rm.nr(), size), formatVRegSized(i.ra.nr(), size))
|
|
case aluRRImm12:
|
|
size := is64SizeBitToSize(i.u3)
|
|
str = fmt.Sprintf("%s %s, %s, %s", aluOp(i.u1).String(),
|
|
formatVRegSized(i.rd.nr(), size), formatVRegSized(i.rn.nr(), size), i.rm.format(size))
|
|
case aluRRBitmaskImm:
|
|
size := is64SizeBitToSize(i.u3)
|
|
rd, rn := formatVRegSized(i.rd.nr(), size), formatVRegSized(i.rn.nr(), size)
|
|
if size == 32 {
|
|
str = fmt.Sprintf("%s %s, %s, #%#x", aluOp(i.u1).String(), rd, rn, uint32(i.u2))
|
|
} else {
|
|
str = fmt.Sprintf("%s %s, %s, #%#x", aluOp(i.u1).String(), rd, rn, i.u2)
|
|
}
|
|
case aluRRImmShift:
|
|
size := is64SizeBitToSize(i.u3)
|
|
str = fmt.Sprintf("%s %s, %s, %#x",
|
|
aluOp(i.u1).String(),
|
|
formatVRegSized(i.rd.nr(), size),
|
|
formatVRegSized(i.rn.nr(), size),
|
|
i.rm.shiftImm(),
|
|
)
|
|
case aluRRRShift:
|
|
size := is64SizeBitToSize(i.u3)
|
|
str = fmt.Sprintf("%s %s, %s, %s",
|
|
aluOp(i.u1).String(),
|
|
formatVRegSized(i.rd.nr(), size),
|
|
formatVRegSized(i.rn.nr(), size),
|
|
i.rm.format(size),
|
|
)
|
|
case aluRRRExtend:
|
|
size := is64SizeBitToSize(i.u3)
|
|
str = fmt.Sprintf("%s %s, %s, %s", aluOp(i.u1).String(),
|
|
formatVRegSized(i.rd.nr(), size),
|
|
formatVRegSized(i.rn.nr(), size),
|
|
// Regardless of the source size, the register is formatted in 32-bit.
|
|
i.rm.format(32),
|
|
)
|
|
case bitRR:
|
|
size := is64SizeBitToSize(i.u2)
|
|
str = fmt.Sprintf("%s %s, %s",
|
|
bitOp(i.u1),
|
|
formatVRegSized(i.rd.nr(), size),
|
|
formatVRegSized(i.rn.nr(), size),
|
|
)
|
|
case uLoad8:
|
|
str = fmt.Sprintf("ldrb %s, %s", formatVRegSized(i.rd.nr(), 32), i.amode.format(32))
|
|
case sLoad8:
|
|
str = fmt.Sprintf("ldrsb %s, %s", formatVRegSized(i.rd.nr(), 32), i.amode.format(32))
|
|
case uLoad16:
|
|
str = fmt.Sprintf("ldrh %s, %s", formatVRegSized(i.rd.nr(), 32), i.amode.format(32))
|
|
case sLoad16:
|
|
str = fmt.Sprintf("ldrsh %s, %s", formatVRegSized(i.rd.nr(), 32), i.amode.format(32))
|
|
case uLoad32:
|
|
str = fmt.Sprintf("ldr %s, %s", formatVRegSized(i.rd.nr(), 32), i.amode.format(32))
|
|
case sLoad32:
|
|
str = fmt.Sprintf("ldrs %s, %s", formatVRegSized(i.rd.nr(), 32), i.amode.format(32))
|
|
case uLoad64:
|
|
str = fmt.Sprintf("ldr %s, %s", formatVRegSized(i.rd.nr(), 64), i.amode.format(64))
|
|
case store8:
|
|
str = fmt.Sprintf("strb %s, %s", formatVRegSized(i.rn.nr(), 32), i.amode.format(8))
|
|
case store16:
|
|
str = fmt.Sprintf("strh %s, %s", formatVRegSized(i.rn.nr(), 32), i.amode.format(16))
|
|
case store32:
|
|
str = fmt.Sprintf("str %s, %s", formatVRegSized(i.rn.nr(), 32), i.amode.format(32))
|
|
case store64:
|
|
str = fmt.Sprintf("str %s, %s", formatVRegSized(i.rn.nr(), 64), i.amode.format(64))
|
|
case storeP64:
|
|
str = fmt.Sprintf("stp %s, %s, %s",
|
|
formatVRegSized(i.rn.nr(), 64), formatVRegSized(i.rm.nr(), 64), i.amode.format(64))
|
|
case loadP64:
|
|
str = fmt.Sprintf("ldp %s, %s, %s",
|
|
formatVRegSized(i.rn.nr(), 64), formatVRegSized(i.rm.nr(), 64), i.amode.format(64))
|
|
case mov64:
|
|
str = fmt.Sprintf("mov %s, %s",
|
|
formatVRegSized(i.rd.nr(), 64),
|
|
formatVRegSized(i.rn.nr(), 64))
|
|
case mov32:
|
|
str = fmt.Sprintf("mov %s, %s", formatVRegSized(i.rd.nr(), 32), formatVRegSized(i.rn.nr(), 32))
|
|
case movZ:
|
|
size := is64SizeBitToSize(i.u3)
|
|
str = fmt.Sprintf("movz %s, #%#x, lsl %d", formatVRegSized(i.rd.nr(), size), uint16(i.u1), i.u2*16)
|
|
case movN:
|
|
size := is64SizeBitToSize(i.u3)
|
|
str = fmt.Sprintf("movn %s, #%#x, lsl %d", formatVRegSized(i.rd.nr(), size), uint16(i.u1), i.u2*16)
|
|
case movK:
|
|
size := is64SizeBitToSize(i.u3)
|
|
str = fmt.Sprintf("movk %s, #%#x, lsl %d", formatVRegSized(i.rd.nr(), size), uint16(i.u1), i.u2*16)
|
|
case extend:
|
|
fromBits, toBits := byte(i.u1), byte(i.u2)
|
|
|
|
var signedStr string
|
|
if i.u3 == 1 {
|
|
signedStr = "s"
|
|
} else {
|
|
signedStr = "u"
|
|
}
|
|
var fromStr string
|
|
switch fromBits {
|
|
case 8:
|
|
fromStr = "b"
|
|
case 16:
|
|
fromStr = "h"
|
|
case 32:
|
|
fromStr = "w"
|
|
}
|
|
str = fmt.Sprintf("%sxt%s %s, %s", signedStr, fromStr, formatVRegSized(i.rd.nr(), toBits), formatVRegSized(i.rn.nr(), 32))
|
|
case cSel:
|
|
size := is64SizeBitToSize(i.u3)
|
|
str = fmt.Sprintf("csel %s, %s, %s, %s",
|
|
formatVRegSized(i.rd.nr(), size),
|
|
formatVRegSized(i.rn.nr(), size),
|
|
formatVRegSized(i.rm.nr(), size),
|
|
condFlag(i.u1),
|
|
)
|
|
case cSet:
|
|
str = fmt.Sprintf("cset %s, %s", formatVRegSized(i.rd.nr(), 64), condFlag(i.u1))
|
|
case cCmpImm:
|
|
size := is64SizeBitToSize(i.u3)
|
|
str = fmt.Sprintf("ccmp %s, #%#x, #%#x, %s",
|
|
formatVRegSized(i.rn.nr(), size), i.rm.data,
|
|
i.u2&0b1111,
|
|
condFlag(i.u1))
|
|
case fpuMov64:
|
|
str = fmt.Sprintf("mov %s, %s",
|
|
formatVRegVec(i.rd.nr(), vecArrangement8B, vecIndexNone),
|
|
formatVRegVec(i.rn.nr(), vecArrangement8B, vecIndexNone))
|
|
case fpuMov128:
|
|
str = fmt.Sprintf("mov %s, %s",
|
|
formatVRegVec(i.rd.nr(), vecArrangement16B, vecIndexNone),
|
|
formatVRegVec(i.rn.nr(), vecArrangement16B, vecIndexNone))
|
|
case fpuMovFromVec:
|
|
panic("TODO")
|
|
case fpuRR:
|
|
dstSz := is64SizeBitToSize(i.u3)
|
|
srcSz := dstSz
|
|
op := fpuUniOp(i.u1)
|
|
switch op {
|
|
case fpuUniOpCvt32To64:
|
|
srcSz = 32
|
|
case fpuUniOpCvt64To32:
|
|
srcSz = 64
|
|
}
|
|
str = fmt.Sprintf("%s %s, %s", op.String(),
|
|
formatVRegSized(i.rd.nr(), dstSz), formatVRegSized(i.rn.nr(), srcSz))
|
|
case fpuRRR:
|
|
size := is64SizeBitToSize(i.u3)
|
|
str = fmt.Sprintf("%s %s, %s, %s", fpuBinOp(i.u1).String(),
|
|
formatVRegSized(i.rd.nr(), size), formatVRegSized(i.rn.nr(), size), formatVRegSized(i.rm.nr(), size))
|
|
case fpuRRI:
|
|
panic("TODO")
|
|
case fpuRRRR:
|
|
panic("TODO")
|
|
case fpuCmp:
|
|
size := is64SizeBitToSize(i.u3)
|
|
str = fmt.Sprintf("fcmp %s, %s",
|
|
formatVRegSized(i.rn.nr(), size), formatVRegSized(i.rm.nr(), size))
|
|
case fpuLoad32:
|
|
str = fmt.Sprintf("ldr %s, %s", formatVRegSized(i.rd.nr(), 32), i.amode.format(32))
|
|
case fpuStore32:
|
|
str = fmt.Sprintf("str %s, %s", formatVRegSized(i.rn.nr(), 32), i.amode.format(64))
|
|
case fpuLoad64:
|
|
str = fmt.Sprintf("ldr %s, %s", formatVRegSized(i.rd.nr(), 64), i.amode.format(64))
|
|
case fpuStore64:
|
|
str = fmt.Sprintf("str %s, %s", formatVRegSized(i.rn.nr(), 64), i.amode.format(64))
|
|
case fpuLoad128:
|
|
str = fmt.Sprintf("ldr %s, %s", formatVRegSized(i.rd.nr(), 128), i.amode.format(64))
|
|
case fpuStore128:
|
|
str = fmt.Sprintf("str %s, %s", formatVRegSized(i.rn.nr(), 128), i.amode.format(64))
|
|
case loadFpuConst32:
|
|
str = fmt.Sprintf("ldr %s, #8; b 8; data.f32 %f", formatVRegSized(i.rd.nr(), 32), math.Float32frombits(uint32(i.u1)))
|
|
case loadFpuConst64:
|
|
str = fmt.Sprintf("ldr %s, #8; b 16; data.f64 %f", formatVRegSized(i.rd.nr(), 64), math.Float64frombits(i.u1))
|
|
case loadFpuConst128:
|
|
str = fmt.Sprintf("ldr %s, #8; b 32; data.v128 %016x %016x",
|
|
formatVRegSized(i.rd.nr(), 128), i.u1, i.u2)
|
|
case fpuToInt:
|
|
var op, src, dst string
|
|
if signed := i.u1 == 1; signed {
|
|
op = "fcvtzs"
|
|
} else {
|
|
op = "fcvtzu"
|
|
}
|
|
if src64 := i.u2 == 1; src64 {
|
|
src = formatVRegWidthVec(i.rn.nr(), vecArrangementD)
|
|
} else {
|
|
src = formatVRegWidthVec(i.rn.nr(), vecArrangementS)
|
|
}
|
|
if dst64 := i.u3 == 1; dst64 {
|
|
dst = formatVRegSized(i.rd.nr(), 64)
|
|
} else {
|
|
dst = formatVRegSized(i.rd.nr(), 32)
|
|
}
|
|
str = fmt.Sprintf("%s %s, %s", op, dst, src)
|
|
|
|
case intToFpu:
|
|
var op, src, dst string
|
|
if signed := i.u1 == 1; signed {
|
|
op = "scvtf"
|
|
} else {
|
|
op = "ucvtf"
|
|
}
|
|
if src64 := i.u2 == 1; src64 {
|
|
src = formatVRegSized(i.rn.nr(), 64)
|
|
} else {
|
|
src = formatVRegSized(i.rn.nr(), 32)
|
|
}
|
|
if dst64 := i.u3 == 1; dst64 {
|
|
dst = formatVRegWidthVec(i.rd.nr(), vecArrangementD)
|
|
} else {
|
|
dst = formatVRegWidthVec(i.rd.nr(), vecArrangementS)
|
|
}
|
|
str = fmt.Sprintf("%s %s, %s", op, dst, src)
|
|
case fpuCSel:
|
|
size := is64SizeBitToSize(i.u3)
|
|
str = fmt.Sprintf("fcsel %s, %s, %s, %s",
|
|
formatVRegSized(i.rd.nr(), size),
|
|
formatVRegSized(i.rn.nr(), size),
|
|
formatVRegSized(i.rm.nr(), size),
|
|
condFlag(i.u1),
|
|
)
|
|
case movToVec:
|
|
var size byte
|
|
arr := vecArrangement(i.u1)
|
|
switch arr {
|
|
case vecArrangementB, vecArrangementH, vecArrangementS:
|
|
size = 32
|
|
case vecArrangementD:
|
|
size = 64
|
|
default:
|
|
panic("unsupported arrangement " + arr.String())
|
|
}
|
|
str = fmt.Sprintf("ins %s, %s", formatVRegVec(i.rd.nr(), arr, vecIndex(i.u2)), formatVRegSized(i.rn.nr(), size))
|
|
case movFromVec, movFromVecSigned:
|
|
var size byte
|
|
var opcode string
|
|
arr := vecArrangement(i.u1)
|
|
signed := i.kind == movFromVecSigned
|
|
switch arr {
|
|
case vecArrangementB, vecArrangementH, vecArrangementS:
|
|
size = 32
|
|
if signed {
|
|
opcode = "smov"
|
|
} else {
|
|
opcode = "umov"
|
|
}
|
|
case vecArrangementD:
|
|
size = 64
|
|
if signed {
|
|
opcode = "smov"
|
|
} else {
|
|
opcode = "mov"
|
|
}
|
|
default:
|
|
panic("unsupported arrangement " + arr.String())
|
|
}
|
|
str = fmt.Sprintf("%s %s, %s", opcode, formatVRegSized(i.rd.nr(), size), formatVRegVec(i.rn.nr(), arr, vecIndex(i.u2)))
|
|
case vecDup:
|
|
str = fmt.Sprintf("dup %s, %s",
|
|
formatVRegVec(i.rd.nr(), vecArrangement(i.u1), vecIndexNone),
|
|
formatVRegSized(i.rn.nr(), 64),
|
|
)
|
|
case vecDupElement:
|
|
arr := vecArrangement(i.u1)
|
|
str = fmt.Sprintf("dup %s, %s",
|
|
formatVRegVec(i.rd.nr(), arr, vecIndexNone),
|
|
formatVRegVec(i.rn.nr(), arr, vecIndex(i.u2)),
|
|
)
|
|
case vecDupFromFpu:
|
|
panic("TODO")
|
|
case vecExtract:
|
|
str = fmt.Sprintf("ext %s, %s, %s, #%d",
|
|
formatVRegVec(i.rd.nr(), vecArrangement(i.u1), vecIndexNone),
|
|
formatVRegVec(i.rn.nr(), vecArrangement(i.u1), vecIndexNone),
|
|
formatVRegVec(i.rm.nr(), vecArrangement(i.u1), vecIndexNone),
|
|
uint32(i.u2),
|
|
)
|
|
case vecExtend:
|
|
panic("TODO")
|
|
case vecMovElement:
|
|
str = fmt.Sprintf("mov %s, %s",
|
|
formatVRegVec(i.rd.nr(), vecArrangement(i.u1), vecIndex(i.u2)),
|
|
formatVRegVec(i.rn.nr(), vecArrangement(i.u1), vecIndex(i.u3)),
|
|
)
|
|
case vecMiscNarrow:
|
|
panic("TODO")
|
|
case vecRRR:
|
|
str = fmt.Sprintf("%s %s, %s, %s",
|
|
vecOp(i.u1),
|
|
formatVRegVec(i.rd.nr(), vecArrangement(i.u2), vecIndexNone),
|
|
formatVRegVec(i.rn.nr(), vecArrangement(i.u2), vecIndexNone),
|
|
formatVRegVec(i.rm.nr(), vecArrangement(i.u2), vecIndexNone),
|
|
)
|
|
case vecMisc:
|
|
vop := vecOp(i.u1)
|
|
if vop == vecOpCmeq0 {
|
|
str = fmt.Sprintf("cmeq %s, %s, #0",
|
|
formatVRegVec(i.rd.nr(), vecArrangement(i.u2), vecIndexNone),
|
|
formatVRegVec(i.rn.nr(), vecArrangement(i.u2), vecIndexNone))
|
|
} else {
|
|
str = fmt.Sprintf("%s %s, %s",
|
|
vop,
|
|
formatVRegVec(i.rd.nr(), vecArrangement(i.u2), vecIndexNone),
|
|
formatVRegVec(i.rn.nr(), vecArrangement(i.u2), vecIndexNone))
|
|
}
|
|
case vecLanes:
|
|
arr := vecArrangement(i.u2)
|
|
var destArr vecArrangement
|
|
switch arr {
|
|
case vecArrangement8B, vecArrangement16B:
|
|
destArr = vecArrangementH
|
|
case vecArrangement4H, vecArrangement8H:
|
|
destArr = vecArrangementS
|
|
case vecArrangement4S:
|
|
destArr = vecArrangementD
|
|
default:
|
|
panic("invalid arrangement " + arr.String())
|
|
}
|
|
str = fmt.Sprintf("%s %s, %s",
|
|
vecOp(i.u1),
|
|
formatVRegWidthVec(i.rd.nr(), destArr),
|
|
formatVRegVec(i.rn.nr(), arr, vecIndexNone))
|
|
case vecShiftImm:
|
|
arr := vecArrangement(i.u2)
|
|
str = fmt.Sprintf("%s %s, %s, #%d",
|
|
vecOp(i.u1),
|
|
formatVRegVec(i.rd.nr(), arr, vecIndexNone),
|
|
formatVRegVec(i.rn.nr(), arr, vecIndexNone),
|
|
i.rm.shiftImm())
|
|
case vecTbl:
|
|
arr := vecArrangement(i.u2)
|
|
str = fmt.Sprintf("tbl %s, { %s }, %s",
|
|
formatVRegVec(i.rd.nr(), arr, vecIndexNone),
|
|
formatVRegVec(i.rn.nr(), vecArrangement16B, vecIndexNone),
|
|
formatVRegVec(i.rm.nr(), arr, vecIndexNone))
|
|
case vecTbl2:
|
|
arr := vecArrangement(i.u2)
|
|
rd, rn, rm := i.rd.nr(), i.rn.nr(), i.rm.nr()
|
|
rn1 := regalloc.FromRealReg(rn.RealReg()+1, rn.RegType())
|
|
str = fmt.Sprintf("tbl %s, { %s, %s }, %s",
|
|
formatVRegVec(rd, arr, vecIndexNone),
|
|
formatVRegVec(rn, vecArrangement16B, vecIndexNone),
|
|
formatVRegVec(rn1, vecArrangement16B, vecIndexNone),
|
|
formatVRegVec(rm, arr, vecIndexNone))
|
|
case vecPermute:
|
|
arr := vecArrangement(i.u2)
|
|
str = fmt.Sprintf("%s %s, %s, %s",
|
|
vecOp(i.u1),
|
|
formatVRegVec(i.rd.nr(), arr, vecIndexNone),
|
|
formatVRegVec(i.rn.nr(), arr, vecIndexNone),
|
|
formatVRegVec(i.rm.nr(), arr, vecIndexNone))
|
|
case movToFPSR:
|
|
str = fmt.Sprintf("msr fpsr, %s", formatVRegSized(i.rn.nr(), 64))
|
|
case movFromFPSR:
|
|
str = fmt.Sprintf("mrs %s fpsr", formatVRegSized(i.rd.nr(), 64))
|
|
case call:
|
|
if i.u2 > 0 {
|
|
str = fmt.Sprintf("bl #%#x", i.u2)
|
|
} else {
|
|
str = fmt.Sprintf("bl %s", ssa.FuncRef(i.u1))
|
|
}
|
|
case callInd:
|
|
str = fmt.Sprintf("bl %s", formatVRegSized(i.rn.nr(), 64))
|
|
case ret:
|
|
str = "ret"
|
|
case br:
|
|
target := label(i.u1)
|
|
if i.u3 != 0 {
|
|
str = fmt.Sprintf("b #%#x (%s)", i.brOffset(), target.String())
|
|
} else {
|
|
str = fmt.Sprintf("b %s", target.String())
|
|
}
|
|
case condBr:
|
|
size := is64SizeBitToSize(i.u3)
|
|
c := cond(i.u1)
|
|
target := label(i.u2)
|
|
switch c.kind() {
|
|
case condKindRegisterZero:
|
|
if !i.condBrOffsetResolved() {
|
|
str = fmt.Sprintf("cbz %s, (%s)", formatVRegSized(c.register(), size), target.String())
|
|
} else {
|
|
str = fmt.Sprintf("cbz %s, #%#x %s", formatVRegSized(c.register(), size), i.condBrOffset(), target.String())
|
|
}
|
|
case condKindRegisterNotZero:
|
|
if offset := i.condBrOffset(); offset != 0 {
|
|
str = fmt.Sprintf("cbnz %s, #%#x (%s)", formatVRegSized(c.register(), size), offset, target.String())
|
|
} else {
|
|
str = fmt.Sprintf("cbnz %s, %s", formatVRegSized(c.register(), size), target.String())
|
|
}
|
|
case condKindCondFlagSet:
|
|
if offset := i.condBrOffset(); offset != 0 {
|
|
if target == invalidLabel {
|
|
str = fmt.Sprintf("b.%s #%#x", c.flag(), offset)
|
|
} else {
|
|
str = fmt.Sprintf("b.%s #%#x, (%s)", c.flag(), offset, target.String())
|
|
}
|
|
} else {
|
|
str = fmt.Sprintf("b.%s %s", c.flag(), target.String())
|
|
}
|
|
}
|
|
case adr:
|
|
str = fmt.Sprintf("adr %s, #%#x", formatVRegSized(i.rd.nr(), 64), int64(i.u1))
|
|
case brTableSequence:
|
|
if i.u3 == 0 { // The offsets haven't been resolved yet.
|
|
labels := make([]string, len(i.targets))
|
|
for index, l := range i.targets {
|
|
labels[index] = label(l).String()
|
|
}
|
|
str = fmt.Sprintf("br_table_sequence %s, [%s]",
|
|
formatVRegSized(i.rn.nr(), 64),
|
|
strings.Join(labels, ", "),
|
|
)
|
|
} else {
|
|
// See encodeBrTableSequence for the encoding.
|
|
offsets := make([]string, len(i.targets))
|
|
for index, offset := range i.targets {
|
|
offsets[index] = fmt.Sprintf("%#x", int32(offset))
|
|
}
|
|
str = fmt.Sprintf(
|
|
`adr %[2]s, #16; ldrsw %[1]s, [%[2]s, %[1]s, UXTW 2]; add %[2]s, %[2]s, %[1]s; br %[2]s; %s`,
|
|
formatVRegSized(i.rn.nr(), 64),
|
|
formatVRegSized(tmpRegVReg, 64),
|
|
offsets,
|
|
)
|
|
}
|
|
case exitSequence:
|
|
str = fmt.Sprintf("exit_sequence %s", formatVRegSized(i.rn.nr(), 64))
|
|
case udf:
|
|
str = "udf"
|
|
case emitSourceOffsetInfo:
|
|
str = fmt.Sprintf("source_offset_info %d", ssa.SourceOffset(i.u1))
|
|
case vecLoad1R:
|
|
str = fmt.Sprintf("ld1r {%s}, [%s]", formatVRegVec(i.rd.nr(), vecArrangement(i.u1), vecIndexNone), formatVRegSized(i.rn.nr(), 64))
|
|
default:
|
|
panic(i.kind)
|
|
}
|
|
return
|
|
}
|
|
|
|
func (i *instruction) asAdr(rd regalloc.VReg, offset int64) {
|
|
i.kind = adr
|
|
i.rd = operandNR(rd)
|
|
i.u1 = uint64(offset)
|
|
}
|
|
|
|
// TODO: delete unnecessary things.
|
|
const (
|
|
// nop0 represents a no-op of zero size.
|
|
nop0 instructionKind = iota + 1
|
|
// aluRRR represents an ALU operation with two register sources and a register destination.
|
|
aluRRR
|
|
// aluRRRR represents an ALU operation with three register sources and a register destination.
|
|
aluRRRR
|
|
// aluRRImm12 represents an ALU operation with a register source and an immediate-12 source, with a register destination.
|
|
aluRRImm12
|
|
// aluRRBitmaskImm represents an ALU operation with a register source and a bitmask immediate, with a register destination.
|
|
aluRRBitmaskImm
|
|
// aluRRImmShift represents an ALU operation with a register source and an immediate-shifted source, with a register destination.
|
|
aluRRImmShift
|
|
// aluRRRShift represents an ALU operation with two register sources, one of which can be shifted, with a register destination.
|
|
aluRRRShift
|
|
// aluRRRExtend represents an ALU operation with two register sources, one of which can be extended, with a register destination.
|
|
aluRRRExtend
|
|
// bitRR represents a bit op instruction with a single register source.
|
|
bitRR
|
|
// uLoad8 represents an unsigned 8-bit load.
|
|
uLoad8
|
|
// sLoad8 represents a signed 8-bit load into 64-bit register.
|
|
sLoad8
|
|
// uLoad16 represents an unsigned 16-bit load into 64-bit register.
|
|
uLoad16
|
|
// sLoad16 represents a signed 16-bit load into 64-bit register.
|
|
sLoad16
|
|
// uLoad32 represents an unsigned 32-bit load into 64-bit register.
|
|
uLoad32
|
|
// sLoad32 represents a signed 32-bit load into 64-bit register.
|
|
sLoad32
|
|
// uLoad64 represents a 64-bit load.
|
|
uLoad64
|
|
// store8 represents an 8-bit store.
|
|
store8
|
|
// store16 represents a 16-bit store.
|
|
store16
|
|
// store32 represents a 32-bit store.
|
|
store32
|
|
// store64 represents a 64-bit store.
|
|
store64
|
|
// storeP64 represents a store of a pair of registers.
|
|
storeP64
|
|
// loadP64 represents a load of a pair of registers.
|
|
loadP64
|
|
// mov64 represents a MOV instruction. These are encoded as ORRs, but we keep them separate for better handling.
|
|
mov64
|
|
// mov32 represents a 32-bit MOV. This zeroes the top 32 bits of the destination.
|
|
mov32
|
|
// movZ represents a MOVZ with a 16-bit immediate.
|
|
movZ
|
|
// movN represents a MOVN with a 16-bit immediate.
|
|
movN
|
|
// movK represents a MOVK with a 16-bit immediate.
|
|
movK
|
|
// extend represents a sign- or zero-extend operation.
|
|
extend
|
|
// cSel represents a conditional-select operation.
|
|
cSel
|
|
// cSet represents a conditional-set operation.
|
|
cSet
|
|
// cCmpImm represents a conditional comparison with an immediate.
|
|
cCmpImm
|
|
// fpuMov64 represents an FPU move. Distinct from a vector-register move; moving just 64 bits appears to be significantly faster.
|
|
fpuMov64
|
|
// fpuMov128 represents a vector register move.
|
|
fpuMov128
|
|
// fpuMovFromVec represents a move to scalar from a vector element.
|
|
fpuMovFromVec
|
|
// fpuRR represents a 1-op FPU instruction.
|
|
fpuRR
|
|
// fpuRRR represents a 2-op FPU instruction.
|
|
fpuRRR
|
|
// fpuRRI represents a 2-op FPU instruction with immediate value.
|
|
fpuRRI
|
|
// fpuRRRR represents a 3-op FPU instruction.
|
|
fpuRRRR
|
|
// fpuCmp represents a FPU comparison, either 32 or 64 bit.
|
|
fpuCmp
|
|
// fpuLoad32 represents a floating-point load, single-precision (32 bit).
|
|
fpuLoad32
|
|
// fpuStore32 represents a floating-point store, single-precision (32 bit).
|
|
fpuStore32
|
|
// fpuLoad64 represents a floating-point load, double-precision (64 bit).
|
|
fpuLoad64
|
|
// fpuStore64 represents a floating-point store, double-precision (64 bit).
|
|
fpuStore64
|
|
// fpuLoad128 represents a floating-point/vector load, 128 bit.
|
|
fpuLoad128
|
|
// fpuStore128 represents a floating-point/vector store, 128 bit.
|
|
fpuStore128
|
|
// loadFpuConst32 represents a load of a 32-bit floating-point constant.
|
|
loadFpuConst32
|
|
// loadFpuConst64 represents a load of a 64-bit floating-point constant.
|
|
loadFpuConst64
|
|
// loadFpuConst128 represents a load of a 128-bit floating-point constant.
|
|
loadFpuConst128
|
|
// vecLoad1R represents a load of one single-element structure that is replicated to all lanes of a vector.
|
|
vecLoad1R
|
|
// fpuToInt represents a conversion from FP to integer.
|
|
fpuToInt
|
|
// intToFpu represents a conversion from integer to FP.
|
|
intToFpu
|
|
// fpuCSel represents a 32/64-bit FP conditional select.
|
|
fpuCSel
|
|
// movToVec represents a move to a vector element from a GPR.
|
|
movToVec
|
|
// movFromVec represents an unsigned move from a vector element to a GPR.
|
|
movFromVec
|
|
// movFromVecSigned represents a signed move from a vector element to a GPR.
|
|
movFromVecSigned
|
|
// vecDup represents a duplication of general-purpose register to vector.
|
|
vecDup
|
|
// vecDupElement represents a duplication of a vector element to vector or scalar.
|
|
vecDupElement
|
|
// vecDupFromFpu represents a duplication of scalar to vector.
|
|
vecDupFromFpu
|
|
// vecExtract represents a vector extraction operation.
|
|
vecExtract
|
|
// vecExtend represents a vector extension operation.
|
|
vecExtend
|
|
// vecMovElement represents a move vector element to another vector element operation.
|
|
vecMovElement
|
|
// vecMiscNarrow represents a vector narrowing operation.
|
|
vecMiscNarrow
|
|
// vecRRR represents a vector ALU operation.
|
|
vecRRR
|
|
// vecMisc represents a vector two register miscellaneous instruction.
|
|
vecMisc
|
|
// vecLanes represents a vector instruction across lanes.
|
|
vecLanes
|
|
// vecShiftImm represents a SIMD scalar shift by immediate instruction.
|
|
vecShiftImm
|
|
// vecTbl represents a table vector lookup - single register table.
|
|
vecTbl
|
|
// vecTbl2 represents a table vector lookup - two register table.
|
|
vecTbl2
|
|
// vecPermute represents a vector permute instruction.
|
|
vecPermute
|
|
// movToFPSR represents a move to the FPSR.
|
|
movToFPSR
|
|
// movFromFPSR represents a move from the FPSR.
|
|
movFromFPSR
|
|
// call represents a machine call instruction.
|
|
call
|
|
// callInd represents a machine indirect-call instruction.
|
|
callInd
|
|
// ret represents a machine return instruction.
|
|
ret
|
|
// br represents an unconditional branch.
|
|
br
|
|
// condBr represents a conditional branch.
|
|
condBr
|
|
// adr represents an instruction that computes the address of a memory location, using a PC-relative offset.
|
|
adr
|
|
// brTableSequence represents a jump-table sequence.
|
|
brTableSequence
|
|
// exitSequence consists of multiple instructions, and exits the execution immediately.
|
|
// See encodeExitSequence.
|
|
exitSequence
|
|
// udf is the undefined instruction. For debugging only.
|
|
udf
|
|
|
|
// emitSourceOffsetInfo is a dummy instruction to emit source offset info.
|
|
// The existence of this instruction does not affect the execution.
|
|
emitSourceOffsetInfo
|
|
|
|
// ------------------- do not define below this line -------------------
|
|
numInstructionKinds
|
|
)
|
|
|
|
func (i *instruction) asEmitSourceOffsetInfo(l ssa.SourceOffset) *instruction {
|
|
i.kind = emitSourceOffsetInfo
|
|
i.u1 = uint64(l)
|
|
return i
|
|
}
|
|
|
|
func (i *instruction) sourceOffsetInfo() ssa.SourceOffset {
|
|
return ssa.SourceOffset(i.u1)
|
|
}
|
|
|
|
func (i *instruction) asUDF() *instruction {
|
|
i.kind = udf
|
|
return i
|
|
}
|
|
|
|
func (i *instruction) asFpuToInt(rd, rn operand, rdSigned, src64bit, dst64bit bool) {
|
|
i.kind = fpuToInt
|
|
i.rn = rn
|
|
i.rd = rd
|
|
if rdSigned {
|
|
i.u1 = 1
|
|
}
|
|
if src64bit {
|
|
i.u2 = 1
|
|
}
|
|
if dst64bit {
|
|
i.u3 = 1
|
|
}
|
|
}
|
|
|
|
func (i *instruction) asIntToFpu(rd, rn operand, rnSigned, src64bit, dst64bit bool) {
|
|
i.kind = intToFpu
|
|
i.rn = rn
|
|
i.rd = rd
|
|
if rnSigned {
|
|
i.u1 = 1
|
|
}
|
|
if src64bit {
|
|
i.u2 = 1
|
|
}
|
|
if dst64bit {
|
|
i.u3 = 1
|
|
}
|
|
}
|
|
|
|
func (i *instruction) asExitSequence(ctx regalloc.VReg) *instruction {
|
|
i.kind = exitSequence
|
|
i.rn = operandNR(ctx)
|
|
return i
|
|
}
|
|
|
|
// aluOp determines the type of ALU operation. Instructions whose kind is one of
|
|
// aluRRR, aluRRRR, aluRRImm12, aluRRBitmaskImm, aluRRImmShift, aluRRRShift and aluRRRExtend
|
|
// would use this type.
|
|
type aluOp int
|
|
|
|
func (a aluOp) String() string {
|
|
switch a {
|
|
case aluOpAdd:
|
|
return "add"
|
|
case aluOpSub:
|
|
return "sub"
|
|
case aluOpOrr:
|
|
return "orr"
|
|
case aluOpAnd:
|
|
return "and"
|
|
case aluOpBic:
|
|
return "bic"
|
|
case aluOpEor:
|
|
return "eor"
|
|
case aluOpAddS:
|
|
return "adds"
|
|
case aluOpSubS:
|
|
return "subs"
|
|
case aluOpSMulH:
|
|
return "sMulH"
|
|
case aluOpUMulH:
|
|
return "uMulH"
|
|
case aluOpSDiv:
|
|
return "sdiv"
|
|
case aluOpUDiv:
|
|
return "udiv"
|
|
case aluOpRotR:
|
|
return "ror"
|
|
case aluOpLsr:
|
|
return "lsr"
|
|
case aluOpAsr:
|
|
return "asr"
|
|
case aluOpLsl:
|
|
return "lsl"
|
|
case aluOpMAdd:
|
|
return "madd"
|
|
case aluOpMSub:
|
|
return "msub"
|
|
}
|
|
panic(int(a))
|
|
}
|
|
|
|
const (
|
|
// 32/64-bit Add.
|
|
aluOpAdd aluOp = iota
|
|
// 32/64-bit Subtract.
|
|
aluOpSub
|
|
// 32/64-bit Bitwise OR.
|
|
aluOpOrr
|
|
// 32/64-bit Bitwise AND.
|
|
aluOpAnd
|
|
// 32/64-bit Bitwise AND NOT.
|
|
aluOpBic
|
|
// 32/64-bit Bitwise XOR (Exclusive OR).
|
|
aluOpEor
|
|
// 32/64-bit Add setting flags.
|
|
aluOpAddS
|
|
// 32/64-bit Subtract setting flags.
|
|
aluOpSubS
|
|
// Signed multiply, high-word result.
|
|
aluOpSMulH
|
|
// Unsigned multiply, high-word result.
|
|
aluOpUMulH
|
|
// 64-bit Signed divide.
|
|
aluOpSDiv
|
|
// 64-bit Unsigned divide.
|
|
aluOpUDiv
|
|
// 32/64-bit Rotate right.
|
|
aluOpRotR
|
|
// 32/64-bit Logical shift right.
|
|
aluOpLsr
|
|
// 32/64-bit Arithmetic shift right.
|
|
aluOpAsr
|
|
// 32/64-bit Logical shift left.
|
|
aluOpLsl
|
|
|
|
// MAdd and MSub are only applicable for aluRRRR.
|
|
aluOpMAdd
|
|
aluOpMSub
|
|
)
|
|
|
|
// vecOp determines the type of vector operation. Instructions whose kind is one of
// vecMisc, vecLanes, vecShiftImm, vecRRR, and vecPermute use this type.
|
|
type vecOp int
|
|
|
|
// String implements fmt.Stringer.
|
|
func (b vecOp) String() string {
|
|
switch b {
|
|
case vecOpCnt:
|
|
return "cnt"
|
|
case vecOpCmeq:
|
|
return "cmeq"
|
|
case vecOpCmgt:
|
|
return "cmgt"
|
|
case vecOpCmhi:
|
|
return "cmhi"
|
|
case vecOpCmge:
|
|
return "cmge"
|
|
case vecOpCmhs:
|
|
return "cmhs"
|
|
case vecOpFcmeq:
|
|
return "fcmeq"
|
|
case vecOpFcmgt:
|
|
return "fcmgt"
|
|
case vecOpFcmge:
|
|
return "fcmge"
|
|
case vecOpCmeq0:
|
|
return "cmeq0"
|
|
case vecOpUaddlv:
|
|
return "uaddlv"
|
|
case vecOpBit:
|
|
return "bit"
|
|
case vecOpBic:
|
|
return "bic"
|
|
case vecOpBsl:
|
|
return "bsl"
|
|
case vecOpNot:
|
|
return "not"
|
|
case vecOpAnd:
|
|
return "and"
|
|
case vecOpOrr:
|
|
return "orr"
|
|
case vecOpEOR:
|
|
return "eor"
|
|
case vecOpFadd:
|
|
return "fadd"
|
|
case vecOpAdd:
|
|
return "add"
|
|
case vecOpAddp:
|
|
return "addp"
|
|
case vecOpAddv:
|
|
return "addv"
|
|
case vecOpSub:
|
|
return "sub"
|
|
case vecOpFsub:
|
|
return "fsub"
|
|
case vecOpSmin:
|
|
return "smin"
|
|
case vecOpUmin:
|
|
return "umin"
|
|
case vecOpUminv:
|
|
return "uminv"
|
|
case vecOpSmax:
|
|
return "smax"
|
|
case vecOpUmax:
|
|
return "umax"
|
|
case vecOpUmaxp:
|
|
return "umaxp"
|
|
case vecOpUrhadd:
|
|
return "urhadd"
|
|
case vecOpFmul:
|
|
return "fmul"
|
|
case vecOpSqrdmulh:
|
|
return "sqrdmulh"
|
|
case vecOpMul:
|
|
return "mul"
|
|
case vecOpUmlal:
|
|
return "umlal"
|
|
case vecOpFdiv:
|
|
return "fdiv"
|
|
case vecOpFsqrt:
|
|
return "fsqrt"
|
|
case vecOpAbs:
|
|
return "abs"
|
|
case vecOpFabs:
|
|
return "fabs"
|
|
case vecOpNeg:
|
|
return "neg"
|
|
case vecOpFneg:
|
|
return "fneg"
|
|
case vecOpFrintp:
|
|
return "frintp"
|
|
case vecOpFrintm:
|
|
return "frintm"
|
|
case vecOpFrintn:
|
|
return "frintn"
|
|
case vecOpFrintz:
|
|
return "frintz"
|
|
case vecOpFcvtl:
|
|
return "fcvtl"
|
|
case vecOpFcvtn:
|
|
return "fcvtn"
|
|
case vecOpFcvtzu:
|
|
return "fcvtzu"
|
|
case vecOpFcvtzs:
|
|
return "fcvtzs"
|
|
case vecOpScvtf:
|
|
return "scvtf"
|
|
case vecOpUcvtf:
|
|
return "ucvtf"
|
|
case vecOpSqxtn:
|
|
return "sqxtn"
|
|
case vecOpUqxtn:
|
|
return "uqxtn"
|
|
case vecOpSqxtun:
|
|
return "sqxtun"
|
|
case vecOpRev64:
|
|
return "rev64"
|
|
case vecOpXtn:
|
|
return "xtn"
|
|
case vecOpShll:
|
|
return "shll"
|
|
case vecOpSshl:
|
|
return "sshl"
|
|
case vecOpSshll:
|
|
return "sshll"
|
|
case vecOpUshl:
|
|
return "ushl"
|
|
case vecOpUshll:
|
|
return "ushll"
|
|
case vecOpSshr:
|
|
return "sshr"
|
|
case vecOpZip1:
|
|
return "zip1"
|
|
case vecOpFmin:
|
|
return "fmin"
|
|
case vecOpFmax:
|
|
return "fmax"
|
|
}
|
|
panic(int(b))
|
|
}
|
|
|
|
const (
|
|
vecOpCnt vecOp = iota
|
|
vecOpCmeq0
|
|
vecOpCmeq
|
|
vecOpCmgt
|
|
vecOpCmhi
|
|
vecOpCmge
|
|
vecOpCmhs
|
|
vecOpFcmeq
|
|
vecOpFcmgt
|
|
vecOpFcmge
|
|
vecOpUaddlv
|
|
vecOpBit
|
|
vecOpBic
|
|
vecOpBsl
|
|
vecOpNot
|
|
vecOpAnd
|
|
vecOpOrr
|
|
vecOpEOR
|
|
vecOpAdd
|
|
vecOpFadd
|
|
vecOpAddv
|
|
vecOpSqadd
|
|
vecOpUqadd
|
|
vecOpAddp
|
|
vecOpSub
|
|
vecOpFsub
|
|
vecOpSqsub
|
|
vecOpUqsub
|
|
vecOpSmin
|
|
vecOpUmin
|
|
vecOpUminv
|
|
vecOpFmin
|
|
vecOpSmax
|
|
vecOpUmax
|
|
vecOpUmaxp
|
|
vecOpFmax
|
|
vecOpUrhadd
|
|
vecOpMul
|
|
vecOpFmul
|
|
vecOpSqrdmulh
|
|
vecOpUmlal
|
|
vecOpFdiv
|
|
vecOpFsqrt
|
|
vecOpAbs
|
|
vecOpFabs
|
|
vecOpNeg
|
|
vecOpFneg
|
|
vecOpFrintm
|
|
vecOpFrintn
|
|
vecOpFrintp
|
|
vecOpFrintz
|
|
vecOpFcvtl
|
|
vecOpFcvtn
|
|
vecOpFcvtzs
|
|
vecOpFcvtzu
|
|
vecOpScvtf
|
|
vecOpUcvtf
|
|
vecOpSqxtn
|
|
vecOpSqxtun
|
|
vecOpUqxtn
|
|
vecOpRev64
|
|
vecOpXtn
|
|
vecOpShll
|
|
vecOpSshl
|
|
vecOpSshll
|
|
vecOpUshl
|
|
vecOpUshll
|
|
vecOpSshr
|
|
vecOpZip1
|
|
)
|
|
|
|
// bitOp determines the type of bit operation performed by bitRR instructions:
// either bitOpRbit or bitOpClz.
|
|
type bitOp int
|
|
|
|
// String implements fmt.Stringer.
|
|
func (b bitOp) String() string {
|
|
switch b {
|
|
case bitOpRbit:
|
|
return "rbit"
|
|
case bitOpClz:
|
|
return "clz"
|
|
}
|
|
panic(int(b))
|
|
}
|
|
|
|
const (
|
|
// 32/64-bit Rbit.
|
|
bitOpRbit bitOp = iota
|
|
// 32/64-bit Clz.
|
|
bitOpClz
|
|
)
|
|
|
|
// fpuUniOp represents a unary floating-point unit (FPU) operation.
|
|
type fpuUniOp byte
|
|
|
|
const (
|
|
fpuUniOpNeg fpuUniOp = iota
|
|
fpuUniOpCvt32To64
|
|
fpuUniOpCvt64To32
|
|
fpuUniOpSqrt
|
|
fpuUniOpRoundPlus
|
|
fpuUniOpRoundMinus
|
|
fpuUniOpRoundZero
|
|
fpuUniOpRoundNearest
|
|
fpuUniOpAbs
|
|
)
|
|
|
|
// String implements the fmt.Stringer.
|
|
func (f fpuUniOp) String() string {
|
|
switch f {
|
|
case fpuUniOpNeg:
|
|
return "fneg"
|
|
case fpuUniOpCvt32To64:
|
|
return "fcvt"
|
|
case fpuUniOpCvt64To32:
|
|
return "fcvt"
|
|
case fpuUniOpSqrt:
|
|
return "fsqrt"
|
|
case fpuUniOpRoundPlus:
|
|
return "frintp"
|
|
case fpuUniOpRoundMinus:
|
|
return "frintm"
|
|
case fpuUniOpRoundZero:
|
|
return "frintz"
|
|
case fpuUniOpRoundNearest:
|
|
return "frintn"
|
|
case fpuUniOpAbs:
|
|
return "fabs"
|
|
}
|
|
panic(int(f))
|
|
}
|
|
|
|
// fpuBinOp represents a binary floating-point unit (FPU) operation.
|
|
type fpuBinOp byte
|
|
|
|
const (
|
|
fpuBinOpAdd = iota
|
|
fpuBinOpSub
|
|
fpuBinOpMul
|
|
fpuBinOpDiv
|
|
fpuBinOpMax
|
|
fpuBinOpMin
|
|
)
|
|
|
|
// String implements the fmt.Stringer.
|
|
func (f fpuBinOp) String() string {
|
|
switch f {
|
|
case fpuBinOpAdd:
|
|
return "fadd"
|
|
case fpuBinOpSub:
|
|
return "fsub"
|
|
case fpuBinOpMul:
|
|
return "fmul"
|
|
case fpuBinOpDiv:
|
|
return "fdiv"
|
|
case fpuBinOpMax:
|
|
return "fmax"
|
|
case fpuBinOpMin:
|
|
return "fmin"
|
|
}
|
|
panic(int(f))
|
|
}
|
|
|
|
// extMode represents the mode of a register operand extension.
|
|
// For example, aluRRRExtend instructions need this info to determine the extensions.
|
|
type extMode byte
|
|
|
|
const (
|
|
extModeNone extMode = iota
|
|
// extModeZeroExtend32 suggests a zero-extension to 32 bits if the original bit size is less than 32.
|
|
extModeZeroExtend32
|
|
// extModeSignExtend32 stands for a sign-extension to 32 bits if the original bit size is less than 32.
|
|
extModeSignExtend32
|
|
// extModeZeroExtend64 suggests a zero-extension to 64 bits if the original bit size is less than 64.
|
|
extModeZeroExtend64
|
|
// extModeSignExtend64 stands for a sign-extension to 64 bits if the original bit size is less than 64.
|
|
extModeSignExtend64
|
|
)
|
|
|
|
func (e extMode) bits() byte {
	switch e {
	case extModeZeroExtend32, extModeSignExtend32:
		return 32
	case extModeZeroExtend64, extModeSignExtend64:
		return 64
	default:
		return 0
	}
}

func (e extMode) signed() bool {
	switch e {
	case extModeSignExtend32, extModeSignExtend64:
		return true
	default:
		return false
	}
}

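// Taken together, bits and signed fully identify an extMode: for example,
// extModeSignExtend32 reports bits() == 32 and signed() == true, while
// extModeNone reports bits() == 0 and signed() == false.
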
func extModeOf(t ssa.Type, signed bool) extMode {
	switch t.Bits() {
	case 32:
		if signed {
			return extModeSignExtend32
		}
		return extModeZeroExtend32
	case 64:
		if signed {
			return extModeSignExtend64
		}
		return extModeZeroExtend64
	default:
		panic("TODO? do we need narrower than 32 bits?")
	}
}

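// For reference, extModeOf maps as follows (assuming ssa.TypeI32 and
// ssa.TypeI64 report 32 and 64 bits, respectively):
//
//	extModeOf(ssa.TypeI32, true)  == extModeSignExtend32
//	extModeOf(ssa.TypeI32, false) == extModeZeroExtend32
//	extModeOf(ssa.TypeI64, false) == extModeZeroExtend64
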
type extendOp byte

const (
	extendOpUXTB extendOp = 0b000
	extendOpUXTH extendOp = 0b001
	extendOpUXTW extendOp = 0b010
	// extendOpUXTX does nothing, but is a convenient symbol that officially exists. See:
	// https://stackoverflow.com/questions/72041372/what-do-the-uxtx-and-sxtx-extensions-mean-for-32-bit-aarch64-adds-instruct
	extendOpUXTX extendOp = 0b011
	extendOpSXTB extendOp = 0b100
	extendOpSXTH extendOp = 0b101
	extendOpSXTW extendOp = 0b110
	// extendOpSXTX does nothing, but is a convenient symbol that officially exists. See:
	// https://stackoverflow.com/questions/72041372/what-do-the-uxtx-and-sxtx-extensions-mean-for-32-bit-aarch64-adds-instruct
	extendOpSXTX extendOp = 0b111
	extendOpNone extendOp = 0xff
)

func (e extendOp) srcBits() byte {
	switch e {
	case extendOpUXTB, extendOpSXTB:
		return 8
	case extendOpUXTH, extendOpSXTH:
		return 16
	case extendOpUXTW, extendOpSXTW:
		return 32
	case extendOpUXTX, extendOpSXTX:
		return 64
	}
	panic(int(e))
}

func (e extendOp) String() string {
	switch e {
	case extendOpUXTB:
		return "UXTB"
	case extendOpUXTH:
		return "UXTH"
	case extendOpUXTW:
		return "UXTW"
	case extendOpUXTX:
		return "UXTX"
	case extendOpSXTB:
		return "SXTB"
	case extendOpSXTH:
		return "SXTH"
	case extendOpSXTW:
		return "SXTW"
	case extendOpSXTX:
		return "SXTX"
	}
	panic(int(e))
}

func extendOpFrom(signed bool, from byte) extendOp {
	switch from {
	case 8:
		if signed {
			return extendOpSXTB
		}
		return extendOpUXTB
	case 16:
		if signed {
			return extendOpSXTH
		}
		return extendOpUXTH
	case 32:
		if signed {
			return extendOpSXTW
		}
		return extendOpUXTW
	case 64:
		if signed {
			return extendOpSXTX
		}
		return extendOpUXTX
	}
	panic("invalid extendOpFrom")
}

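// extendOpFrom is effectively the inverse of srcBits: for every supported
// width n, extendOpFrom(signed, n).srcBits() == n. For example,
// extendOpFrom(true, 32) == extendOpSXTW and extendOpFrom(false, 8) == extendOpUXTB.
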
type shiftOp byte

const (
	shiftOpLSL shiftOp = 0b00
	shiftOpLSR shiftOp = 0b01
	shiftOpASR shiftOp = 0b10
	shiftOpROR shiftOp = 0b11
)

func (s shiftOp) String() string {
	switch s {
	case shiftOpLSL:
		return "lsl"
	case shiftOpLSR:
		return "lsr"
	case shiftOpASR:
		return "asr"
	case shiftOpROR:
		return "ror"
	}
	panic(int(s))
}

const exitSequenceSize = 6 * 4 // 6 instructions as in encodeExitSequence.

// size returns the size of the instruction in encoded bytes.
func (i *instruction) size() int64 {
	switch i.kind {
	case exitSequence:
		return exitSequenceSize // 6 instructions as in encodeExitSequence.
	case nop0:
		return 0
	case emitSourceOffsetInfo:
		return 0
	case loadFpuConst32:
		if i.u1 == 0 {
			return 4 // zero loading can be encoded as a single instruction.
		}
		return 4 + 4 + 4
	case loadFpuConst64:
		if i.u1 == 0 {
			return 4 // zero loading can be encoded as a single instruction.
		}
		return 4 + 4 + 8
	case loadFpuConst128:
		if i.u1 == 0 && i.u2 == 0 {
			return 4 // zero loading can be encoded as a single instruction.
		}
		return 4 + 4 + 16
	case brTableSequence:
		return 4*4 + int64(len(i.targets))*4
	default:
		return 4
	}
}

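// As a concrete reading of the sizes above: going by the 4 + 4 + 8 breakdown,
// a loadFpuConst64 with a non-zero constant occupies 16 bytes (presumably two
// 4-byte instructions plus the 8-byte constant itself), while a zero constant
// needs only a single 4-byte instruction.
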
// vecArrangement is the arrangement of data within a vector register.
type vecArrangement byte

const (
	// vecArrangementNone is an arrangement indicating no data is stored.
	vecArrangementNone vecArrangement = iota
	// vecArrangement8B is an arrangement of 8 bytes (64-bit vector)
	vecArrangement8B
	// vecArrangement16B is an arrangement of 16 bytes (128-bit vector)
	vecArrangement16B
	// vecArrangement4H is an arrangement of 4 half precisions (64-bit vector)
	vecArrangement4H
	// vecArrangement8H is an arrangement of 8 half precisions (128-bit vector)
	vecArrangement8H
	// vecArrangement2S is an arrangement of 2 single precisions (64-bit vector)
	vecArrangement2S
	// vecArrangement4S is an arrangement of 4 single precisions (128-bit vector)
	vecArrangement4S
	// vecArrangement1D is an arrangement of 1 double precision (64-bit vector)
	vecArrangement1D
	// vecArrangement2D is an arrangement of 2 double precisions (128-bit vector)
	vecArrangement2D

	// Each vector size specifier is also assigned a vecArrangement value.
	// An instruction can carry either an arrangement or a size specifier, but not
	// both, so storing either one in the same field simplifies the internal
	// representation of vector instructions.

	// vecArrangementB is a size specifier of a byte (8-bit)
	vecArrangementB
	// vecArrangementH is a size specifier of a halfword (16-bit)
	vecArrangementH
	// vecArrangementS is a size specifier of a word (32-bit)
	vecArrangementS
	// vecArrangementD is a size specifier of a doubleword (64-bit)
	vecArrangementD
	// vecArrangementQ is a size specifier of a quadword, i.e. the entire vector (128-bit)
	vecArrangementQ
)

// String implements fmt.Stringer
func (v vecArrangement) String() (ret string) {
	switch v {
	case vecArrangement8B:
		ret = "8B"
	case vecArrangement16B:
		ret = "16B"
	case vecArrangement4H:
		ret = "4H"
	case vecArrangement8H:
		ret = "8H"
	case vecArrangement2S:
		ret = "2S"
	case vecArrangement4S:
		ret = "4S"
	case vecArrangement1D:
		ret = "1D"
	case vecArrangement2D:
		ret = "2D"
	case vecArrangementB:
		ret = "B"
	case vecArrangementH:
		ret = "H"
	case vecArrangementS:
		ret = "S"
	case vecArrangementD:
		ret = "D"
	case vecArrangementQ:
		ret = "Q"
	case vecArrangementNone:
		ret = "none"
	default:
		panic(v)
	}
	return
}

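// Arrangements 8B, 4H, 2S, and 1D cover the low 64 bits of a vector register,
// while 16B, 8H, 4S, and 2D cover the full 128 bits. In AArch64 assembly the
// arrangement follows the register name, e.g. (illustrative syntax only, not
// necessarily this backend's exact output):
//
//	add v0.16B, v1.16B, v2.16B
//	fadd v2.2D, v0.2D, v1.2D
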
// vecIndex is the index of an element of a vector register.
type vecIndex byte

// vecIndexNone indicates no vector index specified.
const vecIndexNone = ^vecIndex(0)

func ssaLaneToArrangement(lane ssa.VecLane) vecArrangement {
	switch lane {
	case ssa.VecLaneI8x16:
		return vecArrangement16B
	case ssa.VecLaneI16x8:
		return vecArrangement8H
	case ssa.VecLaneI32x4:
		return vecArrangement4S
	case ssa.VecLaneI64x2:
		return vecArrangement2D
	case ssa.VecLaneF32x4:
		return vecArrangement4S
	case ssa.VecLaneF64x2:
		return vecArrangement2D
	default:
		panic(lane)
	}
}

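// Note that integer and floating-point lanes of the same width map to the same
// arrangement: ssa.VecLaneI32x4 and ssa.VecLaneF32x4 both map to
// vecArrangement4S, and ssa.VecLaneI64x2 and ssa.VecLaneF64x2 both map to
// vecArrangement2D.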