package arm64 import ( "fmt" "math" "strings" "github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc" "github.com/tetratelabs/wazero/internal/engine/wazevo/ssa" ) type ( // instruction represents either a real instruction in arm64, or the meta instructions // that are convenient for code generation. For example, inline constants are also treated // as instructions. // // Basically, each instruction knows how to get encoded in binaries. Hence, the final output of compilation // can be considered equivalent to the sequence of such instructions. // // Each field is interpreted depending on the kind. // // TODO: optimize the layout later once the impl settles. instruction struct { kind instructionKind prev, next *instruction u1, u2, u3 uint64 rd, rm, rn, ra operand amode addressMode abi *abiImpl targets []uint32 addedBeforeRegAlloc bool } // instructionKind represents the kind of instruction. // This controls how the instruction struct is interpreted. instructionKind int ) type defKind byte const ( defKindNone defKind = iota + 1 defKindRD defKindCall ) var defKinds = [numInstructionKinds]defKind{ adr: defKindRD, aluRRR: defKindRD, aluRRRR: defKindRD, aluRRImm12: defKindRD, aluRRBitmaskImm: defKindRD, aluRRRShift: defKindRD, aluRRImmShift: defKindRD, aluRRRExtend: defKindRD, bitRR: defKindRD, movZ: defKindRD, movK: defKindRD, movN: defKindRD, mov32: defKindRD, mov64: defKindRD, fpuMov64: defKindRD, fpuMov128: defKindRD, fpuRR: defKindRD, fpuRRR: defKindRD, nop0: defKindNone, call: defKindCall, callInd: defKindCall, ret: defKindNone, store8: defKindNone, store16: defKindNone, store32: defKindNone, store64: defKindNone, exitSequence: defKindNone, condBr: defKindNone, br: defKindNone, brTableSequence: defKindNone, cSet: defKindRD, extend: defKindRD, fpuCmp: defKindNone, uLoad8: defKindRD, uLoad16: defKindRD, uLoad32: defKindRD, sLoad8: defKindRD, sLoad16: defKindRD, sLoad32: defKindRD, uLoad64: defKindRD, fpuLoad32: defKindRD, fpuLoad64: defKindRD, fpuLoad128: defKindRD, vecLoad1R: defKindRD, loadFpuConst32: defKindRD, loadFpuConst64: defKindRD, loadFpuConst128: defKindRD, fpuStore32: defKindNone, fpuStore64: defKindNone, fpuStore128: defKindNone, udf: defKindNone, cSel: defKindRD, fpuCSel: defKindRD, movToVec: defKindRD, movFromVec: defKindRD, movFromVecSigned: defKindRD, vecDup: defKindRD, vecDupElement: defKindRD, vecExtract: defKindRD, vecMisc: defKindRD, vecMovElement: defKindRD, vecLanes: defKindRD, vecShiftImm: defKindRD, vecTbl: defKindRD, vecTbl2: defKindRD, vecPermute: defKindRD, vecRRR: defKindRD, fpuToInt: defKindRD, intToFpu: defKindRD, cCmpImm: defKindNone, movToFPSR: defKindNone, movFromFPSR: defKindRD, emitSourceOffsetInfo: defKindNone, } // defs returns the list of regalloc.VReg that are defined by the instruction. // In order to reduce the number of allocations, the caller can pass the slice to be used. func (i *instruction) defs(regs []regalloc.VReg) []regalloc.VReg { switch defKinds[i.kind] { case defKindNone: case defKindRD: regs = append(regs, i.rd.nr()) case defKindCall: regs = append(regs, i.abi.retRealRegs...) 
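	// Any other kind is a bug: every instruction kind must have an entry in defKinds.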
default: panic(fmt.Sprintf("defKind for %v not defined", i)) } return regs } func (i *instruction) assignDef(reg regalloc.VReg) { switch defKinds[i.kind] { case defKindNone: case defKindRD: i.rd = i.rd.assignReg(reg) case defKindCall: panic("BUG: call instructions shouldn't be assigned") default: panic(fmt.Sprintf("defKind for %v not defined", i)) } } type useKind byte const ( useKindNone useKind = iota + 1 useKindRN useKindRNRM useKindRNRMRA useKindRNRN1RM useKindRet useKindCall useKindCallInd useKindAMode useKindRNAMode useKindCond ) var useKinds = [numInstructionKinds]useKind{ udf: useKindNone, aluRRR: useKindRNRM, aluRRRR: useKindRNRMRA, aluRRImm12: useKindRN, aluRRBitmaskImm: useKindRN, aluRRRShift: useKindRNRM, aluRRImmShift: useKindRN, aluRRRExtend: useKindRNRM, bitRR: useKindRN, movZ: useKindNone, movK: useKindNone, movN: useKindNone, mov32: useKindRN, mov64: useKindRN, fpuMov64: useKindRN, fpuMov128: useKindRN, fpuRR: useKindRN, fpuRRR: useKindRNRM, nop0: useKindNone, call: useKindCall, callInd: useKindCallInd, ret: useKindRet, store8: useKindRNAMode, store16: useKindRNAMode, store32: useKindRNAMode, store64: useKindRNAMode, exitSequence: useKindRN, condBr: useKindCond, br: useKindNone, brTableSequence: useKindRN, cSet: useKindNone, extend: useKindRN, fpuCmp: useKindRNRM, uLoad8: useKindAMode, uLoad16: useKindAMode, uLoad32: useKindAMode, sLoad8: useKindAMode, sLoad16: useKindAMode, sLoad32: useKindAMode, uLoad64: useKindAMode, fpuLoad32: useKindAMode, fpuLoad64: useKindAMode, fpuLoad128: useKindAMode, fpuStore32: useKindRNAMode, fpuStore64: useKindRNAMode, fpuStore128: useKindRNAMode, loadFpuConst32: useKindNone, loadFpuConst64: useKindNone, loadFpuConst128: useKindNone, vecLoad1R: useKindRN, cSel: useKindRNRM, fpuCSel: useKindRNRM, movToVec: useKindRN, movFromVec: useKindRN, movFromVecSigned: useKindRN, vecDup: useKindRN, vecDupElement: useKindRN, vecExtract: useKindRNRM, cCmpImm: useKindRN, vecMisc: useKindRN, vecMovElement: useKindRN, vecLanes: useKindRN, vecShiftImm: useKindRN, vecTbl: useKindRNRM, vecTbl2: useKindRNRN1RM, vecRRR: useKindRNRM, vecPermute: useKindRNRM, fpuToInt: useKindRN, intToFpu: useKindRN, movToFPSR: useKindRN, movFromFPSR: useKindNone, adr: useKindNone, emitSourceOffsetInfo: useKindNone, } // uses returns the list of regalloc.VReg that are used by the instruction. // In order to reduce the number of allocations, the caller can pass the slice to be used. func (i *instruction) uses(regs []regalloc.VReg) []regalloc.VReg { switch useKinds[i.kind] { case useKindNone: case useKindRN: if rn := i.rn.reg(); rn.Valid() { regs = append(regs, rn) } case useKindRNRM: if rn := i.rn.reg(); rn.Valid() { regs = append(regs, rn) } if rm := i.rm.reg(); rm.Valid() { regs = append(regs, rm) } case useKindRNRMRA: if rn := i.rn.reg(); rn.Valid() { regs = append(regs, rn) } if rm := i.rm.reg(); rm.Valid() { regs = append(regs, rm) } if ra := i.ra.reg(); ra.Valid() { regs = append(regs, ra) } case useKindRNRN1RM: if rn := i.rn.reg(); rn.Valid() && rn.IsRealReg() { rn1 := regalloc.FromRealReg(rn.RealReg()+1, rn.RegType()) regs = append(regs, rn, rn1) } if rm := i.rm.reg(); rm.Valid() { regs = append(regs, rm) } case useKindRet: regs = append(regs, i.abi.retRealRegs...) 
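	// Load instructions that only take an address mode use whichever of the mode's registers (amode.rn, amode.rm) are valid.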
case useKindAMode: if amodeRN := i.amode.rn; amodeRN.Valid() { regs = append(regs, amodeRN) } if amodeRM := i.amode.rm; amodeRM.Valid() { regs = append(regs, amodeRM) } case useKindRNAMode: regs = append(regs, i.rn.reg()) if amodeRN := i.amode.rn; amodeRN.Valid() { regs = append(regs, amodeRN) } if amodeRM := i.amode.rm; amodeRM.Valid() { regs = append(regs, amodeRM) } case useKindCond: cnd := cond(i.u1) if cnd.kind() != condKindCondFlagSet { regs = append(regs, cnd.register()) } case useKindCall: regs = append(regs, i.abi.argRealRegs...) case useKindCallInd: regs = append(regs, i.rn.nr()) regs = append(regs, i.abi.argRealRegs...) default: panic(fmt.Sprintf("useKind for %v not defined", i)) } return regs } func (i *instruction) assignUse(index int, reg regalloc.VReg) { switch useKinds[i.kind] { case useKindNone: case useKindRN: if rn := i.rn.reg(); rn.Valid() { i.rn = i.rn.assignReg(reg) } case useKindRNRM: if index == 0 { if rn := i.rn.reg(); rn.Valid() { i.rn = i.rn.assignReg(reg) } } else { if rm := i.rm.reg(); rm.Valid() { i.rm = i.rm.assignReg(reg) } } case useKindRNRN1RM: if index == 0 { if rn := i.rn.reg(); rn.Valid() { i.rn = i.rn.assignReg(reg) } if rn1 := i.rn.reg() + 1; rn1.Valid() { i.rm = i.rm.assignReg(reg + 1) } } else { if rm := i.rm.reg(); rm.Valid() { i.rm = i.rm.assignReg(reg) } } case useKindRNRMRA: if index == 0 { if rn := i.rn.reg(); rn.Valid() { i.rn = i.rn.assignReg(reg) } } else if index == 1 { if rm := i.rm.reg(); rm.Valid() { i.rm = i.rm.assignReg(reg) } } else { if ra := i.ra.reg(); ra.Valid() { i.ra = i.ra.assignReg(reg) } } case useKindRet: panic("BUG: ret instructions shouldn't be assigned") case useKindAMode: if index == 0 { if amodeRN := i.amode.rn; amodeRN.Valid() { i.amode.rn = reg } } else { if amodeRM := i.amode.rm; amodeRM.Valid() { i.amode.rm = reg } } case useKindRNAMode: if index == 0 { i.rn = i.rn.assignReg(reg) } else if index == 1 { if amodeRN := i.amode.rn; amodeRN.Valid() { i.amode.rn = reg } else { panic("BUG") } } else { if amodeRM := i.amode.rm; amodeRM.Valid() { i.amode.rm = reg } else { panic("BUG") } } case useKindCond: c := cond(i.u1) switch c.kind() { case condKindRegisterZero: i.u1 = uint64(registerAsRegZeroCond(reg)) case condKindRegisterNotZero: i.u1 = uint64(registerAsRegNotZeroCond(reg)) } case useKindCall: panic("BUG: call instructions shouldn't be assigned") case useKindCallInd: i.rn = i.rn.assignReg(reg) default: panic(fmt.Sprintf("useKind for %v not defined", i)) } } func (i *instruction) asCall(ref ssa.FuncRef, abi *abiImpl) { i.kind = call i.u1 = uint64(ref) i.abi = abi } func (i *instruction) asCallIndirect(ptr regalloc.VReg, abi *abiImpl) { i.kind = callInd i.rn = operandNR(ptr) i.abi = abi } func (i *instruction) callFuncRef() ssa.FuncRef { return ssa.FuncRef(i.u1) } // shift must be divided by 16 and must be in range 0-3 (if dst64bit is true) or 0-1 (if dst64bit is false) func (i *instruction) asMOVZ(dst regalloc.VReg, imm uint64, shift uint64, dst64bit bool) { i.kind = movZ i.rd = operandNR(dst) i.u1 = imm i.u2 = shift if dst64bit { i.u3 = 1 } } // shift must be divided by 16 and must be in range 0-3 (if dst64bit is true) or 0-1 (if dst64bit is false) func (i *instruction) asMOVK(dst regalloc.VReg, imm uint64, shift uint64, dst64bit bool) { i.kind = movK i.rd = operandNR(dst) i.u1 = imm i.u2 = shift if dst64bit { i.u3 = 1 } } // shift must be divided by 16 and must be in range 0-3 (if dst64bit is true) or 0-1 (if dst64bit is false) func (i *instruction) asMOVN(dst regalloc.VReg, imm uint64, shift uint64, dst64bit bool) { 
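	// As with asMOVZ/asMOVK, shift is given in units of 16 bits (the raw shift amount divided by 16); MOVN materializes the bitwise NOT of the shifted 16-bit immediate.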
i.kind = movN i.rd = operandNR(dst) i.u1 = imm i.u2 = shift if dst64bit { i.u3 = 1 } } func (i *instruction) asNop0() *instruction { i.kind = nop0 return i } func (i *instruction) asNop0WithLabel(l label) { i.kind = nop0 i.u1 = uint64(l) } func (i *instruction) nop0Label() label { return label(i.u1) } func (i *instruction) asRet(abi *abiImpl) { i.kind = ret i.abi = abi } func (i *instruction) asStorePair64(src1, src2 regalloc.VReg, amode addressMode) { i.kind = storeP64 i.rn = operandNR(src1) i.rm = operandNR(src2) i.amode = amode } func (i *instruction) asLoadPair64(src1, src2 regalloc.VReg, amode addressMode) { i.kind = loadP64 i.rn = operandNR(src1) i.rm = operandNR(src2) i.amode = amode } func (i *instruction) asStore(src operand, amode addressMode, sizeInBits byte) { switch sizeInBits { case 8: i.kind = store8 case 16: i.kind = store16 case 32: if src.reg().RegType() == regalloc.RegTypeInt { i.kind = store32 } else { i.kind = fpuStore32 } case 64: if src.reg().RegType() == regalloc.RegTypeInt { i.kind = store64 } else { i.kind = fpuStore64 } case 128: i.kind = fpuStore128 } i.rn = src i.amode = amode } func (i *instruction) asSLoad(dst operand, amode addressMode, sizeInBits byte) { switch sizeInBits { case 8: i.kind = sLoad8 case 16: i.kind = sLoad16 case 32: i.kind = sLoad32 default: panic("BUG") } i.rd = dst i.amode = amode } func (i *instruction) asULoad(dst operand, amode addressMode, sizeInBits byte) { switch sizeInBits { case 8: i.kind = uLoad8 case 16: i.kind = uLoad16 case 32: i.kind = uLoad32 case 64: i.kind = uLoad64 } i.rd = dst i.amode = amode } func (i *instruction) asFpuLoad(dst operand, amode addressMode, sizeInBits byte) { switch sizeInBits { case 32: i.kind = fpuLoad32 case 64: i.kind = fpuLoad64 case 128: i.kind = fpuLoad128 } i.rd = dst i.amode = amode } func (i *instruction) asVecLoad1R(rd, rn operand, arr vecArrangement) { // NOTE: currently only has support for no-offset loads, though it is suspicious that // we would need to support offset load (that is only available for post-index). i.kind = vecLoad1R i.rd = rd i.rn = rn i.u1 = uint64(arr) } func (i *instruction) asCSet(rd regalloc.VReg, c condFlag) { i.kind = cSet i.rd = operandNR(rd) i.u1 = uint64(c) } func (i *instruction) asCSel(rd, rn, rm operand, c condFlag, _64bit bool) { i.kind = cSel i.rd = rd i.rn = rn i.rm = rm i.u1 = uint64(c) if _64bit { i.u3 = 1 } } func (i *instruction) asFpuCSel(rd, rn, rm operand, c condFlag, _64bit bool) { i.kind = fpuCSel i.rd = rd i.rn = rn i.rm = rm i.u1 = uint64(c) if _64bit { i.u3 = 1 } } func (i *instruction) asBr(target label) { if target == returnLabel { panic("BUG: call site should special case for returnLabel") } i.kind = br i.u1 = uint64(target) } func (i *instruction) asBrTableSequence(indexReg regalloc.VReg, targets []uint32) { i.kind = brTableSequence i.rn = operandNR(indexReg) i.targets = targets } func (i *instruction) brTableSequenceOffsetsResolved() { i.u3 = 1 // indicate that the offsets are resolved, for debugging. } func (i *instruction) brLabel() label { return label(i.u1) } // brOffsetResolved is called when the target label is resolved. func (i *instruction) brOffsetResolved(offset int64) { i.u2 = uint64(offset) i.u3 = 1 // indicate that the offset is resolved, for debugging. } func (i *instruction) brOffset() int64 { return int64(i.u2) } // asCondBr encodes a conditional branch instruction. is64bit is only needed when cond is not flag. 
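// For cbz/cbnz-style conditions it selects whether the tested register is treated as a 32-bit or 64-bit value.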
func (i *instruction) asCondBr(c cond, target label, is64bit bool) { i.kind = condBr i.u1 = c.asUint64() i.u2 = uint64(target) if is64bit { i.u3 = 1 } } func (i *instruction) condBrLabel() label { return label(i.u2) } // condBrOffsetResolve is called when the target label is resolved. func (i *instruction) condBrOffsetResolve(offset int64) { i.rd.data = uint64(offset) i.rd.data2 = 1 // indicate that the offset is resolved, for debugging. } // condBrOffsetResolved returns true if condBrOffsetResolve is already called. func (i *instruction) condBrOffsetResolved() bool { return i.rd.data2 == 1 } func (i *instruction) condBrOffset() int64 { return int64(i.rd.data) } func (i *instruction) condBrCond() cond { return cond(i.u1) } func (i *instruction) condBr64bit() bool { return i.u3 == 1 } func (i *instruction) asLoadFpuConst32(rd regalloc.VReg, raw uint64) { i.kind = loadFpuConst32 i.u1 = raw i.rd = operandNR(rd) } func (i *instruction) asLoadFpuConst64(rd regalloc.VReg, raw uint64) { i.kind = loadFpuConst64 i.u1 = raw i.rd = operandNR(rd) } func (i *instruction) asLoadFpuConst128(rd regalloc.VReg, lo, hi uint64) { i.kind = loadFpuConst128 i.u1 = lo i.u2 = hi i.rd = operandNR(rd) } func (i *instruction) asFpuCmp(rn, rm operand, is64bit bool) { i.kind = fpuCmp i.rn, i.rm = rn, rm if is64bit { i.u3 = 1 } } func (i *instruction) asCCmpImm(rn operand, imm uint64, c condFlag, flag byte, is64bit bool) { i.kind = cCmpImm i.rn = rn i.rm.data = imm i.u1 = uint64(c) i.u2 = uint64(flag) if is64bit { i.u3 = 1 } } // asALU setups a basic ALU instruction. func (i *instruction) asALU(aluOp aluOp, rd, rn, rm operand, dst64bit bool) { switch rm.kind { case operandKindNR: i.kind = aluRRR case operandKindSR: i.kind = aluRRRShift case operandKindER: i.kind = aluRRRExtend case operandKindImm12: i.kind = aluRRImm12 default: panic("BUG") } i.u1 = uint64(aluOp) i.rd, i.rn, i.rm = rd, rn, rm if dst64bit { i.u3 = 1 } } // asALU setups a basic ALU instruction. func (i *instruction) asALURRRR(aluOp aluOp, rd, rn, rm, ra operand, dst64bit bool) { i.kind = aluRRRR i.u1 = uint64(aluOp) i.rd, i.rn, i.rm, i.ra = rd, rn, rm, ra if dst64bit { i.u3 = 1 } } // asALUShift setups a shift based ALU instruction. func (i *instruction) asALUShift(aluOp aluOp, rd, rn, rm operand, dst64bit bool) { switch rm.kind { case operandKindNR: i.kind = aluRRR // If the shift amount op is a register, then the instruction is encoded as a normal ALU instruction with two register operands. 
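	// If the shift amount is a constant, the instruction is encoded as an ALU instruction with an immediate shift amount.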
case operandKindShiftImm: i.kind = aluRRImmShift default: panic("BUG") } i.u1 = uint64(aluOp) i.rd, i.rn, i.rm = rd, rn, rm if dst64bit { i.u3 = 1 } } func (i *instruction) asALUBitmaskImm(aluOp aluOp, rd, rn regalloc.VReg, imm uint64, dst64bit bool) { i.kind = aluRRBitmaskImm i.u1 = uint64(aluOp) i.rn, i.rd = operandNR(rn), operandNR(rd) i.u2 = imm if dst64bit { i.u3 = 1 } } func (i *instruction) asMovToFPSR(rn regalloc.VReg) { i.kind = movToFPSR i.rn = operandNR(rn) } func (i *instruction) asMovFromFPSR(rd regalloc.VReg) { i.kind = movFromFPSR i.rd = operandNR(rd) } func (i *instruction) asBitRR(bitOp bitOp, rd, rn regalloc.VReg, is64bit bool) { i.kind = bitRR i.rn, i.rd = operandNR(rn), operandNR(rd) i.u1 = uint64(bitOp) if is64bit { i.u2 = 1 } } func (i *instruction) asFpuRRR(op fpuBinOp, rd, rn, rm operand, dst64bit bool) { i.kind = fpuRRR i.u1 = uint64(op) i.rd, i.rn, i.rm = rd, rn, rm if dst64bit { i.u3 = 1 } } func (i *instruction) asFpuRR(op fpuUniOp, rd, rn operand, dst64bit bool) { i.kind = fpuRR i.u1 = uint64(op) i.rd, i.rn = rd, rn if dst64bit { i.u3 = 1 } } func (i *instruction) asExtend(rd, rn regalloc.VReg, fromBits, toBits byte, signed bool) { i.kind = extend i.rn, i.rd = operandNR(rn), operandNR(rd) i.u1 = uint64(fromBits) i.u2 = uint64(toBits) if signed { i.u3 = 1 } } func (i *instruction) asMove32(rd, rn regalloc.VReg) { i.kind = mov32 i.rn, i.rd = operandNR(rn), operandNR(rd) } func (i *instruction) asMove64(rd, rn regalloc.VReg) { i.kind = mov64 i.rn, i.rd = operandNR(rn), operandNR(rd) } func (i *instruction) asFpuMov64(rd, rn regalloc.VReg) { i.kind = fpuMov64 i.rn, i.rd = operandNR(rn), operandNR(rd) } func (i *instruction) asFpuMov128(rd, rn regalloc.VReg) { i.kind = fpuMov128 i.rn, i.rd = operandNR(rn), operandNR(rd) } func (i *instruction) asMovToVec(rd, rn operand, arr vecArrangement, index vecIndex) { i.kind = movToVec i.rd = rd i.rn = rn i.u1, i.u2 = uint64(arr), uint64(index) } func (i *instruction) asMovFromVec(rd, rn operand, arr vecArrangement, index vecIndex, signed bool) { if signed { i.kind = movFromVecSigned } else { i.kind = movFromVec } i.rd = rd i.rn = rn i.u1, i.u2 = uint64(arr), uint64(index) } func (i *instruction) asVecDup(rd, rn operand, arr vecArrangement) { i.kind = vecDup i.u1 = uint64(arr) i.rn, i.rd = rn, rd } func (i *instruction) asVecDupElement(rd, rn operand, arr vecArrangement, index vecIndex) { i.kind = vecDupElement i.u1 = uint64(arr) i.rn, i.rd = rn, rd i.u2 = uint64(index) } func (i *instruction) asVecExtract(rd, rn, rm operand, arr vecArrangement, index uint32) { i.kind = vecExtract i.u1 = uint64(arr) i.rn, i.rm, i.rd = rn, rm, rd i.u2 = uint64(index) } func (i *instruction) asVecMovElement(rd, rn operand, arr vecArrangement, rdIndex, rnIndex vecIndex) { i.kind = vecMovElement i.u1 = uint64(arr) i.u2, i.u3 = uint64(rdIndex), uint64(rnIndex) i.rn, i.rd = rn, rd } func (i *instruction) asVecMisc(op vecOp, rd, rn operand, arr vecArrangement) { i.kind = vecMisc i.u1 = uint64(op) i.rn, i.rd = rn, rd i.u2 = uint64(arr) } func (i *instruction) asVecLanes(op vecOp, rd, rn operand, arr vecArrangement) { i.kind = vecLanes i.u1 = uint64(op) i.rn, i.rd = rn, rd i.u2 = uint64(arr) } func (i *instruction) asVecShiftImm(op vecOp, rd, rn, rm operand, arr vecArrangement) { i.kind = vecShiftImm i.u1 = uint64(op) i.rn, i.rm, i.rd = rn, rm, rd i.u2 = uint64(arr) } func (i *instruction) asVecTbl(nregs byte, rd, rn, rm operand, arr vecArrangement) { switch nregs { case 0, 1: i.kind = vecTbl case 2: i.kind = vecTbl2 if !rn.reg().IsRealReg() { 
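		// vecTbl2 consumes the consecutive register pair {rn, rn+1} (see useKindRNRN1RM), so rn must already be a real register here, and it must not be the last vector register v31.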
panic("rn is not a RealReg") } if rn.realReg() == v31 { panic("rn cannot be v31") } default: panic(fmt.Sprintf("unsupported number of registers %d", nregs)) } i.rn, i.rm, i.rd = rn, rm, rd i.u2 = uint64(arr) } func (i *instruction) asVecPermute(op vecOp, rd, rn, rm operand, arr vecArrangement) { i.kind = vecPermute i.u1 = uint64(op) i.rn, i.rm, i.rd = rn, rm, rd i.u2 = uint64(arr) } func (i *instruction) asVecRRR(op vecOp, rd, rn, rm operand, arr vecArrangement) { i.kind = vecRRR i.u1 = uint64(op) i.rn, i.rd, i.rm = rn, rd, rm i.u2 = uint64(arr) } func (i *instruction) isCopy() bool { op := i.kind // We do not include mov32 as it is not a copy instruction in the sense that it does not preserve the upper 32 bits, // and it is only used in the translation of IReduce, not the actual copy indeed. return op == mov64 || op == fpuMov64 || op == fpuMov128 } // String implements fmt.Stringer. func (i *instruction) String() (str string) { is64SizeBitToSize := func(u3 uint64) byte { if u3 == 0 { return 32 } return 64 } switch i.kind { case nop0: if i.u1 != 0 { l := label(i.u1) str = fmt.Sprintf("%s:", l) } else { str = "nop0" } case aluRRR: size := is64SizeBitToSize(i.u3) str = fmt.Sprintf("%s %s, %s, %s", aluOp(i.u1).String(), formatVRegSized(i.rd.nr(), size), formatVRegSized(i.rn.nr(), size), i.rm.format(size)) case aluRRRR: size := is64SizeBitToSize(i.u3) str = fmt.Sprintf("%s %s, %s, %s, %s", aluOp(i.u1).String(), formatVRegSized(i.rd.nr(), size), formatVRegSized(i.rn.nr(), size), formatVRegSized(i.rm.nr(), size), formatVRegSized(i.ra.nr(), size)) case aluRRImm12: size := is64SizeBitToSize(i.u3) str = fmt.Sprintf("%s %s, %s, %s", aluOp(i.u1).String(), formatVRegSized(i.rd.nr(), size), formatVRegSized(i.rn.nr(), size), i.rm.format(size)) case aluRRBitmaskImm: size := is64SizeBitToSize(i.u3) rd, rn := formatVRegSized(i.rd.nr(), size), formatVRegSized(i.rn.nr(), size) if size == 32 { str = fmt.Sprintf("%s %s, %s, #%#x", aluOp(i.u1).String(), rd, rn, uint32(i.u2)) } else { str = fmt.Sprintf("%s %s, %s, #%#x", aluOp(i.u1).String(), rd, rn, i.u2) } case aluRRImmShift: size := is64SizeBitToSize(i.u3) str = fmt.Sprintf("%s %s, %s, %#x", aluOp(i.u1).String(), formatVRegSized(i.rd.nr(), size), formatVRegSized(i.rn.nr(), size), i.rm.shiftImm(), ) case aluRRRShift: size := is64SizeBitToSize(i.u3) str = fmt.Sprintf("%s %s, %s, %s", aluOp(i.u1).String(), formatVRegSized(i.rd.nr(), size), formatVRegSized(i.rn.nr(), size), i.rm.format(size), ) case aluRRRExtend: size := is64SizeBitToSize(i.u3) str = fmt.Sprintf("%s %s, %s, %s", aluOp(i.u1).String(), formatVRegSized(i.rd.nr(), size), formatVRegSized(i.rn.nr(), size), // Regardless of the source size, the register is formatted in 32-bit. 
i.rm.format(32), ) case bitRR: size := is64SizeBitToSize(i.u2) str = fmt.Sprintf("%s %s, %s", bitOp(i.u1), formatVRegSized(i.rd.nr(), size), formatVRegSized(i.rn.nr(), size), ) case uLoad8: str = fmt.Sprintf("ldrb %s, %s", formatVRegSized(i.rd.nr(), 32), i.amode.format(32)) case sLoad8: str = fmt.Sprintf("ldrsb %s, %s", formatVRegSized(i.rd.nr(), 32), i.amode.format(32)) case uLoad16: str = fmt.Sprintf("ldrh %s, %s", formatVRegSized(i.rd.nr(), 32), i.amode.format(32)) case sLoad16: str = fmt.Sprintf("ldrsh %s, %s", formatVRegSized(i.rd.nr(), 32), i.amode.format(32)) case uLoad32: str = fmt.Sprintf("ldr %s, %s", formatVRegSized(i.rd.nr(), 32), i.amode.format(32)) case sLoad32: str = fmt.Sprintf("ldrs %s, %s", formatVRegSized(i.rd.nr(), 32), i.amode.format(32)) case uLoad64: str = fmt.Sprintf("ldr %s, %s", formatVRegSized(i.rd.nr(), 64), i.amode.format(64)) case store8: str = fmt.Sprintf("strb %s, %s", formatVRegSized(i.rn.nr(), 32), i.amode.format(8)) case store16: str = fmt.Sprintf("strh %s, %s", formatVRegSized(i.rn.nr(), 32), i.amode.format(16)) case store32: str = fmt.Sprintf("str %s, %s", formatVRegSized(i.rn.nr(), 32), i.amode.format(32)) case store64: str = fmt.Sprintf("str %s, %s", formatVRegSized(i.rn.nr(), 64), i.amode.format(64)) case storeP64: str = fmt.Sprintf("stp %s, %s, %s", formatVRegSized(i.rn.nr(), 64), formatVRegSized(i.rm.nr(), 64), i.amode.format(64)) case loadP64: str = fmt.Sprintf("ldp %s, %s, %s", formatVRegSized(i.rn.nr(), 64), formatVRegSized(i.rm.nr(), 64), i.amode.format(64)) case mov64: str = fmt.Sprintf("mov %s, %s", formatVRegSized(i.rd.nr(), 64), formatVRegSized(i.rn.nr(), 64)) case mov32: str = fmt.Sprintf("mov %s, %s", formatVRegSized(i.rd.nr(), 32), formatVRegSized(i.rn.nr(), 32)) case movZ: size := is64SizeBitToSize(i.u3) str = fmt.Sprintf("movz %s, #%#x, lsl %d", formatVRegSized(i.rd.nr(), size), uint16(i.u1), i.u2*16) case movN: size := is64SizeBitToSize(i.u3) str = fmt.Sprintf("movn %s, #%#x, lsl %d", formatVRegSized(i.rd.nr(), size), uint16(i.u1), i.u2*16) case movK: size := is64SizeBitToSize(i.u3) str = fmt.Sprintf("movk %s, #%#x, lsl %d", formatVRegSized(i.rd.nr(), size), uint16(i.u1), i.u2*16) case extend: fromBits, toBits := byte(i.u1), byte(i.u2) var signedStr string if i.u3 == 1 { signedStr = "s" } else { signedStr = "u" } var fromStr string switch fromBits { case 8: fromStr = "b" case 16: fromStr = "h" case 32: fromStr = "w" } str = fmt.Sprintf("%sxt%s %s, %s", signedStr, fromStr, formatVRegSized(i.rd.nr(), toBits), formatVRegSized(i.rn.nr(), 32)) case cSel: size := is64SizeBitToSize(i.u3) str = fmt.Sprintf("csel %s, %s, %s, %s", formatVRegSized(i.rd.nr(), size), formatVRegSized(i.rn.nr(), size), formatVRegSized(i.rm.nr(), size), condFlag(i.u1), ) case cSet: str = fmt.Sprintf("cset %s, %s", formatVRegSized(i.rd.nr(), 64), condFlag(i.u1)) case cCmpImm: size := is64SizeBitToSize(i.u3) str = fmt.Sprintf("ccmp %s, #%#x, #%#x, %s", formatVRegSized(i.rn.nr(), size), i.rm.data, i.u2&0b1111, condFlag(i.u1)) case fpuMov64: str = fmt.Sprintf("mov %s, %s", formatVRegVec(i.rd.nr(), vecArrangement8B, vecIndexNone), formatVRegVec(i.rn.nr(), vecArrangement8B, vecIndexNone)) case fpuMov128: str = fmt.Sprintf("mov %s, %s", formatVRegVec(i.rd.nr(), vecArrangement16B, vecIndexNone), formatVRegVec(i.rn.nr(), vecArrangement16B, vecIndexNone)) case fpuMovFromVec: panic("TODO") case fpuRR: dstSz := is64SizeBitToSize(i.u3) srcSz := dstSz op := fpuUniOp(i.u1) switch op { case fpuUniOpCvt32To64: srcSz = 32 case fpuUniOpCvt64To32: srcSz = 64 } str = fmt.Sprintf("%s %s, 
%s", op.String(), formatVRegSized(i.rd.nr(), dstSz), formatVRegSized(i.rn.nr(), srcSz)) case fpuRRR: size := is64SizeBitToSize(i.u3) str = fmt.Sprintf("%s %s, %s, %s", fpuBinOp(i.u1).String(), formatVRegSized(i.rd.nr(), size), formatVRegSized(i.rn.nr(), size), formatVRegSized(i.rm.nr(), size)) case fpuRRI: panic("TODO") case fpuRRRR: panic("TODO") case fpuCmp: size := is64SizeBitToSize(i.u3) str = fmt.Sprintf("fcmp %s, %s", formatVRegSized(i.rn.nr(), size), formatVRegSized(i.rm.nr(), size)) case fpuLoad32: str = fmt.Sprintf("ldr %s, %s", formatVRegSized(i.rd.nr(), 32), i.amode.format(32)) case fpuStore32: str = fmt.Sprintf("str %s, %s", formatVRegSized(i.rn.nr(), 32), i.amode.format(64)) case fpuLoad64: str = fmt.Sprintf("ldr %s, %s", formatVRegSized(i.rd.nr(), 64), i.amode.format(64)) case fpuStore64: str = fmt.Sprintf("str %s, %s", formatVRegSized(i.rn.nr(), 64), i.amode.format(64)) case fpuLoad128: str = fmt.Sprintf("ldr %s, %s", formatVRegSized(i.rd.nr(), 128), i.amode.format(64)) case fpuStore128: str = fmt.Sprintf("str %s, %s", formatVRegSized(i.rn.nr(), 128), i.amode.format(64)) case loadFpuConst32: str = fmt.Sprintf("ldr %s, #8; b 8; data.f32 %f", formatVRegSized(i.rd.nr(), 32), math.Float32frombits(uint32(i.u1))) case loadFpuConst64: str = fmt.Sprintf("ldr %s, #8; b 16; data.f64 %f", formatVRegSized(i.rd.nr(), 64), math.Float64frombits(i.u1)) case loadFpuConst128: str = fmt.Sprintf("ldr %s, #8; b 32; data.v128 %016x %016x", formatVRegSized(i.rd.nr(), 128), i.u1, i.u2) case fpuToInt: var op, src, dst string if signed := i.u1 == 1; signed { op = "fcvtzs" } else { op = "fcvtzu" } if src64 := i.u2 == 1; src64 { src = formatVRegWidthVec(i.rn.nr(), vecArrangementD) } else { src = formatVRegWidthVec(i.rn.nr(), vecArrangementS) } if dst64 := i.u3 == 1; dst64 { dst = formatVRegSized(i.rd.nr(), 64) } else { dst = formatVRegSized(i.rd.nr(), 32) } str = fmt.Sprintf("%s %s, %s", op, dst, src) case intToFpu: var op, src, dst string if signed := i.u1 == 1; signed { op = "scvtf" } else { op = "ucvtf" } if src64 := i.u2 == 1; src64 { src = formatVRegSized(i.rn.nr(), 64) } else { src = formatVRegSized(i.rn.nr(), 32) } if dst64 := i.u3 == 1; dst64 { dst = formatVRegWidthVec(i.rd.nr(), vecArrangementD) } else { dst = formatVRegWidthVec(i.rd.nr(), vecArrangementS) } str = fmt.Sprintf("%s %s, %s", op, dst, src) case fpuCSel: size := is64SizeBitToSize(i.u3) str = fmt.Sprintf("fcsel %s, %s, %s, %s", formatVRegSized(i.rd.nr(), size), formatVRegSized(i.rn.nr(), size), formatVRegSized(i.rm.nr(), size), condFlag(i.u1), ) case movToVec: var size byte arr := vecArrangement(i.u1) switch arr { case vecArrangementB, vecArrangementH, vecArrangementS: size = 32 case vecArrangementD: size = 64 default: panic("unsupported arrangement " + arr.String()) } str = fmt.Sprintf("ins %s, %s", formatVRegVec(i.rd.nr(), arr, vecIndex(i.u2)), formatVRegSized(i.rn.nr(), size)) case movFromVec, movFromVecSigned: var size byte var opcode string arr := vecArrangement(i.u1) signed := i.kind == movFromVecSigned switch arr { case vecArrangementB, vecArrangementH, vecArrangementS: size = 32 if signed { opcode = "smov" } else { opcode = "umov" } case vecArrangementD: size = 64 if signed { opcode = "smov" } else { opcode = "mov" } default: panic("unsupported arrangement " + arr.String()) } str = fmt.Sprintf("%s %s, %s", opcode, formatVRegSized(i.rd.nr(), size), formatVRegVec(i.rn.nr(), arr, vecIndex(i.u2))) case vecDup: str = fmt.Sprintf("dup %s, %s", formatVRegVec(i.rd.nr(), vecArrangement(i.u1), vecIndexNone), formatVRegSized(i.rn.nr(), 
64), ) case vecDupElement: arr := vecArrangement(i.u1) str = fmt.Sprintf("dup %s, %s", formatVRegVec(i.rd.nr(), arr, vecIndexNone), formatVRegVec(i.rn.nr(), arr, vecIndex(i.u2)), ) case vecDupFromFpu: panic("TODO") case vecExtract: str = fmt.Sprintf("ext %s, %s, %s, #%d", formatVRegVec(i.rd.nr(), vecArrangement(i.u1), vecIndexNone), formatVRegVec(i.rn.nr(), vecArrangement(i.u1), vecIndexNone), formatVRegVec(i.rm.nr(), vecArrangement(i.u1), vecIndexNone), uint32(i.u2), ) case vecExtend: panic("TODO") case vecMovElement: str = fmt.Sprintf("mov %s, %s", formatVRegVec(i.rd.nr(), vecArrangement(i.u1), vecIndex(i.u2)), formatVRegVec(i.rn.nr(), vecArrangement(i.u1), vecIndex(i.u3)), ) case vecMiscNarrow: panic("TODO") case vecRRR: str = fmt.Sprintf("%s %s, %s, %s", vecOp(i.u1), formatVRegVec(i.rd.nr(), vecArrangement(i.u2), vecIndexNone), formatVRegVec(i.rn.nr(), vecArrangement(i.u2), vecIndexNone), formatVRegVec(i.rm.nr(), vecArrangement(i.u2), vecIndexNone), ) case vecMisc: vop := vecOp(i.u1) if vop == vecOpCmeq0 { str = fmt.Sprintf("cmeq %s, %s, #0", formatVRegVec(i.rd.nr(), vecArrangement(i.u2), vecIndexNone), formatVRegVec(i.rn.nr(), vecArrangement(i.u2), vecIndexNone)) } else { str = fmt.Sprintf("%s %s, %s", vop, formatVRegVec(i.rd.nr(), vecArrangement(i.u2), vecIndexNone), formatVRegVec(i.rn.nr(), vecArrangement(i.u2), vecIndexNone)) } case vecLanes: arr := vecArrangement(i.u2) var destArr vecArrangement switch arr { case vecArrangement8B, vecArrangement16B: destArr = vecArrangementH case vecArrangement4H, vecArrangement8H: destArr = vecArrangementS case vecArrangement4S: destArr = vecArrangementD default: panic("invalid arrangement " + arr.String()) } str = fmt.Sprintf("%s %s, %s", vecOp(i.u1), formatVRegWidthVec(i.rd.nr(), destArr), formatVRegVec(i.rn.nr(), arr, vecIndexNone)) case vecShiftImm: arr := vecArrangement(i.u2) str = fmt.Sprintf("%s %s, %s, #%d", vecOp(i.u1), formatVRegVec(i.rd.nr(), arr, vecIndexNone), formatVRegVec(i.rn.nr(), arr, vecIndexNone), i.rm.shiftImm()) case vecTbl: arr := vecArrangement(i.u2) str = fmt.Sprintf("tbl %s, { %s }, %s", formatVRegVec(i.rd.nr(), arr, vecIndexNone), formatVRegVec(i.rn.nr(), vecArrangement16B, vecIndexNone), formatVRegVec(i.rm.nr(), arr, vecIndexNone)) case vecTbl2: arr := vecArrangement(i.u2) rd, rn, rm := i.rd.nr(), i.rn.nr(), i.rm.nr() rn1 := regalloc.FromRealReg(rn.RealReg()+1, rn.RegType()) str = fmt.Sprintf("tbl %s, { %s, %s }, %s", formatVRegVec(rd, arr, vecIndexNone), formatVRegVec(rn, vecArrangement16B, vecIndexNone), formatVRegVec(rn1, vecArrangement16B, vecIndexNone), formatVRegVec(rm, arr, vecIndexNone)) case vecPermute: arr := vecArrangement(i.u2) str = fmt.Sprintf("%s %s, %s, %s", vecOp(i.u1), formatVRegVec(i.rd.nr(), arr, vecIndexNone), formatVRegVec(i.rn.nr(), arr, vecIndexNone), formatVRegVec(i.rm.nr(), arr, vecIndexNone)) case movToFPSR: str = fmt.Sprintf("msr fpsr, %s", formatVRegSized(i.rn.nr(), 64)) case movFromFPSR: str = fmt.Sprintf("mrs %s fpsr", formatVRegSized(i.rd.nr(), 64)) case call: if i.u2 > 0 { str = fmt.Sprintf("bl #%#x", i.u2) } else { str = fmt.Sprintf("bl %s", ssa.FuncRef(i.u1)) } case callInd: str = fmt.Sprintf("bl %s", formatVRegSized(i.rn.nr(), 64)) case ret: str = "ret" case br: target := label(i.u1) if i.u3 != 0 { str = fmt.Sprintf("b #%#x (%s)", i.brOffset(), target.String()) } else { str = fmt.Sprintf("b %s", target.String()) } case condBr: size := is64SizeBitToSize(i.u3) c := cond(i.u1) target := label(i.u2) switch c.kind() { case condKindRegisterZero: if !i.condBrOffsetResolved() { str = 
fmt.Sprintf("cbz %s, (%s)", formatVRegSized(c.register(), size), target.String()) } else { str = fmt.Sprintf("cbz %s, #%#x %s", formatVRegSized(c.register(), size), i.condBrOffset(), target.String()) } case condKindRegisterNotZero: if offset := i.condBrOffset(); offset != 0 { str = fmt.Sprintf("cbnz %s, #%#x (%s)", formatVRegSized(c.register(), size), offset, target.String()) } else { str = fmt.Sprintf("cbnz %s, %s", formatVRegSized(c.register(), size), target.String()) } case condKindCondFlagSet: if offset := i.condBrOffset(); offset != 0 { if target == invalidLabel { str = fmt.Sprintf("b.%s #%#x", c.flag(), offset) } else { str = fmt.Sprintf("b.%s #%#x, (%s)", c.flag(), offset, target.String()) } } else { str = fmt.Sprintf("b.%s %s", c.flag(), target.String()) } } case adr: str = fmt.Sprintf("adr %s, #%#x", formatVRegSized(i.rd.nr(), 64), int64(i.u1)) case brTableSequence: if i.u3 == 0 { // The offsets haven't been resolved yet. labels := make([]string, len(i.targets)) for index, l := range i.targets { labels[index] = label(l).String() } str = fmt.Sprintf("br_table_sequence %s, [%s]", formatVRegSized(i.rn.nr(), 64), strings.Join(labels, ", "), ) } else { // See encodeBrTableSequence for the encoding. offsets := make([]string, len(i.targets)) for index, offset := range i.targets { offsets[index] = fmt.Sprintf("%#x", int32(offset)) } str = fmt.Sprintf( `adr %[2]s, #16; ldrsw %[1]s, [%[2]s, %[1]s, UXTW 2]; add %[2]s, %[2]s, %[1]s; br %[2]s; %s`, formatVRegSized(i.rn.nr(), 64), formatVRegSized(tmpRegVReg, 64), offsets, ) } case exitSequence: str = fmt.Sprintf("exit_sequence %s", formatVRegSized(i.rn.nr(), 64)) case udf: str = "udf" case emitSourceOffsetInfo: str = fmt.Sprintf("source_offset_info %d", ssa.SourceOffset(i.u1)) case vecLoad1R: str = fmt.Sprintf("ld1r {%s}, [%s]", formatVRegVec(i.rd.nr(), vecArrangement(i.u1), vecIndexNone), formatVRegSized(i.rn.nr(), 64)) default: panic(i.kind) } return } func (i *instruction) asAdr(rd regalloc.VReg, offset int64) { i.kind = adr i.rd = operandNR(rd) i.u1 = uint64(offset) } // TODO: delete unnecessary things. const ( // nop0 represents a no-op of zero size. nop0 instructionKind = iota + 1 // aluRRR represents an ALU operation with two register sources and a register destination. aluRRR // aluRRRR represents an ALU operation with three register sources and a register destination. aluRRRR // aluRRImm12 represents an ALU operation with a register source and an immediate-12 source, with a register destination. aluRRImm12 // aluRRBitmaskImm represents an ALU operation with a register source and a bitmask immediate, with a register destination. aluRRBitmaskImm // aluRRImmShift represents an ALU operation with a register source and an immediate-shifted source, with a register destination. aluRRImmShift // aluRRRShift represents an ALU operation with two register sources, one of which can be shifted, with a register destination. aluRRRShift // aluRRRExtend represents an ALU operation with two register sources, one of which can be extended, with a register destination. aluRRRExtend // bitRR represents a bit op instruction with a single register source. bitRR // uLoad8 represents an unsigned 8-bit load. uLoad8 // sLoad8 represents a signed 8-bit load into 64-bit register. sLoad8 // uLoad16 represents an unsigned 16-bit load into 64-bit register. uLoad16 // sLoad16 represents a signed 16-bit load into 64-bit register. sLoad16 // uLoad32 represents an unsigned 32-bit load into 64-bit register. 
uLoad32 // sLoad32 represents a signed 32-bit load into 64-bit register. sLoad32 // uLoad64 represents a 64-bit load. uLoad64 // store8 represents an 8-bit store. store8 // store16 represents a 16-bit store. store16 // store32 represents a 32-bit store. store32 // store64 represents a 64-bit store. store64 // storeP64 represents a store of a pair of registers. storeP64 // loadP64 represents a load of a pair of registers. loadP64 // mov64 represents a MOV instruction. These are encoded as ORR's but we keep them separate for better handling. mov64 // mov32 represents a 32-bit MOV. This zeroes the top 32 bits of the destination. mov32 // movZ represents a MOVZ with a 16-bit immediate. movZ // movN represents a MOVN with a 16-bit immediate. movN // movK represents a MOVK with a 16-bit immediate. movK // extend represents a sign- or zero-extend operation. extend // cSel represents a conditional-select operation. cSel // cSet represents a conditional-set operation. cSet // cCmpImm represents a conditional comparison with an immediate. cCmpImm // fpuMov64 represents a FPU move. Distinct from a vector-register move; moving just 64 bits appears to be significantly faster. fpuMov64 // fpuMov128 represents a vector register move. fpuMov128 // fpuMovFromVec represents a move to scalar from a vector element. fpuMovFromVec // fpuRR represents a 1-op FPU instruction. fpuRR // fpuRRR represents a 2-op FPU instruction. fpuRRR // fpuRRI represents a 2-op FPU instruction with immediate value. fpuRRI // fpuRRRR represents a 3-op FPU instruction. fpuRRRR // fpuCmp represents a FPU comparison, either 32 or 64 bit. fpuCmp // fpuLoad32 represents a floating-point load, single-precision (32 bit). fpuLoad32 // fpuStore32 represents a floating-point store, single-precision (32 bit). fpuStore32 // fpuLoad64 represents a floating-point load, double-precision (64 bit). fpuLoad64 // fpuStore64 represents a floating-point store, double-precision (64 bit). fpuStore64 // fpuLoad128 represents a floating-point/vector load, 128 bit. fpuLoad128 // fpuStore128 represents a floating-point/vector store, 128 bit. fpuStore128 // loadFpuConst32 represents a load of a 32-bit floating-point constant. loadFpuConst32 // loadFpuConst64 represents a load of a 64-bit floating-point constant. loadFpuConst64 // loadFpuConst128 represents a load of a 128-bit floating-point constant. loadFpuConst128 // vecLoad1R represents a load of a one single-element structure that replicates to all lanes of a vector. vecLoad1R // fpuToInt represents a conversion from FP to integer. fpuToInt // intToFpu represents a conversion from integer to FP. intToFpu // fpuCSel represents a 32/64-bit FP conditional select. fpuCSel // movToVec represents a move to a vector element from a GPR. movToVec // movFromVec represents an unsigned move from a vector element to a GPR. movFromVec // movFromVecSigned represents a signed move from a vector element to a GPR. movFromVecSigned // vecDup represents a duplication of general-purpose register to vector. vecDup // vecDupElement represents a duplication of a vector element to vector or scalar. vecDupElement // vecDupFromFpu represents a duplication of scalar to vector. vecDupFromFpu // vecExtract represents a vector extraction operation. vecExtract // vecExtend represents a vector extension operation. vecExtend // vecMovElement represents a move vector element to another vector element operation. vecMovElement // vecMiscNarrow represents a vector narrowing operation. vecMiscNarrow // vecRRR represents a vector ALU operation. 
vecRRR // vecMisc represents a vector two register miscellaneous instruction. vecMisc // vecLanes represents a vector instruction across lanes. vecLanes // vecShiftImm represents a SIMD scalar shift by immediate instruction. vecShiftImm // vecTbl represents a table vector lookup - single register table. vecTbl // vecTbl2 represents a table vector lookup - two register table. vecTbl2 // vecPermute represents a vector permute instruction. vecPermute // movToNZCV represents a move to the FPSR. movToFPSR // movFromNZCV represents a move from the FPSR. movFromFPSR // call represents a machine call instruction. call // callInd represents a machine indirect-call instruction. callInd // ret represents a machine return instruction. ret // br represents an unconditional branch. br // condBr represents a conditional branch. condBr // adr represents a compute the address (using a PC-relative offset) of a memory location. adr // brTableSequence represents a jump-table sequence. brTableSequence // exitSequence consists of multiple instructions, and exits the execution immediately. // See encodeExitSequence. exitSequence // UDF is the undefined instruction. For debugging only. udf // emitSourceOffsetInfo is a dummy instruction to emit source offset info. // The existence of this instruction does not affect the execution. emitSourceOffsetInfo // ------------------- do not define below this line ------------------- numInstructionKinds ) func (i *instruction) asEmitSourceOffsetInfo(l ssa.SourceOffset) *instruction { i.kind = emitSourceOffsetInfo i.u1 = uint64(l) return i } func (i *instruction) sourceOffsetInfo() ssa.SourceOffset { return ssa.SourceOffset(i.u1) } func (i *instruction) asUDF() *instruction { i.kind = udf return i } func (i *instruction) asFpuToInt(rd, rn operand, rdSigned, src64bit, dst64bit bool) { i.kind = fpuToInt i.rn = rn i.rd = rd if rdSigned { i.u1 = 1 } if src64bit { i.u2 = 1 } if dst64bit { i.u3 = 1 } } func (i *instruction) asIntToFpu(rd, rn operand, rnSigned, src64bit, dst64bit bool) { i.kind = intToFpu i.rn = rn i.rd = rd if rnSigned { i.u1 = 1 } if src64bit { i.u2 = 1 } if dst64bit { i.u3 = 1 } } func (i *instruction) asExitSequence(ctx regalloc.VReg) *instruction { i.kind = exitSequence i.rn = operandNR(ctx) return i } // aluOp determines the type of ALU operation. Instructions whose kind is one of // aluRRR, aluRRRR, aluRRImm12, aluRRBitmaskImm, aluRRImmShift, aluRRRShift and aluRRRExtend // would use this type. type aluOp int func (a aluOp) String() string { switch a { case aluOpAdd: return "add" case aluOpSub: return "sub" case aluOpOrr: return "orr" case aluOpAnd: return "and" case aluOpBic: return "bic" case aluOpEor: return "eor" case aluOpAddS: return "adds" case aluOpSubS: return "subs" case aluOpSMulH: return "sMulH" case aluOpUMulH: return "uMulH" case aluOpSDiv: return "sdiv" case aluOpUDiv: return "udiv" case aluOpRotR: return "ror" case aluOpLsr: return "lsr" case aluOpAsr: return "asr" case aluOpLsl: return "lsl" case aluOpMAdd: return "madd" case aluOpMSub: return "msub" } panic(int(a)) } const ( // 32/64-bit Add. aluOpAdd aluOp = iota // 32/64-bit Subtract. aluOpSub // 32/64-bit Bitwise OR. aluOpOrr // 32/64-bit Bitwise AND. aluOpAnd // 32/64-bit Bitwise AND NOT. aluOpBic // 32/64-bit Bitwise XOR (Exclusive OR). aluOpEor // 32/64-bit Add setting flags. aluOpAddS // 32/64-bit Subtract setting flags. aluOpSubS // Signed multiply, high-word result. aluOpSMulH // Unsigned multiply, high-word result. aluOpUMulH // 64-bit Signed divide. 
aluOpSDiv // 64-bit Unsigned divide. aluOpUDiv // 32/64-bit Rotate right. aluOpRotR // 32/64-bit Logical shift right. aluOpLsr // 32/64-bit Arithmetic shift right. aluOpAsr // 32/64-bit Logical shift left. aluOpLsl /// Multiply-add // MAdd and MSub are only applicable for aluRRRR. aluOpMAdd aluOpMSub ) // vecOp determines the type of vector operation. Instructions whose kind is one of // vecOpCnt would use this type. type vecOp int // String implements fmt.Stringer. func (b vecOp) String() string { switch b { case vecOpCnt: return "cnt" case vecOpCmeq: return "cmeq" case vecOpCmgt: return "cmgt" case vecOpCmhi: return "cmhi" case vecOpCmge: return "cmge" case vecOpCmhs: return "cmhs" case vecOpFcmeq: return "fcmeq" case vecOpFcmgt: return "fcmgt" case vecOpFcmge: return "fcmge" case vecOpCmeq0: return "cmeq0" case vecOpUaddlv: return "uaddlv" case vecOpBit: return "bit" case vecOpBic: return "bic" case vecOpBsl: return "bsl" case vecOpNot: return "not" case vecOpAnd: return "and" case vecOpOrr: return "orr" case vecOpEOR: return "eor" case vecOpFadd: return "fadd" case vecOpAdd: return "add" case vecOpAddp: return "addp" case vecOpAddv: return "addv" case vecOpSub: return "sub" case vecOpFsub: return "fsub" case vecOpSmin: return "smin" case vecOpUmin: return "umin" case vecOpUminv: return "uminv" case vecOpSmax: return "smax" case vecOpUmax: return "umax" case vecOpUmaxp: return "umaxp" case vecOpUrhadd: return "urhadd" case vecOpFmul: return "fmul" case vecOpSqrdmulh: return "sqrdmulh" case vecOpMul: return "mul" case vecOpUmlal: return "umlal" case vecOpFdiv: return "fdiv" case vecOpFsqrt: return "fsqrt" case vecOpAbs: return "abs" case vecOpFabs: return "fabs" case vecOpNeg: return "neg" case vecOpFneg: return "fneg" case vecOpFrintp: return "frintp" case vecOpFrintm: return "frintm" case vecOpFrintn: return "frintn" case vecOpFrintz: return "frintz" case vecOpFcvtl: return "fcvtl" case vecOpFcvtn: return "fcvtn" case vecOpFcvtzu: return "fcvtzu" case vecOpFcvtzs: return "fcvtzs" case vecOpScvtf: return "scvtf" case vecOpUcvtf: return "ucvtf" case vecOpSqxtn: return "sqxtn" case vecOpUqxtn: return "uqxtn" case vecOpSqxtun: return "sqxtun" case vecOpRev64: return "rev64" case vecOpXtn: return "xtn" case vecOpShll: return "shll" case vecOpSshl: return "sshl" case vecOpSshll: return "sshll" case vecOpUshl: return "ushl" case vecOpUshll: return "ushll" case vecOpSshr: return "sshr" case vecOpZip1: return "zip1" case vecOpFmin: return "fmin" case vecOpFmax: return "fmax" } panic(int(b)) } const ( vecOpCnt vecOp = iota vecOpCmeq0 vecOpCmeq vecOpCmgt vecOpCmhi vecOpCmge vecOpCmhs vecOpFcmeq vecOpFcmgt vecOpFcmge vecOpUaddlv vecOpBit vecOpBic vecOpBsl vecOpNot vecOpAnd vecOpOrr vecOpEOR vecOpAdd vecOpFadd vecOpAddv vecOpSqadd vecOpUqadd vecOpAddp vecOpSub vecOpFsub vecOpSqsub vecOpUqsub vecOpSmin vecOpUmin vecOpUminv vecOpFmin vecOpSmax vecOpUmax vecOpUmaxp vecOpFmax vecOpUrhadd vecOpMul vecOpFmul vecOpSqrdmulh vecOpUmlal vecOpFdiv vecOpFsqrt vecOpAbs vecOpFabs vecOpNeg vecOpFneg vecOpFrintm vecOpFrintn vecOpFrintp vecOpFrintz vecOpFcvtl vecOpFcvtn vecOpFcvtzs vecOpFcvtzu vecOpScvtf vecOpUcvtf vecOpSqxtn vecOpSqxtun vecOpUqxtn vecOpRev64 vecOpXtn vecOpShll vecOpSshl vecOpSshll vecOpUshl vecOpUshll vecOpSshr vecOpZip1 ) // bitOp determines the type of bitwise operation. Instructions whose kind is one of // bitOpRbit and bitOpClz would use this type. type bitOp int // String implements fmt.Stringer. 
func (b bitOp) String() string {
	switch b {
	case bitOpRbit:
		return "rbit"
	case bitOpClz:
		return "clz"
	}
	panic(int(b))
}

const (
	// 32/64-bit Rbit.
	bitOpRbit bitOp = iota
	// 32/64-bit Clz.
	bitOpClz
)

// fpuUniOp represents a unary floating-point unit (FPU) operation.
type fpuUniOp byte

const (
	fpuUniOpNeg fpuUniOp = iota
	fpuUniOpCvt32To64
	fpuUniOpCvt64To32
	fpuUniOpSqrt
	fpuUniOpRoundPlus
	fpuUniOpRoundMinus
	fpuUniOpRoundZero
	fpuUniOpRoundNearest
	fpuUniOpAbs
)

// String implements fmt.Stringer.
func (f fpuUniOp) String() string {
	switch f {
	case fpuUniOpNeg:
		return "fneg"
	case fpuUniOpCvt32To64:
		return "fcvt"
	case fpuUniOpCvt64To32:
		return "fcvt"
	case fpuUniOpSqrt:
		return "fsqrt"
	case fpuUniOpRoundPlus:
		return "frintp"
	case fpuUniOpRoundMinus:
		return "frintm"
	case fpuUniOpRoundZero:
		return "frintz"
	case fpuUniOpRoundNearest:
		return "frintn"
	case fpuUniOpAbs:
		return "fabs"
	}
	panic(int(f))
}

// fpuBinOp represents a binary floating-point unit (FPU) operation.
type fpuBinOp byte

const (
	fpuBinOpAdd = iota
	fpuBinOpSub
	fpuBinOpMul
	fpuBinOpDiv
	fpuBinOpMax
	fpuBinOpMin
)

// String implements fmt.Stringer.
func (f fpuBinOp) String() string {
	switch f {
	case fpuBinOpAdd:
		return "fadd"
	case fpuBinOpSub:
		return "fsub"
	case fpuBinOpMul:
		return "fmul"
	case fpuBinOpDiv:
		return "fdiv"
	case fpuBinOpMax:
		return "fmax"
	case fpuBinOpMin:
		return "fmin"
	}
	panic(int(f))
}

// extMode represents the mode of a register operand extension.
// For example, aluRRRExtend instructions need this info to determine the extensions.
type extMode byte

const (
	extModeNone extMode = iota
	// extModeZeroExtend32 suggests a zero-extension to 32 bits if the original bit size is less than 32.
	extModeZeroExtend32
	// extModeSignExtend32 stands for a sign-extension to 32 bits if the original bit size is less than 32.
	extModeSignExtend32
	// extModeZeroExtend64 suggests a zero-extension to 64 bits if the original bit size is less than 64.
	extModeZeroExtend64
	// extModeSignExtend64 stands for a sign-extension to 64 bits if the original bit size is less than 64.
	extModeSignExtend64
)

func (e extMode) bits() byte {
	switch e {
	case extModeZeroExtend32, extModeSignExtend32:
		return 32
	case extModeZeroExtend64, extModeSignExtend64:
		return 64
	default:
		return 0
	}
}

func (e extMode) signed() bool {
	switch e {
	case extModeSignExtend32, extModeSignExtend64:
		return true
	default:
		return false
	}
}

func extModeOf(t ssa.Type, signed bool) extMode {
	switch t.Bits() {
	case 32:
		if signed {
			return extModeSignExtend32
		}
		return extModeZeroExtend32
	case 64:
		if signed {
			return extModeSignExtend64
		}
		return extModeZeroExtend64
	default:
		panic("TODO? do we need narrower than 32 bits?")
	}
}

type extendOp byte

const (
	extendOpUXTB extendOp = 0b000
	extendOpUXTH extendOp = 0b001
	extendOpUXTW extendOp = 0b010
	// extendOpUXTX does nothing, but is a convenient symbol that officially exists. See:
	// https://stackoverflow.com/questions/72041372/what-do-the-uxtx-and-sxtx-extensions-mean-for-32-bit-aarch64-adds-instruct
	extendOpUXTX extendOp = 0b011
	extendOpSXTB extendOp = 0b100
	extendOpSXTH extendOp = 0b101
	extendOpSXTW extendOp = 0b110
	// extendOpSXTX does nothing, but is a convenient symbol that officially exists. See:
	// https://stackoverflow.com/questions/72041372/what-do-the-uxtx-and-sxtx-extensions-mean-for-32-bit-aarch64-adds-instruct
	extendOpSXTX extendOp = 0b111
	extendOpNone extendOp = 0xff
)

func (e extendOp) srcBits() byte {
	switch e {
	case extendOpUXTB, extendOpSXTB:
		return 8
	case extendOpUXTH, extendOpSXTH:
		return 16
	case extendOpUXTW, extendOpSXTW:
		return 32
	case extendOpUXTX, extendOpSXTX:
		return 64
	}
	panic(int(e))
}

func (e extendOp) String() string {
	switch e {
	case extendOpUXTB:
		return "UXTB"
	case extendOpUXTH:
		return "UXTH"
	case extendOpUXTW:
		return "UXTW"
	case extendOpUXTX:
		return "UXTX"
	case extendOpSXTB:
		return "SXTB"
	case extendOpSXTH:
		return "SXTH"
	case extendOpSXTW:
		return "SXTW"
	case extendOpSXTX:
		return "SXTX"
	}
	panic(int(e))
}

func extendOpFrom(signed bool, from byte) extendOp {
	switch from {
	case 8:
		if signed {
			return extendOpSXTB
		}
		return extendOpUXTB
	case 16:
		if signed {
			return extendOpSXTH
		}
		return extendOpUXTH
	case 32:
		if signed {
			return extendOpSXTW
		}
		return extendOpUXTW
	case 64:
		if signed {
			return extendOpSXTX
		}
		return extendOpUXTX
	}
	panic("invalid extendOpFrom")
}

type shiftOp byte

const (
	shiftOpLSL shiftOp = 0b00
	shiftOpLSR shiftOp = 0b01
	shiftOpASR shiftOp = 0b10
	shiftOpROR shiftOp = 0b11
)

func (s shiftOp) String() string {
	switch s {
	case shiftOpLSL:
		return "lsl"
	case shiftOpLSR:
		return "lsr"
	case shiftOpASR:
		return "asr"
	case shiftOpROR:
		return "ror"
	}
	panic(int(s))
}

const exitSequenceSize = 6 * 4 // 6 instructions as in encodeExitSequence.

// size returns the size of the instruction in encoded bytes.
func (i *instruction) size() int64 {
	switch i.kind {
	case exitSequence:
		return exitSequenceSize // 6 instructions as in encodeExitSequence.
	case nop0:
		return 0
	case emitSourceOffsetInfo:
		return 0
	case loadFpuConst32:
		if i.u1 == 0 {
			return 4 // zero loading can be encoded as a single instruction.
		}
		return 4 + 4 + 4
	case loadFpuConst64:
		if i.u1 == 0 {
			return 4 // zero loading can be encoded as a single instruction.
		}
		return 4 + 4 + 8
	case loadFpuConst128:
		if i.u1 == 0 && i.u2 == 0 {
			return 4 // zero loading can be encoded as a single instruction.
		}
		return 4 + 4 + 16
	case brTableSequence:
		return 4*4 + int64(len(i.targets))*4
	default:
		return 4
	}
}

// vecArrangement is the arrangement of data within a vector register.
type vecArrangement byte

const (
	// vecArrangementNone is an arrangement indicating no data is stored.
	vecArrangementNone vecArrangement = iota
	// vecArrangement8B is an arrangement of 8 bytes (64-bit vector)
	vecArrangement8B
	// vecArrangement16B is an arrangement of 16 bytes (128-bit vector)
	vecArrangement16B
	// vecArrangement4H is an arrangement of 4 half precisions (64-bit vector)
	vecArrangement4H
	// vecArrangement8H is an arrangement of 8 half precisions (128-bit vector)
	vecArrangement8H
	// vecArrangement2S is an arrangement of 2 single precisions (64-bit vector)
	vecArrangement2S
	// vecArrangement4S is an arrangement of 4 single precisions (128-bit vector)
	vecArrangement4S
	// vecArrangement1D is an arrangement of 1 double precision (64-bit vector)
	vecArrangement1D
	// vecArrangement2D is an arrangement of 2 double precisions (128-bit vector)
	vecArrangement2D

	// Assign each vector size specifier to a vector arrangement ID.
	// Instructions can only have an arrangement or a size specifier, but not both, so it
	// simplifies the internal representation of vector instructions by being able to
	// store either into the same field.
	// vecArrangementB is a size specifier of byte (8-bit)
	vecArrangementB
	// vecArrangementH is a size specifier of halfword (16-bit)
	vecArrangementH
	// vecArrangementS is a size specifier of word (32-bit)
	vecArrangementS
	// vecArrangementD is a size specifier of doubleword (64-bit)
	vecArrangementD
	// vecArrangementQ is a size specifier of the entire vector (128-bit)
	vecArrangementQ
)

// String implements fmt.Stringer
func (v vecArrangement) String() (ret string) {
	switch v {
	case vecArrangement8B:
		ret = "8B"
	case vecArrangement16B:
		ret = "16B"
	case vecArrangement4H:
		ret = "4H"
	case vecArrangement8H:
		ret = "8H"
	case vecArrangement2S:
		ret = "2S"
	case vecArrangement4S:
		ret = "4S"
	case vecArrangement1D:
		ret = "1D"
	case vecArrangement2D:
		ret = "2D"
	case vecArrangementB:
		ret = "B"
	case vecArrangementH:
		ret = "H"
	case vecArrangementS:
		ret = "S"
	case vecArrangementD:
		ret = "D"
	case vecArrangementQ:
		ret = "Q"
	case vecArrangementNone:
		ret = "none"
	default:
		panic(v)
	}
	return
}

// vecIndex is the index of an element of a vector register.
type vecIndex byte

// vecIndexNone indicates no vector index specified.
const vecIndexNone = ^vecIndex(0)

func ssaLaneToArrangement(lane ssa.VecLane) vecArrangement {
	switch lane {
	case ssa.VecLaneI8x16:
		return vecArrangement16B
	case ssa.VecLaneI16x8:
		return vecArrangement8H
	case ssa.VecLaneI32x4:
		return vecArrangement4S
	case ssa.VecLaneI64x2:
		return vecArrangement2D
	case ssa.VecLaneF32x4:
		return vecArrangement4S
	case ssa.VecLaneF64x2:
		return vecArrangement2D
	default:
		panic(lane)
	}
}
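
// The builders above are typically used together with the operand helpers. As an
// illustrative sketch only (x0VReg, x1VReg and x2VReg are assumed here to denote the
// VRegs backed by the real registers x0-x2; adjust to whatever the package defines),
// a 64-bit ADD could be constructed and pretty-printed as:
//
//	var i instruction
//	i.asALU(aluOpAdd, operandNR(x0VReg), operandNR(x1VReg), operandNR(x2VReg), true)
//	_ = i.String() // would print something like "add x0, x1, x2"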