wazero/internal/engine/wazevo/backend/isa/arm64/lower_constant.go

package arm64

import (
	"github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc"
	"github.com/tetratelabs/wazero/internal/engine/wazevo/ssa"
)

// lowerConstant allocates a new VReg and inserts the instruction to load the constant value.
func (m *machine) lowerConstant(instr *ssa.Instruction) (vr regalloc.VReg) {
	val := instr.Return()
	valType := val.Type()

	vr = m.compiler.AllocateVReg(regalloc.RegTypeOf(valType))
	m.InsertLoadConstant(instr, vr)
	return
}

// InsertLoadConstant implements backend.Machine.
func (m *machine) InsertLoadConstant(instr *ssa.Instruction, vr regalloc.VReg) {
	val := instr.Return()
	valType := val.Type()
	v := instr.ConstantVal()

	if valType.Bits() < 64 { // Clear the redundant bits just in case it's unexpectedly sign-extended, etc.
		v = v & ((1 << valType.Bits()) - 1)
	}

	switch valType {
	case ssa.TypeF32:
		loadF := m.allocateInstr()
		loadF.asLoadFpuConst32(vr, v)
		m.insert(loadF)
	case ssa.TypeF64:
		loadF := m.allocateInstr()
		loadF.asLoadFpuConst64(vr, v)
		m.insert(loadF)
	case ssa.TypeI32:
		if v == 0 {
			m.InsertMove(vr, xzrVReg, ssa.TypeI32)
		} else {
			m.lowerConstantI32(vr, int32(v))
		}
	case ssa.TypeI64:
		if v == 0 {
			m.InsertMove(vr, xzrVReg, ssa.TypeI64)
		} else {
			m.lowerConstantI64(vr, int64(v))
		}
	default:
		panic("TODO")
	}
}

// The following logic is based on the old asm/arm64 package.
// https://github.com/tetratelabs/wazero/blob/39f2ff23a6d609e10c82b9cc0b981f6de5b87a9c/internal/asm/arm64/impl.go

// lowerConstantI32 materializes the 32-bit constant c into dst.
func (m *machine) lowerConstantI32(dst regalloc.VReg, c int32) {
	// Following the logic here:
	// https://github.com/golang/go/blob/release-branch.go1.15/src/cmd/internal/obj/arm64/asm7.go#L1637
	ic := int64(uint32(c))
	if ic >= 0 && (ic <= 0xfff || (ic&0xfff) == 0 && (uint64(ic>>12) <= 0xfff)) {
		if isBitMaskImmediate(uint64(c)) {
			m.lowerConstViaBitMaskImmediate(uint64(uint32(c)), dst, false)
			return
		}
	}

	if t := const16bitAligned(int64(uint32(c))); t >= 0 {
		// If the const fits within a single 16-bit-aligned chunk, for example 0xffff, 0xffff_0000 or 0xffff_0000_0000_0000,
		// we can load it with a single MOVZ.
		m.insertMOVZ(dst, uint64(uint32(c)>>(16*t)), t, false)
	} else if t := const16bitAligned(int64(^c)); t >= 0 {
		// Likewise, if the inverse of the const fits within a single 16-bit-aligned chunk, load it with a single MOVN.
		m.insertMOVN(dst, uint64(^c>>(16*t)), t, false)
	} else if isBitMaskImmediate(uint64(uint32(c))) {
		m.lowerConstViaBitMaskImmediate(uint64(c), dst, false)
	} else {
		// Otherwise, we use MOVZ and MOVK to load it.
		c16 := uint16(c)
		m.insertMOVZ(dst, uint64(c16), 0, false)
		c16 = uint16(uint32(c) >> 16)
		m.insertMOVK(dst, uint64(c16), 1, false)
	}
}

// lowerConstantI64 materializes the 64-bit constant c into dst.
func (m *machine) lowerConstantI64(dst regalloc.VReg, c int64) {
	// Following the logic here:
	// https://github.com/golang/go/blob/release-branch.go1.15/src/cmd/internal/obj/arm64/asm7.go#L1798-L1852
	if c >= 0 && (c <= 0xfff || (c&0xfff) == 0 && (uint64(c>>12) <= 0xfff)) {
		if isBitMaskImmediate(uint64(c)) {
			m.lowerConstViaBitMaskImmediate(uint64(c), dst, true)
			return
		}
	}

	if t := const16bitAligned(c); t >= 0 {
		// If the const fits within a single 16-bit-aligned chunk, for example 0xffff, 0xffff_0000 or 0xffff_0000_0000_0000,
		// we can load it with a single MOVZ.
		m.insertMOVZ(dst, uint64(c)>>(16*t), t, true)
	} else if t := const16bitAligned(^c); t >= 0 {
		// Likewise, if the inverse of the const fits within a single 16-bit-aligned chunk, load it with a single MOVN.
		m.insertMOVN(dst, uint64(^c)>>(16*t), t, true)
	} else if isBitMaskImmediate(uint64(c)) {
		m.lowerConstViaBitMaskImmediate(uint64(c), dst, true)
	} else {
		m.load64bitConst(c, dst)
	}
}
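
// lowerConstViaBitMaskImmediate materializes the constant c into dst with a single ORR of
// the zero register, encoding c as a bitmask immediate (see isBitMaskImmediate below).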
func (m *machine) lowerConstViaBitMaskImmediate(c uint64, dst regalloc.VReg, b64 bool) {
	instr := m.allocateInstr()
	instr.asALUBitmaskImm(aluOpOrr, dst, xzrVReg, c, b64)
	m.insert(instr)
}

// isBitMaskImmediate determines if the value can be encoded as "bitmask immediate".
//
// Such an immediate is a 32-bit or 64-bit pattern viewed as a vector of identical elements of size e = 2, 4, 8, 16, 32, or 64 bits.
// Each element contains the same sub-pattern: a single run of 1 to e-1 non-zero bits, rotated by 0 to e-1 bits.
//
// See https://developer.arm.com/documentation/dui0802/b/A64-General-Instructions/MOV--bitmask-immediate-
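//
// For example (an illustration, not an exhaustive list):
//
//	0x0000_0000_0000_00f0 is encodable: a single run of four set bits within a 64-bit element.
//	0x00ff_00ff_00ff_00ff is encodable: the 16-bit element 0x00ff repeated across the value.
//	0x0000_0000_0001_2345 is not encodable: its set bits do not form one contiguous (rotated) run.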
func isBitMaskImmediate(x uint64) bool {
	// All-zeros and all-ones values are not "bitmask immediate" by definition.
	if x == 0 || x == 0xffff_ffff_ffff_ffff {
		return false
	}

	switch {
	case x != x>>32|x<<32:
		// e = 64
	case x != x>>16|x<<48:
		// e = 32 (x == x>>32|x<<32).
		// e.g. 0x00ff_ff00_00ff_ff00
		x = uint64(int32(x))
	case x != x>>8|x<<56:
		// e = 16 (x == x>>16|x<<48).
		// e.g. 0x00ff_00ff_00ff_00ff
		x = uint64(int16(x))
	case x != x>>4|x<<60:
		// e = 8 (x == x>>8|x<<56).
		// e.g. 0x0f0f_0f0f_0f0f_0f0f
		x = uint64(int8(x))
	default:
		// e = 4 or 2.
		return true
	}
	return sequenceOfSetbits(x) || sequenceOfSetbits(^x)
}

// sequenceOfSetbits returns true if the number's binary representation is a single,
// contiguous sequence of set bits (1s). For example: 0b1110 -> true, 0b1010 -> false.
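//
// A sketch of why the trick below works:
//
//	x = 0b0111_0000 (a contiguous run): the lowest set bit is 0b0001_0000, and
//	x + 0b0001_0000 = 0b1000_0000, a power of two, so (y-1)&y == 0 holds.
//
//	x = 0b0101 (not contiguous): the lowest set bit is 0b0001, and
//	x + 0b0001 = 0b0110, not a power of two, so the check fails.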
func sequenceOfSetbits(x uint64) bool {
	y := getLowestBit(x)
	// If x is a single contiguous run of set bits, adding the lowest set bit
	// results in a number with only one set bit (i.e. a power of two).
	y += x
	return (y-1)&y == 0
}
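
// getLowestBit returns the least significant set bit of x (equivalent to x & -x in two's complement).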
func getLowestBit(x uint64) uint64 {
	return x & (^x + 1)
}

// const16bitAligned checks whether the value occupies only a single 16-bit-aligned chunk,
// i.e. whether it equals some 16-bit value shifted left by 0, 16, 32, or 48 bits.
// If so, it returns that shift amount divided by 16; otherwise it returns -1.
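//
// For example:
//
//	const16bitAligned(0x0000_0000_0000_ffff) == 0
//	const16bitAligned(0x0000_0000_ffff_0000) == 1
//	const16bitAligned(0x0000_0000_0001_2345) == -1 // spans two 16-bit chunks.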
func const16bitAligned(v int64) (ret int) {
	ret = -1
	for s := 0; s < 64; s += 16 {
		if (uint64(v) &^ (uint64(0xffff) << uint(s))) == 0 {
			ret = s / 16
			break
		}
	}
	return
}

// load64bitConst loads a 64-bit constant into the register, following the same logic as the
// Go assembler for deciding how to materialize large 64-bit constants.
//
// See https://github.com/golang/go/blob/release-branch.go1.15/src/cmd/internal/obj/arm64/asm7.go#L6632-L6759
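//
// The constant is split into four 16-bit chunks; all-zero chunks favor MOVZ, all-ones chunks
// favor MOVN, and MOVK fills in the remaining chunks. For example:
//
//	0x0000_0000_dead_0000: three chunks are zero, so a single MOVZ of 0xdead with shift 16 suffices.
//	0xffff_ffff_dead_ffff: three chunks are 0xffff, so a single MOVN of 0x2152 (the inverted chunk) with shift 16 suffices.
//	0x1234_0000_dead_0000: two chunks are zero, so a MOVZ (0xdead, shift 16) followed by a MOVK (0x1234, shift 48) is used.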
func (m *machine) load64bitConst(c int64, dst regalloc.VReg) {
	var bits [4]uint64
	var zeros, negs int
	for i := 0; i < 4; i++ {
		bits[i] = uint64(c) >> uint(i*16) & 0xffff
		if v := bits[i]; v == 0 {
			zeros++
		} else if v == 0xffff {
			negs++
		}
	}

	if zeros == 3 {
		// one MOVZ instruction.
		for i, v := range bits {
			if v != 0 {
				m.insertMOVZ(dst, v, i, true)
			}
		}
	} else if negs == 3 {
		// one MOVN instruction.
		for i, v := range bits {
			if v != 0xffff {
				v = ^v
				m.insertMOVN(dst, v, i, true)
			}
		}
	} else if zeros == 2 {
		// one MOVZ then one MOVK.
		var movz bool
		for i, v := range bits {
			if !movz && v != 0 { // MOVZ.
				m.insertMOVZ(dst, v, i, true)
				movz = true
			} else if v != 0 {
				m.insertMOVK(dst, v, i, true)
			}
		}
	} else if negs == 2 {
		// one MOVN then one MOVK.
		var movn bool
		for i, v := range bits { // Emit MOVN.
			if !movn && v != 0xffff {
				v = ^v
				// https://developer.arm.com/documentation/dui0802/a/A64-General-Instructions/MOVN
				m.insertMOVN(dst, v, i, true)
				movn = true
			} else if v != 0xffff {
				m.insertMOVK(dst, v, i, true)
			}
		}
	} else if zeros == 1 {
		// one MOVZ then two MOVKs.
		var movz bool
		for i, v := range bits {
			if !movz && v != 0 { // MOVZ.
				m.insertMOVZ(dst, v, i, true)
				movz = true
			} else if v != 0 {
				m.insertMOVK(dst, v, i, true)
			}
		}
	} else if negs == 1 {
		// one MOVN then two MOVKs.
		var movn bool
		for i, v := range bits { // Emit MOVN.
			if !movn && v != 0xffff {
				v = ^v
				// https://developer.arm.com/documentation/dui0802/a/A64-General-Instructions/MOVN
				m.insertMOVN(dst, v, i, true)
				movn = true
			} else if v != 0xffff {
				m.insertMOVK(dst, v, i, true)
			}
		}
	} else {
		// one MOVZ then three MOVKs.
		var movz bool
		for i, v := range bits {
			if !movz && v != 0 { // MOVZ.
				m.insertMOVZ(dst, v, i, true)
				movz = true
			} else if v != 0 {
				m.insertMOVK(dst, v, i, true)
			}
		}
	}
}
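
// insertMOVZ inserts a MOVZ: it zeroes dst and writes the 16-bit value v into the chunk
// selected by shift (in units of 16 bits).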
func (m *machine) insertMOVZ(dst regalloc.VReg, v uint64, shift int, dst64 bool) {
	instr := m.allocateInstr()
	instr.asMOVZ(dst, v, uint64(shift), dst64)
	m.insert(instr)
}
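
// insertMOVK inserts a MOVK: it writes the 16-bit value v into the chunk of dst selected by
// shift (in units of 16 bits) while keeping the other bits intact.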
func (m *machine) insertMOVK(dst regalloc.VReg, v uint64, shift int, dst64 bool) {
	instr := m.allocateInstr()
	instr.asMOVK(dst, v, uint64(shift), dst64)
	m.insert(instr)
}
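
// insertMOVN inserts a MOVN: it sets dst to the bitwise complement of the 16-bit value v
// shifted into the chunk selected by shift (in units of 16 bits).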
func (m *machine) insertMOVN(dst regalloc.VReg, v uint64, shift int, dst64 bool) {
	instr := m.allocateInstr()
	instr.asMOVN(dst, v, uint64(shift), dst64)
	m.insert(instr)
}