wazevo(arm64): reuses queue in addr lowering (#1835)

Signed-off-by: Takeshi Yoneda <t.y.mathetake@gmail.com>

Author:       Takeshi Yoneda
Date:         2023-11-13 12:06:30 +09:00
Committed by: GitHub
Parent:       2d760b400f
Commit:       6a7e474e02

3 changed files with 67 additions and 50 deletions


@@ -307,37 +307,35 @@ func (m *machine) lowerToAddressMode(ptr ssa.Value, offsetBase uint32, size byte
 // During the construction, this might emit additional instructions.
 //
 // Extracted as a separate function for easy testing.
-func (m *machine) lowerToAddressModeFromAddends(a32s []addend32, a64s []regalloc.VReg, size byte, offset int64) (amode addressMode) {
-	switch a64sExist, a32sExist := len(a64s) > 0, len(a32s) > 0; {
+func (m *machine) lowerToAddressModeFromAddends(a32s *queue[addend32], a64s *queue[regalloc.VReg], size byte, offset int64) (amode addressMode) {
+	switch a64sExist, a32sExist := !a64s.empty(), !a32s.empty(); {
 	case a64sExist && a32sExist:
 		var base regalloc.VReg
-		base, a64s = dequeue(a64s)
+		base = a64s.dequeue()
 		var a32 addend32
-		a32, a32s = dequeue(a32s)
+		a32 = a32s.dequeue()
 		amode = addressMode{kind: addressModeKindRegExtended, rn: base, rm: a32.r, extOp: a32.ext}
 	case a64sExist && offsetFitsInAddressModeKindRegUnsignedImm12(size, offset):
 		var base regalloc.VReg
-		base, a64s = dequeue(a64s)
+		base = a64s.dequeue()
 		amode = addressMode{kind: addressModeKindRegUnsignedImm12, rn: base, imm: offset}
 		offset = 0
 	case a64sExist && offsetFitsInAddressModeKindRegSignedImm9(offset):
 		var base regalloc.VReg
-		base, a64s = dequeue(a64s)
+		base = a64s.dequeue()
 		amode = addressMode{kind: addressModeKindRegSignedImm9, rn: base, imm: offset}
 		offset = 0
 	case a64sExist:
 		var base regalloc.VReg
-		base, a64s = dequeue(a64s)
-		if len(a64s) > 0 {
-			var index regalloc.VReg
-			index, a64s = dequeue(a64s)
+		base = a64s.dequeue()
+		if !a64s.empty() {
+			index := a64s.dequeue()
 			amode = addressMode{kind: addressModeKindRegReg, rn: base, rm: index, extOp: extendOpUXTX /* indicates index reg is 64-bit */}
 		} else {
 			amode = addressMode{kind: addressModeKindRegUnsignedImm12, rn: base, imm: 0}
 		}
 	case a32sExist:
-		var base32 addend32
-		base32, a32s = dequeue(a32s)
+		base32 := a32s.dequeue()
 		// First we need 64-bit base.
 		base := m.compiler.AllocateVReg(ssa.TypeI64)
@@ -349,9 +347,8 @@ func (m *machine) lowerToAddressModeFromAddends(a32s []addend32, a64s []regalloc
 		baseExt.asExtend(base, base32.r, 32, 64, signed)
 		m.insert(baseExt)
-		if len(a32s) > 0 {
-			var index addend32
-			index, a32s = dequeue(a32s)
+		if !a32s.empty() {
+			index := a32s.dequeue()
 			amode = addressMode{kind: addressModeKindRegExtended, rn: base, rm: index.r, extOp: index.ext}
 		} else {
 			amode = addressMode{kind: addressModeKindRegUnsignedImm12, rn: base, imm: 0}
@@ -368,11 +365,13 @@ func (m *machine) lowerToAddressModeFromAddends(a32s []addend32, a64s []regalloc
 		baseReg = m.addConstToReg64(baseReg, offset) // baseReg += offset
 	}
-	for _, a64 := range a64s {
+	for !a64s.empty() {
+		a64 := a64s.dequeue()
 		baseReg = m.addReg64ToReg64(baseReg, a64) // baseReg += a64
 	}
-	for _, a32 := range a32s {
+	for !a32s.empty() {
+		a32 := a32s.dequeue()
 		baseReg = m.addRegToReg64Ext(baseReg, a32.r, a32.ext) // baseReg += (a32 extended to 64-bit)
 	}
 	amode.rn = baseReg
@@ -381,21 +380,22 @@ func (m *machine) lowerToAddressModeFromAddends(a32s []addend32, a64s []regalloc
 var addendsMatchOpcodes = [4]ssa.Opcode{ssa.OpcodeUExtend, ssa.OpcodeSExtend, ssa.OpcodeIadd, ssa.OpcodeIconst}
 
-func (m *machine) collectAddends(ptr ssa.Value) (addends32 []addend32, addends64 []regalloc.VReg, offset int64) {
-	m.addends64 = m.addends64[:0]
-	m.addends32 = m.addends32[:0]
-	m.addendsWorkQueue = append(m.addendsWorkQueue[:0], ptr)
+func (m *machine) collectAddends(ptr ssa.Value) (addends32 *queue[addend32], addends64 *queue[regalloc.VReg], offset int64) {
+	m.addendsWorkQueue.reset()
+	m.addends32.reset()
+	m.addends64.reset()
+	m.addendsWorkQueue.enqueue(ptr)
 
-	for len(m.addendsWorkQueue) > 0 {
-		var v ssa.Value
-		v, m.addendsWorkQueue = dequeue(m.addendsWorkQueue)
+	for !m.addendsWorkQueue.empty() {
+		v := m.addendsWorkQueue.dequeue()
 		def := m.compiler.ValueDefinition(v)
 		switch op := m.compiler.MatchInstrOneOf(def, addendsMatchOpcodes[:]); op {
 		case ssa.OpcodeIadd:
 			// If the addend is an add, we recursively collect its operands.
 			x, y := def.Instr.Arg2()
-			m.addendsWorkQueue = append(m.addendsWorkQueue, x, y)
+			m.addendsWorkQueue.enqueue(x)
+			m.addendsWorkQueue.enqueue(y)
 			def.Instr.MarkLowered()
 		case ssa.OpcodeIconst:
 			// If the addend is constant, we just statically merge it into the offset.
@@ -411,7 +411,7 @@ func (m *machine) collectAddends(ptr ssa.Value) (addends32 []addend32, addends64
 			switch input := def.Instr.Arg(); input.Type().Bits() {
 			case 64:
 				// If the input is already 64-bit, this extend is a no-op. TODO: shouldn't this be optimized out at much earlier stage? no?
-				m.addends64 = append(m.addends64, m.getOperand_NR(m.compiler.ValueDefinition(input), extModeNone).nr())
+				m.addends64.enqueue(m.getOperand_NR(m.compiler.ValueDefinition(input), extModeNone).nr())
 				def.Instr.MarkLowered()
 				continue
 			case 32:
@@ -432,7 +432,7 @@ func (m *machine) collectAddends(ptr ssa.Value) (addends32 []addend32, addends64
 					// Sign-extension of a 32-bit constant can be merged into the offset.
 					offset += int64(int32(inputDef.Instr.ConstantVal())) // sign-extend!
 				default:
-					m.addends32 = append(m.addends32, addend32{r: m.getOperand_NR(inputDef, extModeNone).nr(), ext: ext})
+					m.addends32.enqueue(addend32{r: m.getOperand_NR(inputDef, extModeNone).nr(), ext: ext})
 				}
 				def.Instr.MarkLowered()
 				continue
@@ -443,10 +443,10 @@ func (m *machine) collectAddends(ptr ssa.Value) (addends32 []addend32, addends64
 			panic("TODO: add tests")
 		default:
 			// If the addend is not one of them, we simply use it as-is (without merging!), optionally zero-extending it.
-			m.addends64 = append(m.addends64, m.getOperand_NR(def, extModeZeroExtend64 /* optional zero ext */).nr())
+			m.addends64.enqueue(m.getOperand_NR(def, extModeZeroExtend64 /* optional zero ext */).nr())
 		}
 	}
-	return m.addends32, m.addends64, offset
+	return &m.addends32, &m.addends64, offset
 }
 
 func (m *machine) addConstToReg64(r regalloc.VReg, c int64) (rd regalloc.VReg) {
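
The worklist above is the whole algorithm: an Iadd pushes its two operands back onto the queue, an Iconst folds into the running offset, and anything else becomes an addend register. A toy, self-contained sketch of the same traversal, assuming nothing from wazero (the expr type and every name in it are illustrative):

```go
package main

import "fmt"

// expr is a stand-in for an SSA value definition: a constant, an add,
// or an opaque leaf (some register-producing instruction).
type expr struct {
	isAdd    bool
	isConst  bool
	constVal int64
	operands []*expr // operands of an add
	name     string  // leaf "register" name
}

// collect mimics collectAddends: walk a work queue, merge constants into
// the offset, and gather the remaining values as addends.
func collect(root *expr) (addends []string, offset int64) {
	work := []*expr{root}
	for len(work) > 0 {
		e := work[0]
		work = work[1:]
		switch {
		case e.isAdd:
			work = append(work, e.operands...)
		case e.isConst:
			offset += e.constVal
		default:
			addends = append(addends, e.name)
		}
	}
	return
}

func main() {
	// base + (idx + 16)  =>  addends [base idx], offset 16
	tree := &expr{isAdd: true, operands: []*expr{
		{name: "base"},
		{isAdd: true, operands: []*expr{{name: "idx"}, {isConst: true, constVal: 16}}},
	}}
	fmt.Println(collect(tree))
}
```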
@@ -481,7 +481,27 @@ func (m *machine) addRegToReg64Ext(rn, rm regalloc.VReg, ext extendOp) (rd regal
 	return
 }
 
-func dequeue[T any](q []T) (ret T, popped []T) {
-	ret, popped = (q)[0], (q)[1:]
-	return
-}
+// queue is the resettable queue where the underlying slice is reused.
+type queue[T any] struct {
+	index int
+	data  []T
+}
+
+func (q *queue[T]) enqueue(v T) {
+	q.data = append(q.data, v)
+}
+
+func (q *queue[T]) dequeue() (ret T) {
+	ret = q.data[q.index]
+	q.index++
+	return
+}
+
+func (q *queue[T]) empty() bool {
+	return q.index >= len(q.data)
+}
+
+func (q *queue[T]) reset() {
+	q.index = 0
+	q.data = q.data[:0]
+}
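
This type is the heart of the change: dequeue now just advances a read index over a shared slice instead of returning a re-sliced copy, and reset rewinds the index and truncates the slice so the grown capacity survives into the next lowering. A runnable sketch of that behavior; the queue code mirrors the definition above, while package main and the demo loop are illustrative only:

```go
package main

import "fmt"

// queue mirrors the type added in this commit.
type queue[T any] struct {
	index int
	data  []T
}

func (q *queue[T]) enqueue(v T) { q.data = append(q.data, v) }

func (q *queue[T]) dequeue() (ret T) {
	ret = q.data[q.index]
	q.index++
	return
}

func (q *queue[T]) empty() bool { return q.index >= len(q.data) }

func (q *queue[T]) reset() {
	q.index = 0
	q.data = q.data[:0]
}

func main() {
	var q queue[int]
	for round := 0; round < 2; round++ {
		for i := 1; i <= 3; i++ {
			q.enqueue(i)
		}
		for !q.empty() {
			fmt.Print(q.dequeue(), " ")
		}
		// reset leaves length 0 but keeps the capacity grown in the
		// first round, so the second round allocates nothing.
		q.reset()
		fmt.Println("| cap after reset:", cap(q.data))
	}
}
```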


@@ -472,9 +472,9 @@ func TestMachine_collectAddends(t *testing.T) {
 		t.Run(tc.name, func(t *testing.T) {
 			ctx, b, m := newSetupWithMockContext()
 			ptr, verify := tc.setup(ctx, b, m)
-			actual32s, actual64s, actualOffset := m.collectAddends(ptr)
-			require.Equal(t, tc.exp32s, actual32s)
-			require.Equal(t, tc.exp64s, actual64s)
+			actual32sQ, actual64sQ, actualOffset := m.collectAddends(ptr)
+			require.Equal(t, tc.exp32s, actual32sQ.data)
+			require.Equal(t, tc.exp64s, actual64sQ.data)
 			require.Equal(t, tc.offset, actualOffset)
 			verify(t)
 		})
@@ -567,18 +567,6 @@ func TestMachine_addRegToReg64Ext(t *testing.T) {
 	}
 }
 
-func Test_dequeue(t *testing.T) {
-	ints := []int{1, 2, 3}
-	one, intPopped := dequeue(ints)
-	require.Equal(t, 1, one)
-	require.Equal(t, []int{2, 3}, intPopped)
-
-	strs := []string{"a", "b", "c"}
-	a, strPopped := dequeue(strs)
-	require.Equal(t, "a", a)
-	require.Equal(t, []string{"b", "c"}, strPopped)
-}
-
 func TestMachine_lowerToAddressModeFromAddends(t *testing.T) {
 	x1, x2, x3 := regalloc.FromRealReg(x1, regalloc.RegTypeInt), regalloc.FromRealReg(x2, regalloc.RegTypeInt), regalloc.FromRealReg(x3, regalloc.RegTypeInt)
 	x4, x5, x6 := regalloc.FromRealReg(x4, regalloc.RegTypeInt), regalloc.FromRealReg(x5, regalloc.RegTypeInt), regalloc.FromRealReg(x6, regalloc.RegTypeInt)
@@ -823,7 +811,16 @@ func TestMachine_lowerToAddressModeFromAddends(t *testing.T) {
 		t.Run(tc.name, func(t *testing.T) {
 			ctx, _, m := newSetupWithMockContext()
 			ctx.vRegCounter = int(nextVReg.ID()) - 1
-			actual := m.lowerToAddressModeFromAddends(tc.a32s, tc.a64s, tc.dstSizeInBits, tc.offset)
+
+			var a32s queue[addend32]
+			var a64s queue[regalloc.VReg]
+			for _, a32 := range tc.a32s {
+				a32s.enqueue(a32)
+			}
+			for _, a64 := range tc.a64s {
+				a64s.enqueue(a64)
+			}
+			actual := m.lowerToAddressModeFromAddends(&a32s, &a64s, tc.dstSizeInBits, tc.offset)
 			require.Equal(t, strings.Join(tc.insts, "\n"), formatEmittedInstructionsInCurrentBlock(m))
 			require.Equal(t, tc.exp, actual, actual.format(tc.dstSizeInBits))
 		})
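
Test_dequeue disappears above without a counterpart for the new type. A hypothetical unit test in the style of the surrounding file could look like this (the name Test_queue and its body are a sketch, not part of the commit; require is the assertion helper the file already uses):

```go
func Test_queue(t *testing.T) {
	var q queue[int]
	require.True(t, q.empty())

	q.enqueue(1)
	q.enqueue(2)
	q.enqueue(3)
	require.Equal(t, 1, q.dequeue())
	require.Equal(t, 2, q.dequeue())
	require.Equal(t, 3, q.dequeue())
	require.True(t, q.empty())

	// reset makes the queue reusable without dropping the backing slice.
	q.reset()
	require.True(t, q.empty())
	q.enqueue(4)
	require.Equal(t, 4, q.dequeue())
}
```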


@@ -37,10 +37,10 @@ type (
 		labelPositionPool wazevoapi.Pool[labelPosition]
 
 		// addendsWorkQueue is used during address lowering, defined here for reuse.
-		addendsWorkQueue []ssa.Value
-		addends32        []addend32
+		addendsWorkQueue queue[ssa.Value]
+		addends32        queue[addend32]
 		// addends64 is used during address lowering, defined here for reuse.
-		addends64 []regalloc.VReg
+		addends64 queue[regalloc.VReg]
 		unresolvedAddressModes []*instruction
 
 		// spillSlotSize is the size of the stack slot in bytes used for spilling registers.
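
The queues live by value on the machine struct and collectAddends returns &m.addends32 / &m.addends64, so each lowering call resets and refills the same backing slices rather than allocating fresh ones. A hypothetical micro-benchmark sketching why that pays off (BenchmarkQueueReuse and the workload are illustrative, not from the commit; it assumes the queue type above is in scope):

```go
func BenchmarkQueueReuse(b *testing.B) {
	var q queue[int]
	b.ReportAllocs()
	for i := 0; i < b.N; i++ {
		q.reset() // reuse the capacity grown in earlier iterations
		for j := 0; j < 16; j++ {
			q.enqueue(j)
		}
		for !q.empty() {
			_ = q.dequeue()
		}
	}
}
```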